From b1c44e91612b00b553ac14c602431c57a2cd7e72 Mon Sep 17 00:00:00 2001
From: Maxim Nikitin
Date: Fri, 15 Dec 2023 16:57:55 +0300
Subject: [PATCH] GRA-191: implement pool and crossEntropyLoss

---
 server/api/server.cpp     |   6 +-
 server/core/Layer.cpp     |  39 +++++-
 server/core/Layer.h       |  25 ++++
 server/core/LazyBlob.cpp  | 146 +++++++++++++++
 server/core/LazyBlob.h    |   4 +
 server/core/Operation.cpp |  46 +++++++
 server/core/Operation.h   |  21 +++
 server/core/Parameters.h  |   4 +
 server/core/main.cpp      | 263 +++++++++++++++++++++++++-------------
 9 files changed, 464 insertions(+), 90 deletions(-)

diff --git a/server/api/server.cpp b/server/api/server.cpp
index 151044ca..6048c47e 100644
--- a/server/api/server.cpp
+++ b/server/api/server.cpp
@@ -36,7 +36,8 @@ void train(json::rvalue& json, Graph** graph, int model_id) {
     Allocator::endVirtualMode();

     for (int j = 0; j < 1000; ++j) {
-        auto& result = lastNode.forward();
+        lastNode.forward();
+        auto& result = lastNode.output.value();
         printf("%d: %f\n", j, result(0, 0, 0, 0));
         // lastNode.gradient = result;
         lastNode.gradient = Blob::ones({{1}});
@@ -52,7 +53,8 @@ void predict(int model_id, Graph* graph, std::vector& answer) {
     auto& lastNode = graph->getLastPredictLayers()[0]->result.value();
     // For now we do not handle multiple outputs (!) Hard-coded
     lastNode.clear();
-    const Blob& result = lastNode.forward();
+    lastNode.forward();
+    const Blob& result = lastNode.output.value();

     answer.reserve(result.shape.rows() * result.shape.cols());
     for (size_t j = 0; j < result.shape.rows(); ++j) {
diff --git a/server/core/Layer.cpp b/server/core/Layer.cpp
index b83ec091..a6a307de 100644
--- a/server/core/Layer.cpp
+++ b/server/core/Layer.cpp
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include

 #include "Layer.h"
@@ -113,4 +114,40 @@ LayerNorm::LayerNorm(const AxisParameters& params,
     pipeline.push_back(std::move(_fill));

     result = Tensor(div, {pipeline[2], pipeline[5]});
-    }
+}
+
+SoftMax::SoftMax(const AxisParameters& params,
+    const std::vector<TensorRef>& args)
+    : sum(params.axis) {
+    assert(args.size() == 1);
+    pipeline.reserve(3);
+    TensorRef tensor = args[0];
+
+    Tensor exp_(exp, {tensor});
+    pipeline.push_back(std::move(exp_));
+
+    Tensor sum_(sum, {pipeline[0]});
+    pipeline.push_back(std::move(sum_));
+
+    Tensor fill_(fill, {pipeline[0], pipeline[1]});
+    pipeline.push_back(std::move(fill_));
+
+    result = Tensor(div, {pipeline[0], pipeline[2]});
+}
+
+EntropyLoss::EntropyLoss(const CrossEntropyLossParameters& params,
+    const std::vector<TensorRef>& args)
+    : softmax({{3}}, {args[0]}), mean({0, 1, 2, 3}), entropy(params.classCount) {
+    assert(args.size() == 2);
+    pipeline.reserve(1);
+
+    Tensor entropy_(entropy, {softmax.result.value(), args[1]});
+    pipeline.push_back(std::move(entropy_));
+
+    result = Tensor(mean, {pipeline[0]});
+}
+
+MaxPool::MaxPool(const std::vector<TensorRef>& args) {
+    assert(args.size() == 1);
+    result = Tensor(maxPool, {args[0]});
+}
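Editor's note: the SoftMax layer above chains four existing operations, exp -> sum over the given axis -> fill (broadcast the sums back to the input shape) -> divide, i.e. the usual softmax(x)_i = exp(x_i) / sum_j exp(x_j). The pipeline exponentiates the raw inputs directly. Below is a minimal standalone sketch of the same computation over a std::vector<float>, with the conventional max-subtraction added for numerical stability; the function name is illustrative and the code is not part of this patch.

// Standalone sketch (not from this patch): softmax over a non-empty vector,
// written directly instead of through the exp -> sum -> fill -> divide pipeline.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> softmaxSketch(const std::vector<float>& x) {
    const float m = *std::max_element(x.begin(), x.end()); // stability shift, extra vs. the layer
    std::vector<float> out(x.size());
    float sum = 0.0f;
    for (std::size_t i = 0; i < x.size(); ++i) {
        out[i] = std::exp(x[i] - m);   // "exp" step
        sum += out[i];                 // "sum" step
    }
    for (float& v : out) v /= sum;     // "fill" + "divide" steps
    return out;
}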
diff --git a/server/core/Layer.h b/server/core/Layer.h
index 6c4842b1..f08ed983 100644
--- a/server/core/Layer.h
+++ b/server/core/Layer.h
@@ -87,3 +87,28 @@ class LayerNorm: public Layer {
     LayerNorm(const AxisParameters& params,
         const std::vector<TensorRef>& args);
 };
+
+class SoftMax: public Layer {
+public:
+    Exp exp;
+    SumAxis sum;
+    Fill fill;
+    Divide div;
+    SoftMax(const AxisParameters& params,
+        const std::vector<TensorRef>& args);
+};
+
+class EntropyLoss: public Layer {
+public:
+    SoftMax softmax;
+    Mean mean;
+    Entropy entropy;
+    EntropyLoss(const CrossEntropyLossParameters& params,
+        const std::vector<TensorRef>& args);
+};
+
+class MaxPool: public Layer {
+public:
+    MaxPoolOp maxPool;
+    MaxPool(const std::vector<TensorRef>& args);
+};
diff --git a/server/core/LazyBlob.cpp b/server/core/LazyBlob.cpp
index 0085afb3..983b1e1d 100644
--- a/server/core/LazyBlob.cpp
+++ b/server/core/LazyBlob.cpp
@@ -1,6 +1,9 @@
 #include
 #include
 #include
+#include <cmath>
+#include <algorithm>
+
 #include "LazyBlob.h"
 #include "Iterations.h"
@@ -8,6 +11,7 @@
 #include "Blob.h"

 #define MAX_DIMS_COUNT 4
+#define EPS 1e-9

 const Shape& LazyBlob::shape() const {
     if (shape_.has_value()) {
@@ -568,3 +572,145 @@ std::ostream& operator<<(std::ostream& os, const LazyBlob &b) {
     }
     return os;
 }
+
+class LazyBlobEntropy: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const int classCount;
+    LazyBlobEntropy(const LazyBlob &a, const LazyBlob &b, int classCount):
+        a(a), b(b), classCount(classCount) {};
+
+    void initShape() const final override {
+        shape_ = b.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        assert(b(k, l, i, j) < classCount);
+        // WARNING: if this causes problems, switch to the EPS variant
+        return std::log(a(k, l, i, (int) b(k, l, i, j)));
+        // return std::log(a(k, l, i, (int) b(k, l, i, j)) + EPS);
+    }
+};
+
+class LazyBlobEntropyDerivative: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const int classCount;
+    LazyBlobEntropyDerivative(const LazyBlob &a, const LazyBlob &b, int classCount):
+        a(a), b(b), classCount(classCount) {};
+
+    void initShape() const final override {
+        shape_ = a.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        assert(b(k, l, i, j) < classCount);
+        if (j != (int) b(k, 0, 0, 0)) {
+            return 0;
+        }
+        // WARNING: if this causes problems, switch to the EPS variant
+        return - 1.0f / (a(k, l, i, j));
+        // return - 1.0f / (a(k, l, i, j) + EPS);
+    }
+};
+
+const LazyBlob& LazyBlob::entropy(const LazyBlob& a, int classCount) const {
+    assert(shape().cols() == classCount);
+    assert(shape().dim4() == a.shape().dim4());
+    assert(shape().dim3() == 1);
+    assert(shape().rows() == 1);
+    assert(a.shape().dim3() == 1);
+    assert(a.shape().rows() == 1);
+    assert(a.shape().cols() == 1);
+
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobEntropy));
+    return *(new(location) LazyBlobEntropy(*this, a, classCount));
+}
+
+const LazyBlob& LazyBlob::entropyDerivative(const LazyBlob& a, int classCount) const {
+    assert(shape().cols() == classCount);
+    assert(shape().dim4() == a.shape().dim4());
+    assert(shape().dim3() == 1);
+    assert(shape().rows() == 1);
+    assert(a.shape().dim3() == 1);
+    assert(a.shape().rows() == 1);
+    assert(a.shape().cols() == 1);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobEntropyDerivative));
+    return *(new(location) LazyBlobEntropyDerivative(*this, a, classCount));
+}
+
+class LazyBlobMaxPool: public LazyBlob {
+public:
+    const LazyBlob &a;
+    LazyBlobMaxPool(const LazyBlob &a): a(a) {};
+
+    void initShape() const final override {
+        shape_ = {
+            {
+                a.shape().dim4(), a.shape().dim3(), a.shape().rows() / 2, a.shape().cols() / 2
+            },
+            a.shape().dimsCount
+        };
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        return std::max(
+            std::max(a(k, l, i * 2, j * 2), a(k, l, i * 2 + 1, j * 2)),
+            std::max(a(k, l, i * 2, j * 2 + 1), a(k, l, i * 2 + 1, j * 2 + 1))
+        );
+    }
+};
+
+class LazyBlobMaxPoolDerivative: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    LazyBlobMaxPoolDerivative(const LazyBlob &a, const LazyBlob& b): a(a), b(b) {};
+
+    void initShape() const final override {
+        shape_ = a.shape();
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        size_t start_i = (i / 2) * 2;
+        size_t start_j = (j / 2) * 2;
+        size_t indexOfMax_i = start_i;
+        size_t indexOfMax_j = start_j;
+        float max = a(k, l, indexOfMax_i, indexOfMax_j);
+        if (max < a(k, l, start_i, start_j + 1)) {
+            indexOfMax_i = start_i;
+            indexOfMax_j = start_j + 1;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (max < a(k, l, start_i + 1, start_j)) {
+            indexOfMax_i = start_i + 1;
+            indexOfMax_j = start_j;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (max < a(k, l, start_i + 1, start_j + 1)) {
+            indexOfMax_i = start_i + 1;
+            indexOfMax_j = start_j + 1;
+            max = a(k, l, indexOfMax_i, indexOfMax_j);
+        }
+
+        if (indexOfMax_i == i && indexOfMax_j == j)
+            return b(k, l, i / 2, j / 2);
+
+        return 0.0f;
+    }
+};
+
+const LazyBlob& LazyBlob::maxPool() const {
+    assert(shape().cols() % 2 == 0);
+    assert(shape().rows() % 2 == 0);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobMaxPool));
+    return *(new(location) LazyBlobMaxPool(*this));
+}
+
+const LazyBlob& LazyBlob::maxPoolDerivative(const LazyBlob& b) const {
+    assert(shape().cols() % 2 == 0);
+    assert(shape().rows() % 2 == 0);
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobMaxPoolDerivative));
+    return *(new(location) LazyBlobMaxPoolDerivative(*this, b));
+}
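Editor's note: the two pooling blobs above implement a fixed 2x2, stride-2 max pool. The forward blob takes the maximum of each 2x2 window; the derivative blob routes the upstream gradient to the arg-max cell of its window and returns zero everywhere else. A minimal standalone sketch of the same idea on a plain row-major array follows; the names (Pooled, maxPool2x2, maxPool2x2Backward) are illustrative and not part of the patch.

// Standalone sketch (not from this patch): 2x2 / stride-2 max pooling and its
// gradient routing on a row-major H x W float array with even H and W.
#include <cstddef>
#include <vector>

struct Pooled {
    std::vector<float> out;        // pooled values, (H/2) x (W/2), row-major
    std::vector<std::size_t> idx;  // flat input index of the max in each 2x2 window
};

Pooled maxPool2x2(const std::vector<float>& in, std::size_t H, std::size_t W) {
    std::size_t count = (H / 2) * (W / 2);
    Pooled p{std::vector<float>(count), std::vector<std::size_t>(count)};
    for (std::size_t i = 0; i < H / 2; ++i)
        for (std::size_t j = 0; j < W / 2; ++j) {
            std::size_t best = (2 * i) * W + 2 * j;
            for (std::size_t di = 0; di < 2; ++di)
                for (std::size_t dj = 0; dj < 2; ++dj) {
                    std::size_t cur = (2 * i + di) * W + (2 * j + dj);
                    if (in[cur] > in[best]) best = cur;   // ties keep the earliest cell
                }
            p.out[i * (W / 2) + j] = in[best];
            p.idx[i * (W / 2) + j] = best;
        }
    return p;
}

// Backward pass: each pooled cell's upstream gradient flows only to the
// arg-max input cell of its window; every other input cell receives zero.
std::vector<float> maxPool2x2Backward(const Pooled& p, const std::vector<float>& gradOut,
                                      std::size_t H, std::size_t W) {
    std::vector<float> gradIn(H * W, 0.0f);
    for (std::size_t n = 0; n < p.idx.size(); ++n)
        gradIn[p.idx[n]] += gradOut[n];
    return gradIn;
}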
diff --git a/server/core/LazyBlob.h b/server/core/LazyBlob.h
index 349e8302..6ab59a64 100644
--- a/server/core/LazyBlob.h
+++ b/server/core/LazyBlob.h
@@ -31,6 +31,10 @@ class LazyBlob {
     const LazyBlob& mean(std::vector axis, bool minusOne = false) const;
     const LazyBlob& reverseLast2Dims() const;
     const LazyBlob& transposeFirst2Dims() const;
+    const LazyBlob& entropy(const LazyBlob& b, int classCount) const;
+    const LazyBlob& entropyDerivative(const LazyBlob& b, int classCount) const;
+    const LazyBlob& maxPool() const;
+    const LazyBlob& maxPoolDerivative(const LazyBlob& b) const;

     /// To repeat some dimensions several times
     /// - Parameter shape: the size we want to get
diff --git a/server/core/Operation.cpp b/server/core/Operation.cpp
index af707dac..f1a27701 100644
--- a/server/core/Operation.cpp
+++ b/server/core/Operation.cpp
@@ -273,3 +273,49 @@ Shape EPS::computeDim(const vector& args) const {
     args1(a);
     return a.shape();
 }
+
+Blob Exp::compute(const vector& args) const {
+    args1(a);
+    return a.applying([](float x) { return std::exp(x); });
+}
+
+vector Exp::grad(const Blob& grad, const vector& args) const {
+    args1(a);
+    return {grad * a.applying([](float x) { return std::exp(x); })};
+}
+
+Shape Exp::computeDim(const vector& args) const {
+    args1(a);
+    return a.shape();
+}
+
+Blob Entropy::compute(const vector& args) const {
+    args2(a, b);
+    return -a.entropy(b, classCount);
+}
+
+vector Entropy::grad(const Blob& grad, const vector& args) const {
+    args2(a, b);
+    return {grad.lazy().fill(a.shape()) * a.entropyDerivative(b, classCount), zeroBlob(b.shape())};
+}
+
+Shape Entropy::computeDim(const vector& args) const {
+    args2(a, b);
+    (void)a;
+    return b.shape();
+}
+
+Blob MaxPoolOp::compute(const vector& args) const {
+    args1(a);
+    return a.maxPool();
+}
+
+vector MaxPoolOp::grad(const Blob& grad, const vector& args) const {
+    args1(a);
+    return {a.maxPoolDerivative(grad.lazy())};
+}
+
+Shape MaxPoolOp::computeDim(const vector& args) const {
+    args1(a);
+    return {{a.shape().dim4(), a.shape().dim3(), a.shape().rows() / 2, a.shape().cols() / 2}, a.shape().dimsCount};
+}
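Editor's note: combined with the LazyBlob pieces, Entropy::compute returns minus the log of the (softmaxed) probability at the target class index stored in the second argument, and Entropy::grad places -1/p in the target column and zero elsewhere, scaled by the incoming gradient. A small standalone sketch of that scalar math for one two-class sample follows; the logits mirror the first row of inputL used by testEntropyLoss further down, and everything here is illustrative rather than code from the patch.

// Standalone sketch (not from this patch): the scalar math behind the
// cross-entropy loss and its gradient for a single two-class sample.
#include <cmath>
#include <cstdio>

int main() {
    // Logits {10, 30} with target class 0, as in the first sample of testEntropyLoss.
    double z0 = 10.0, z1 = 30.0;
    double m  = std::fmax(z0, z1);                  // stabilise before exponentiating
    double e0 = std::exp(z0 - m), e1 = std::exp(z1 - m);
    double p0 = e0 / (e0 + e1), p1 = e1 / (e0 + e1);

    int target = 0;                                 // true class index, as stored in the labels blob
    double loss = -std::log(target == 0 ? p0 : p1); // -log p_target

    // Gradient w.r.t. the softmax output: -1/p at the target column, 0 elsewhere.
    double dL_dp0 = (target == 0) ? -1.0 / p0 : 0.0;
    double dL_dp1 = (target == 1) ? -1.0 / p1 : 0.0;

    std::printf("p = (%g, %g), loss = %g, dL/dp = (%g, %g)\n", p0, p1, loss, dL_dp0, dL_dp1);
    return 0;
}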
diff --git a/server/core/Operation.h b/server/core/Operation.h
index c4192592..eb01170b 100644
--- a/server/core/Operation.h
+++ b/server/core/Operation.h
@@ -131,3 +131,24 @@ struct EPS: Operation {
     std::vector grad(const Blob& gradient, const std::vector& args) const override;
     Shape computeDim(const std::vector& args) const override;
 };
+
+struct Exp: Operation {
+    std::string name = "Exp";
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
+
+struct Entropy: Operation {
+    int classCount;
+    Entropy(int classCount): classCount(classCount) {};
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
+
+struct MaxPoolOp: Operation {
+    Blob compute(const std::vector& args) const override;
+    std::vector grad(const Blob& gradient, const std::vector& args) const override;
+    Shape computeDim(const std::vector& args) const override;
+};
diff --git a/server/core/Parameters.h b/server/core/Parameters.h
index 4c3e82b5..b1fab57d 100644
--- a/server/core/Parameters.h
+++ b/server/core/Parameters.h
@@ -24,3 +24,7 @@ struct AxisParameters
 {
     std::vector axis;
 };
+
+struct CrossEntropyLossParameters
+{
+    std::size_t classCount;
+};
diff --git a/server/core/main.cpp b/server/core/main.cpp
index 19b21c2a..2fb0a56e 100644
--- a/server/core/main.cpp
+++ b/server/core/main.cpp
@@ -57,120 +57,119 @@ static const ReLU reluOperation;

 thread_local int a = 0;

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         LinearLayerParameters params1{2ull, 2ull, true};
-//         LinearLayerParameters params2{2ull, 1ull, true};
-//         Conv2DLayerParameters paramsConv{7, 3, 1};
-
-//         auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));
+void testNN() {
+    Allocator::startVirtualMode();
+    {
+        LinearLayerParameters params1{2ull, 2ull, true};
+        LinearLayerParameters params2{2ull, 1ull, true};
+        // Conv2DLayerParameters paramsConv{7, 3, 1};

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));

-//         RandomObject initObject(0, 1, 17);
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         LinearLayer layer1 {params1, {inputNode}, &initObject};
-//         SGD.append(layer1.layerOperationParams);
+        auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));

-//         TensorRef res = layer1.result.value();
-//         ReLULayer reluLayer1 {{res}};
+        RandomObject initObject(0, 1, 17);
+        OptimizerBase SGD = OptimizerBase(0.1);
+        LinearLayer layer1 {params1, {inputNode}, &initObject};
+        SGD.append(layer1.layerOperationParams);

-//         res = reluLayer1.result.value();
-//         LinearLayer layer2 {params2, {res}, &initObject};
-//         res = layer2.result.value();
-//         SGD.append(layer2.layerOperationParams);
+        TensorRef res = layer1.result.value();
+        ReLULayer reluLayer1 {{res}};

-//         MSELoss mseLoss {{res, trueNode}};
+        res = reluLayer1.result.value();
+        LinearLayer layer2 {params2, {res}, &initObject};
+        res = layer2.result.value();
+        SGD.append(layer2.layerOperationParams);

-//         auto &lastNode = mseLoss.result.value();
-//         lastNode.forward();
-//         lastNode.gradient = Blob::ones(Shape {{1}});
-//         lastNode.backward();
-//         Allocator::endSession();
-//         lastNode.clear();
-//         Allocator::endVirtualMode();
+        MSELoss mseLoss {{res, trueNode}};

-//         for (int j = 0; j < 500; ++j) {
-//             auto &result = lastNode.forward();
-//             lastNode.gradient = Blob::ones(Shape {{1}});
-//             printf("%d: %f\n", j, result(0, 0));
-//             lastNode.backward();
-//             SGD.step();
-//             Allocator::endSession();
-//             lastNode.clear();
-//         }
-//         auto &result2 = res.get().forward();
-//         print(result2);
-//         Allocator::endSession();
-//     }
+        auto &lastNode = mseLoss.result.value();
+        lastNode.forward();
+        lastNode.gradient = Blob::ones(Shape {{1}});
+        lastNode.backward();
+        Allocator::endSession();
+        lastNode.clear();
+        Allocator::endVirtualMode();
+
+        for (int j = 0; j < 500; ++j) {
+            lastNode.forward();
+            auto &result = lastNode.output.value();
+            lastNode.gradient = Blob::ones(Shape {{1}});
+            printf("%d: %f\n", j, result(0, 0));
+            lastNode.backward();
+            SGD.step();
+            Allocator::endSession();
+            lastNode.clear();
+        }
+        res.get().forward();
+        auto &result2 = res.get().output.value();
+        print(result2);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         Conv2DLayerParameters paramsConv{3, 3, 1};
+void testConv() {
+    Allocator::startVirtualMode();
+    {
+        Conv2DLayerParameters paramsConv{3, 3, 1};

-//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));

-//         RandomObject initObject(0, 1, 17);
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};
-//         SGD.append(convL.layerOperationParams);
+        RandomObject initObject(0, 1, 17);
+        OptimizerBase SGD = OptimizerBase(0.1);
+        Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};
+        SGD.append(convL.layerOperationParams);

-//         TensorRef res = convL.result.value();
-//         res.get().forward();
-//         print(res.get().output);
+        TensorRef res = convL.result.value();
+        res.get().forward();
+        print(res.get().output);

-//         print(convL.kernel.output);
+        print(convL.kernel.output);

-//         res.get().gradient = Blob::ones(Shape {{1, 1, 5, 5}});
-//         res.get().backward();
+        res.get().gradient = Blob::ones(Shape {{1, 1, 5, 5}});
+        res.get().backward();

-//         print(convL.kernel.gradient);
+        print(convL.kernel.gradient);

-//         print(inputNode.gradient);
-//         Allocator::endSession();
-//     }
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-// int main() {
-//     Allocator::startVirtualMode();
-//     {
-//         AxisParameters paramsConv{{1, 2, 3}};
+void testVar() {
+    Allocator::startVirtualMode();
+    {
+        AxisParameters paramsConv{{1, 2, 3}};

-//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));

-//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
-//         OptimizerBase SGD = OptimizerBase(0.1);
-//         VarLayer var = {paramsConv, {inputNode}};
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        VarLayer var = {paramsConv, {inputNode}};

-//         TensorRef res = var.result.value();
-//         res.get().forward();
-//         print(res.get().output);
+        TensorRef res = var.result.value();
+        res.get().forward();
+        print(res.get().output);

-//         res.get().gradient = Blob::ones(res.get().output.value().shape);
-//         res.get().backward();
+        res.get().gradient = Blob::ones(res.get().output.value().shape);
+        res.get().backward();

-//         // print(convL.kernel.gradient);
+        // print(convL.kernel.gradient);

-//         print(inputNode.gradient);
-//         Allocator::endSession();
-//     }
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }

-//     Allocator::end();
-//     return 0;
-// }
+    Allocator::end();
+}

-int main() {
+void testNorm() {
     Allocator::startVirtualMode();
     {
         AxisParameters paramsConv{{2, 3}};
@@ -198,5 +197,95 @@
     }

     Allocator::end();
+}
+
+float inputL[] = {
+    10.f, 30.f,
+    25.f, 25.f,
+    1.f, 6.f
+};
+
+float outputL[] = { 0, 0, 1};
+
+void testEntropyLoss() {
+    Allocator::startVirtualMode();
+    {
+        CrossEntropyLossParameters params{2};
+
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 2}}, inputL));
+
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 1}}, outputL));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        EntropyLoss loss = {params, {inputNode, trueNode}};
+
+        TensorRef res = loss.result.value();
+        res.get().forward();
+        print(res.get().output);
+
+        // res.get().gradient = Blob::ones(res.get().output.value().shape);
+        // res.get().backward();
+
+        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
+        res.get().backward();
+
+        // print(convL.kernel.gradient);
+
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }
+
+    Allocator::end();
+}
+
+float inputM[] = {
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+
+    25.f, 25.f, 5.f, 6.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, 4.f,
+    40.f, 30.f, 3.f, 4.f,
+
+    1.f, 6.f, 7.f, 8.f,
+    27.f, 25.f, 5.f, 6.f,
+    10.f, 30.f, 3.f, 4.f,
+    10.f, 30.f, 3.f, -1.f
+};
+
+float outputM[] = { 0, 0, 1};
+
+void testMaxPoolLoss() {
+    Allocator::startVirtualMode();
+    {
+
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 1, 4, 4}}, inputM));
+
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1, 1, 1}}, outputM));
+        OptimizerBase SGD = OptimizerBase(0.1);
+        MaxPool max = {{inputNode}};
+
+        TensorRef res = max.result.value();
+        res.get().forward();
+        print(res.get().output);
+
+        // res.get().gradient = Blob::ones(res.get().output.value().shape);
+        // res.get().backward();
+
+        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
+        res.get().backward();
+
+        // print(convL.kernel.gradient);
+
+        print(inputNode.gradient);
+        Allocator::endSession();
+    }
+
+    Allocator::end();
+}
+
+int main() {
+    testMaxPoolLoss();
     return 0;
 }
\ No newline at end of file
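Editor's note: a natural follow-up to testMaxPoolLoss would be chaining the new MaxPool layer behind the existing Conv2DLayer, reusing the constructor patterns from testConv and testMaxPoolLoss. The sketch below is only an illustration of such a wiring and is not part of this patch; in particular, it assumes (as testConv's 5x5 case suggests) that Conv2DLayer preserves the 4x4 spatial size, so the pooled output is 2x2 per channel.

// Standalone sketch (not from this patch): Conv2DLayer followed by MaxPool,
// under the assumption that the conv output keeps the 4x4 spatial size.
void testConvWithPool() {
    Allocator::startVirtualMode();
    {
        Conv2DLayerParameters paramsConv{3, 3, 1};

        // Reuse the 3 x 4 x 4 data from testMaxPoolLoss as illustrative input.
        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 4, 4}}, inputM));

        RandomObject initObject(0, 1, 17);
        Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};

        TensorRef res = convL.result.value();
        MaxPool pool = {{res}};

        res = pool.result.value();
        res.get().forward();
        print(res.get().output);

        // Ones as the upstream gradient, routed back through the pool and the conv.
        res.get().gradient = Blob::fill(res.get().output.value().shape, 1.f);
        res.get().backward();

        print(inputNode.gradient);
        Allocator::endSession();
    }

    Allocator::end();
}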