diff --git a/server/core/Allocator.cpp b/server/core/Allocator.cpp
index 2ae4a83c..faa6fa29 100644
--- a/server/core/Allocator.cpp
+++ b/server/core/Allocator.cpp
@@ -9,7 +9,7 @@ using namespace std;
 
 Allocator* Allocator::instance = NULL;
 
-#define TMP_BUFF_SIZE 2048
+#define TMP_BUFF_SIZE 4096
 
 void Allocator::startVirtualMode() {
     assert(!instance);
diff --git a/server/core/Blob.cpp b/server/core/Blob.cpp
index 7df9baca..0c553584 100644
--- a/server/core/Blob.cpp
+++ b/server/core/Blob.cpp
@@ -117,7 +117,7 @@ std::ostream& operator<<(std::ostream& os, const Blob& b) {
         for (int k = 0; k < b.shape.dim3(); ++k){
             for (int i = 0; i < b.shape.rows(); ++i) {
                 for (int j = 0; j < b.shape.cols(); ++j)
-                    os << b(i, j) << " ";
+                    os << b(l, k, i, j) << " ";
                 os << std::endl;
             }
             os << std::endl;
diff --git a/server/core/Layer.cpp b/server/core/Layer.cpp
index 7f143a42..b83ec091 100644
--- a/server/core/Layer.cpp
+++ b/server/core/Layer.cpp
@@ -52,3 +52,65 @@ MSELoss::MSELoss(const std::vector<TensorRef>& args) : mean({0, 1, 2, 3}) {
 MultLayer::MultLayer(const std::vector<TensorRef>& args) {
     result = Tensor(mult, {args[0].get(), args[1].get()});
 }
+
+Conv2DLayer::Conv2DLayer(const Conv2DLayerParameters& params,
+                         const std::vector<TensorRef>& args,
+                         RandomObject* randomInit)
+    : kernel(Blob::constRandomBlob(
+          Shape {{params.outChannels, params.inChannels, params.kernelSize, params.kernelSize}},
+          randomInit)
+      ) {
+
+    layerOperationParams.push_back(kernel);
+
+    result = Tensor(conv, {args[0], kernel});
+}
+
+VarLayer::VarLayer(const AxisParameters& params,
+                   const std::vector<TensorRef>& args)
+    : mean(params.axis), meanMinusOne(params.axis, true), sum(params.axis) {
+    pipeline.reserve(5);
+    TensorRef tensor = args[0];
+    Tensor meanForVar(mean, {tensor});
+    pipeline.push_back(std::move(meanForVar));
+
+    Tensor fillForVar(fill, {tensor, pipeline[0]});
+    pipeline.push_back(std::move(fillForVar));
+
+    Tensor diff(sub, {tensor, pipeline[1]});
+    pipeline.push_back(std::move(diff));
+
+    Tensor square(sqr, {pipeline[2]});
+    pipeline.push_back(std::move(square));
+
+    result = Tensor(meanMinusOne, {pipeline[3]});
+}
+
+LayerNorm::LayerNorm(const AxisParameters& params,
+                     const std::vector<TensorRef>& args)
+    : varLayer(params, args), mean(params.axis) {
+    pipeline.reserve(6);
+    TensorRef tensor = args[0];
+
+    Tensor mean_(mean, {tensor});
+    pipeline.push_back(std::move(mean_));
+
+    Tensor fill_(fill, {tensor, pipeline[0]});
+    pipeline.push_back(std::move(fill_));
+
+    Tensor diff(sub, {tensor, pipeline[1]});
+    pipeline.push_back(std::move(diff));
+
+    Tensor eps_(eps, {varLayer.result.value()});
+    pipeline.push_back(std::move(eps_));
+
+    Tensor root_(root, {pipeline[3]});
+    pipeline.push_back(std::move(root_));
+
+    Tensor _fill(fill, {tensor, pipeline[4]});
+    pipeline.push_back(std::move(_fill));
+
+    result = Tensor(div, {pipeline[2], pipeline[5]});
+}
diff --git a/server/core/Layer.h b/server/core/Layer.h
index 8d9cbb50..6c4842b1 100644
--- a/server/core/Layer.h
+++ b/server/core/Layer.h
@@ -54,3 +54,36 @@ class MultLayer: public Layer {
     Multiply mult;
     MultLayer(const std::vector<TensorRef>& args);
 };
+
+class Conv2DLayer: public Layer {
+public:
+    Tensor kernel;
+    Conv2D conv;
+    Conv2DLayer(const Conv2DLayerParameters& params,
+                const std::vector<TensorRef>& args, RandomObject* randomInit = nullptr);
+};
+
+class VarLayer: public Layer {
+public:
+    Mean mean;
+    Mean meanMinusOne;
+    Fill fill;
+    Substract sub;
+    Square sqr;
+    SumAxis sum;
+    VarLayer(const AxisParameters& params,
+             const std::vector<TensorRef>& args);
+};
+
+class LayerNorm: public Layer {
+public:
+    VarLayer varLayer;
+    Mean mean;
+    Fill fill;
+    Substract sub;
+    EPS eps;
+    Root root;
+    Divide div;
+    LayerNorm(const AxisParameters& params,
+              const std::vector<TensorRef>& args);
+};
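
For reference, the `VarLayer` and `LayerNorm` pipelines above compute the usual normalization formulas: the variance uses the Bessel-corrected mean (`meanMinusOne`), and the final `div` divides the centered input by the `eps`-stabilized root of the variance.

```latex
% Variance over the reduced axes, with Bessel's correction (the meanMinusOne step):
\sigma^2 = \frac{1}{n-1} \sum_{i=1}^{n} (x_i - \mu)^2,
\qquad \mu = \frac{1}{n} \sum_{i=1}^{n} x_i
% LayerNorm output, matching the eps -> root -> fill -> div chain
% (\varepsilon is the EPS operation's 1e-5 constant):
y_i = \frac{x_i - \mu}{\sqrt{\sigma^2 + \varepsilon}}
```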
diff --git a/server/core/LazyBlob.cpp b/server/core/LazyBlob.cpp
index 3b6cd432..0085afb3 100644
--- a/server/core/LazyBlob.cpp
+++ b/server/core/LazyBlob.cpp
@@ -111,7 +111,9 @@ class LazyBlobSelfSum final: public LazyBlobReductOperation {
 
 class LazyBlobMean final: public LazyBlobReductOperation {
 public:
-    LazyBlobMean(const LazyBlob &a, std::vector<int> axis): LazyBlobReductOperation(a, axis) {};
+    bool minusOne;
+    LazyBlobMean(const LazyBlob &a, std::vector<int> axis, bool minusOne)
+        : LazyBlobReductOperation(a, axis), minusOne(minusOne) {};
 
     float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
         float result = 0;
@@ -132,6 +134,8 @@ class LazyBlobMean final: public LazyBlobReductOperation {
             count++;
         });
 
+        if (minusOne)
+            return result / (count - 1);
         return result / count;
     }
 };
@@ -182,6 +186,13 @@ class LazyBlobMult final: public LazyBlobStretchableOperation {
     LazyBlobMult(const LazyBlob &a, const LazyBlob &b): LazyBlobStretchableOperation(a, b, multiply) {};
 };
 
+class LazyBlobDivide final: public LazyBlobStretchableOperation {
+private:
+    static constexpr BinaryTransform divide = [](float x, float y) { return x / y; };
+public:
+    LazyBlobDivide(const LazyBlob &a, const LazyBlob &b): LazyBlobStretchableOperation(a, b, divide) {};
+};
+
 class LazyBlobDot final: public LazyBlobBinaryOperation {
 public:
     LazyBlobDot(const LazyBlob &a, const LazyBlob &b): LazyBlobBinaryOperation(a, b) {};
@@ -222,17 +233,37 @@ class LazyBlobCombine final: public LazyBlobBinaryOperation {
 
 class LazyBlobTranspose final: public LazyBlobUnaryOperation {
 public:
-    LazyBlobTranspose(const LazyBlob &a): LazyBlobUnaryOperation(a) {};
+    bool norm;
+    LazyBlobTranspose(const LazyBlob &a, bool norm = true): LazyBlobUnaryOperation(a), norm(norm) {};
+
+    void initShape() const final override {
+        if (norm)
+            shape_ = Shape {{a.shape().dim4(), a.shape().dim3(), a.shape().cols(), a.shape().rows()}, a.shape().dimsCount};
+        else
+            shape_ = Shape {{a.shape().dim3(), a.shape().dim4(), a.shape().rows(), a.shape().cols()}, a.shape().dimsCount};
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        if (norm)
+            return a(k, l, j, i);
+        return a(l, k, i, j);
+    }
+};
+
+class LazyBlobReverse final: public LazyBlobUnaryOperation {
+public:
+    LazyBlobReverse(const LazyBlob &a): LazyBlobUnaryOperation(a) {};
 
     void initShape() const final override {
-        shape_ = Shape {{a.shape().dim4(), a.shape().dim3(), a.shape().cols(), a.shape().rows()}, a.shape().dimsCount};
+        shape_ = Shape {{a.shape().dim4(), a.shape().dim3(), a.shape().rows(), a.shape().cols()}, a.shape().dimsCount};
     }
 
     float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
-        return a(k, l, j, i);
+        return a(k, l, a.shape().rows() - i - 1, a.shape().cols() - j - 1);
     }
 };
 
+
 class LazyBlobApply: public LazyBlobUnaryOperation {
 private:
     const UnaryTransform operation;
@@ -317,6 +348,11 @@ const LazyBlob& operator * (const LazyBlob &a, const LazyBlob &b) {
     return alloc2<LazyBlobMult>(a, b);
 }
 
+const LazyBlob& operator / (const LazyBlob &a, const LazyBlob &b) {
+    assertStretchable(a, b);
+    return alloc2<LazyBlobDivide>(a, b);
+}
+
 const LazyBlob& LazyBlob::dot(const LazyBlob& a) const {
     assert(shape().cols() == a.shape().rows());
     return alloc2<LazyBlobDot>(*this, a);
 }
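
The two new unary views are easiest to read as index mappings; a minimal standalone sketch (plain row-major arrays, not the LazyBlob API):

```cpp
#include <cstddef>

// reverseLast2Dims: out(i, j) = in(rows-1-i, cols-1-j), i.e. a 180-degree
// rotation of each 2D slice -- exactly what Conv2D::grad later applies to
// the kernel before the backward convolution.
float reversedAt(const float* in, std::size_t rows, std::size_t cols,
                 std::size_t i, std::size_t j) {
    return in[(rows - 1 - i) * cols + (cols - 1 - j)];
}

// transposeFirst2Dims (LazyBlobTranspose with norm == false):
// out(k, l, i, j) = in(l, k, i, j), swapping the two outer
// (batch/channel) dimensions and leaving the spatial ones untouched.
```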
@@ -326,14 +362,23 @@
 const LazyBlob& LazyBlob::transposed() const {
     return alloc1<LazyBlobTranspose>(*this);
 }
 
+const LazyBlob& LazyBlob::reverseLast2Dims() const {
+    return alloc1<LazyBlobReverse>(*this);
+}
+
+const LazyBlob& LazyBlob::transposeFirst2Dims() const {
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobTranspose));
+    return *(new(location) LazyBlobTranspose(*this, false));
+}
+
 const LazyBlob& LazyBlob::sum(std::vector<int> axis) const {
     void* location = Allocator::allocateBytes(sizeof(LazyBlobSelfSum));
     return *(new(location) LazyBlobSelfSum(*this, axis));
 }
 
-const LazyBlob& LazyBlob::mean(std::vector<int> axis) const {
+const LazyBlob& LazyBlob::mean(std::vector<int> axis, bool minusOne) const {
     void* location = Allocator::allocateBytes(sizeof(LazyBlobMean));
-    return *(new(location) LazyBlobMean(*this, axis));
+    return *(new(location) LazyBlobMean(*this, axis, minusOne));
 }
 
 const LazyBlob& LazyBlob::fill(Shape shape) const {
@@ -415,12 +460,106 @@ Blob& operator *= (Blob& a, const LazyBlob& b) {
     return a;
 }
 
+class LazyBlobConv: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const int size_r, size_c;
+    LazyBlobConv(const LazyBlob &a, const LazyBlob &b):
+        a(a), b(b),
+        size_r(b.shape().rows()),
+        size_c(b.shape().cols())
+    {};
+
+    float a_get(std::size_t k, std::size_t l, long i, long j) const {
+        if (i < 0 || j < 0 || i >= a.shape().rows() || j >= a.shape().cols())
+            return 0;
+        return a(k, l, i, j);
+    }
+
+    void initShape() const final override {
+        // TODO: assert
+        shape_ = Shape {{a.shape().dim4(), b.shape().dim4(), a.shape().rows(), a.shape().cols()}, a.shape().dimsCount};
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        float res = 0;
+
+        for (size_t c = 0; c < b.shape().dim3(); ++c) {
+            for (long i1 = 0; i1 < size_r; ++i1) {
+                for (long j1 = 0; j1 < size_c; ++j1) {
+                    res += a_get(k, c, i + i1 - size_r / 2, j + j1 - size_c / 2) * b(l, c, i1, j1);
+                }
+            }
+        }
+        return res;
+    }
+};
+
+LazyBlob& conv(const LazyBlob &a, const LazyBlob &b) {
+    assert(a.shape().dim3() == b.shape().dim3());
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobConv));
+    return *(new(location) LazyBlobConv(a, b));
+}
+
+class LazyBlobConvI: public LazyBlob {
+public:
+    const LazyBlob &a, &b;
+    const size_t kernelSize, index;
+    LazyBlobConvI(const LazyBlob &a, const LazyBlob &b, size_t kernelSize, size_t i):
+        a(a), b(b), kernelSize(kernelSize), index(i) {};
+
+    float a_get(std::size_t k, std::size_t l, long i, long j) const {
+        if (i < 0 || j < 0 || i >= a.shape().rows() || j >= a.shape().cols())
+            return 0;
+        return a(k, l, i, j);
+    }
+
+    void initShape() const final override {
+        shape_ = Shape {{b.shape().dim3(), a.shape().dim3(), kernelSize, kernelSize}, a.shape().dimsCount};
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        float res = 0;
+        for (long i1 = 0; i1 < a.shape().rows(); ++i1) {
+            for (long j1 = 0; j1 < a.shape().cols(); ++j1) {
+                res += a_get(index, l, i1 + i - kernelSize / 2, j1 + j - kernelSize / 2) * b(index, k, i1, j1);
+            }
+        }
+        return res;
+    }
+};
+
+LazyBlob& conv_i(const LazyBlob &a, const LazyBlob &b, size_t kernelSize, size_t i) {
+    assert(a.shape().dim4() == b.shape().dim4());
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobConvI));
+    return *(new(location) LazyBlobConvI(a, b, kernelSize, i));
+}
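
`LazyBlobConv` is a zero-padded "same" cross-correlation (no kernel flip), with the window centered by the `size / 2` offsets. A single-channel reference of the same indexing, as a sketch outside the patch:

```cpp
#include <vector>

// Same-size 2D cross-correlation with zero padding, assuming odd kernel
// sizes so kRows/2, kCols/2 center the window (mirrors a_get returning 0
// for out-of-range indices).
std::vector<float> sameConv2D(const std::vector<float>& img, long rows, long cols,
                              const std::vector<float>& ker, long kRows, long kCols) {
    std::vector<float> out(rows * cols, 0.f);
    for (long i = 0; i < rows; ++i)
        for (long j = 0; j < cols; ++j)
            for (long i1 = 0; i1 < kRows; ++i1)
                for (long j1 = 0; j1 < kCols; ++j1) {
                    long r = i + i1 - kRows / 2, c = j + j1 - kCols / 2;
                    if (r >= 0 && c >= 0 && r < rows && c < cols)  // zero padding
                        out[i * cols + j] += img[r * cols + c] * ker[i1 * kCols + j1];
                }
    return out;
}
```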
+class LazyBlobZero: public LazyBlob {
+public:
+    const Shape myShape;
+    LazyBlobZero(const Shape& shape): myShape(shape) {};
+
+    void initShape() const final override {
+        shape_ = myShape;
+    }
+
+    float operator() (std::size_t k, std::size_t l, std::size_t i, std::size_t j) const override {
+        return 0;
+    }
+};
+
+LazyBlob& zeroBlob(const Shape& shape) {
+    void* location = Allocator::allocateBytes(sizeof(LazyBlobZero));
+    return *(new(location) LazyBlobZero(shape));
+}
+
 std::ostream& operator<<(std::ostream& os, const LazyBlob &b) {
     for (int l = 0; l < b.shape().dim4(); ++l) {
         for (int k = 0; k < b.shape().dim3(); ++k){
             for (int i = 0; i < b.shape().rows(); ++i) {
                 for (int j = 0; j < b.shape().cols(); ++j)
-                    os << b(i, j) << " ";
+                    os << b(l, k, i, j) << " ";
                 os << std::endl;
             }
             os << std::endl;
diff --git a/server/core/LazyBlob.h b/server/core/LazyBlob.h
index c9dde2f8..349e8302 100644
--- a/server/core/LazyBlob.h
+++ b/server/core/LazyBlob.h
@@ -28,7 +28,9 @@ class LazyBlob {
     const LazyBlob& transposed() const;
     const LazyBlob& applying(const UnaryTransform t) const;
     const LazyBlob& sum(std::vector<int> axis) const;
-    const LazyBlob& mean(std::vector<int> axis) const;
+    const LazyBlob& mean(std::vector<int> axis, bool minusOne = false) const;
+    const LazyBlob& reverseLast2Dims() const;
+    const LazyBlob& transposeFirst2Dims() const;
 
     /// To repeat some dimensions several times
     /// - Parameter shape: the size we want to get
@@ -39,6 +41,7 @@ class LazyBlob {
     friend const LazyBlob& operator - (const LazyBlob &a);
     /// ELEMENT-WISE
     friend const LazyBlob& operator * (const LazyBlob &a, const LazyBlob &b);
+    friend const LazyBlob& operator / (const LazyBlob &a, const LazyBlob &b);
     /// MATRIX
     friend const LazyBlob& operator & (const LazyBlob &a, const LazyBlob &b);
@@ -50,6 +53,10 @@ class LazyBlob {
     friend std::ostream& operator<<(std::ostream& os, const LazyBlob& b);
 };
 
+LazyBlob& conv(const LazyBlob &a, const LazyBlob &b);
+LazyBlob& conv_i(const LazyBlob &a, const LazyBlob &b, std::size_t kernelSize, std::size_t i);
+LazyBlob& zeroBlob(const Shape& shape);
+
 class LazyBlobView final: public LazyBlob {
 private:
     const Blob &ref;
diff --git a/server/core/Operation.cpp b/server/core/Operation.cpp
index 903341f2..af707dac 100644
--- a/server/core/Operation.cpp
+++ b/server/core/Operation.cpp
@@ -1,4 +1,5 @@
 #include <cassert>
+#include <cmath>
 
 #include "Allocator.h"
 #include "Operation.h"
@@ -54,6 +55,21 @@ Shape Multiply::computeDim(const vector<BlobRef>& args) const {
     return {{a.shape().rows(), b.shape().cols()}};
 }
 
+Blob Divide::compute(const vector<BlobRef>& args) const {
+    args2(a, b);
+    assert(a.shape() == b.shape());
+    return a / b;
+}
+vector<LazyBlobRef> Divide::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args2(a, b);
+    return {grad / b, grad * (-a / (b * b))};
+}
+Shape Divide::computeDim(const vector<BlobRef>& args) const {
+    args2(a, b);
+    (void)b;
+    return {a.shape()};
+}
+
 Blob ReLU::compute(const vector<BlobRef>& args) const {
     args1(a);
     return a.applying([](float x) { return x >= 0 ? x : 0; });
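
The `Divide` gradients are the quotient rule: d(a/b)/da = 1/b and d(a/b)/db = -a/b², which is what `{grad / b, grad * (-a / (b * b))}` evaluates to element-wise. A scalar finite-difference sanity check (hypothetical helper, not part of the patch):

```cpp
#include <cassert>
#include <cmath>

void checkDivideGrad(float a, float b) {
    const float h = 1e-3f;
    // Central differences of f(a, b) = a / b in each argument.
    float dA = ((a + h) / b - (a - h) / b) / (2 * h);  // ~ 1/b
    float dB = (a / (b + h) - a / (b - h)) / (2 * h);  // ~ -a/b^2
    assert(std::fabs(dA - 1.f / b) < 1e-2f);
    assert(std::fabs(dB + a / (b * b)) < 1e-2f);
}
```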
@@ -89,31 +105,94 @@ Blob Square::compute(const vector<BlobRef>& args) const {
     args1(a);
     return a * a;
 }
+
 vector<LazyBlobRef> Square::grad(const Blob& grad, const vector<BlobRef>& args) const {
     args1(a);
     return {2 * grad * a};
 }
+
 Shape Square::computeDim(const std::vector<BlobRef>& args) const {
     args1(a);
-    return {{a.shape().rows(), a.shape().cols()}};
+    return {a.shape()};
+}
+
+Blob Root::compute(const vector<BlobRef>& args) const {
+    args1(a);
+    return a.applying([](float x) { return std::sqrt(x); });
+}
+
+vector<LazyBlobRef> Root::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args1(a);
+    return {combine(a, grad, [](float x, float g) { return g / (2 * std::sqrt(x)); })};
+}
+
+Shape Root::computeDim(const std::vector<BlobRef>& args) const {
+    args1(a);
+    return {a.shape()};
 }
 
 Mean::Mean(std::vector<int> axis): axis(axis) {}
+Mean::Mean(std::vector<int> axis, bool minusOne): axis(axis) {
+    this->minusOne = minusOne;
+}
 
 Blob Mean::compute(const vector<BlobRef>& args) const {
     args1(a);
-    return a.mean(axis);
+    return a.mean(axis, minusOne);
 }
 
 vector<LazyBlobRef> Mean::grad(const Blob& grad, const vector<BlobRef>& args) const {
     args1(a);
-    return { (grad / (a.shape().rows() * a.shape().cols())).fill(a.shape()) };
+    size_t count = 1;
+    for (int dim: axis) {
+        count *= a.shape()[dim];
+    }
+    if (minusOne) {
+        count -= 1;
+    }
+    return { grad.lazy().fill(a.shape()) / count };
 }
 
 Shape Mean::computeDim(const vector<BlobRef>& args) const {
     (void)args;
     return {{1, 1}};
 }
 
+SumAxis::SumAxis(std::vector<int> axis): axis(axis) {}
+
+Blob SumAxis::compute(const vector<BlobRef>& args) const {
+    args1(a);
+    return a.sum(axis);
+}
+
+vector<LazyBlobRef> SumAxis::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args1(a);
+    return {grad.lazy().fill(a.shape())};
+}
+Shape SumAxis::computeDim(const vector<BlobRef>& args) const {
+    (void)args;
+    return {{1, 1}};
+}
+
+Var::Var(std::vector<int> axis): axis(axis) {}
+
+Blob Var::compute(const vector<BlobRef>& args) const {
+    args1(a);
+    size_t count = 1;
+    for (int dim: axis) {
+        count *= a.shape()[dim];
+    }
+    // Square the deviations element-wise before reducing; summing first
+    // would cancel to zero.
+    return ((a - a.mean(axis).fill(a.shape())) * (a - a.mean(axis).fill(a.shape()))).sum(axis) / count;
+}
+
+vector<LazyBlobRef> Var::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args1(a);
+    return { (grad / (a.shape().rows() * a.shape().cols())).fill(a.shape()) };
+}
+Shape Var::computeDim(const vector<BlobRef>& args) const {
+    (void)args;
+    return {{1, 1}};
+}
+
 Blob Substract::compute(const vector<BlobRef>& args) const {
     args2(a, b);
     return a - b;
@@ -127,3 +206,70 @@ Shape Substract::computeDim(const vector<BlobRef>& args) const {
     (void)b;
     return {{a.shape().rows(), a.shape().cols()}};
 }
+
+Blob Conv2D::compute(const vector<BlobRef>& args) const {
+    args2(a, b);
+    return conv(a, b);
+}
+vector<LazyBlobRef> Conv2D::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args2(a, b);
+    Blob *bgrad = Allocator::allocateBlob(b.shape());
+    for (int i = 0; i < a.shape().dim4(); ++i) {
+        (*bgrad) += conv_i(a, grad.lazy(), b.shape().cols(), i);
+    }
+    return {
+        conv(grad, b.transposeFirst2Dims().reverseLast2Dims()),
+        *bgrad
+    };
+}
+Shape Conv2D::computeDim(const vector<BlobRef>& args) const {
+    args2(a, b);
+    return {
+        {
+            a.shape().dim4(),
+            b.shape().dim4(),
+            a.shape().rows(),
+            a.shape().cols()
+        },
+        a.shape().dimsCount
+    };
+}
+
+Blob Fill::compute(const vector<BlobRef>& args) const {
+    args2(a, b);
+    return b.fill(a.shape());
+}
+
+vector<LazyBlobRef> Fill::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args2(a, b);
+    (void)a;
+    std::vector<int> axisForSum = {};
+    for (int i = 0; i < b.shape().getDims().size(); ++i) {
+        if (b.shape().getDims()[i] != grad.shape.getDims()[i]) {
+            axisForSum.push_back(i);
+        }
+    }
+    return {zeroBlob(a.shape()), grad.lazy().sum(axisForSum)};
+}
+
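
`Mean::grad` above spreads the incoming gradient uniformly: each reduced input receives `grad / count`, where `count` is the product of the reduced dimensions (minus one for the Bessel-corrected mean). A small sketch of that denominator logic, with hypothetical names:

```cpp
#include <cstddef>
#include <vector>

// Number of elements a Mean over `axis` reduces, e.g. axes {2, 3} of an
// N x C x H x W shape give H * W; each input then receives grad / count.
std::size_t meanGradDenominator(const std::vector<std::size_t>& dims,
                                const std::vector<int>& axis, bool minusOne) {
    std::size_t count = 1;
    for (int d : axis) count *= dims[d];
    return minusOne ? count - 1 : count;
}
```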
+Shape Fill::computeDim(const vector<BlobRef>& args) const {
+    args2(a, b);
+    (void)b;
+    return a.shape();
+}
+
+Blob EPS::compute(const vector<BlobRef>& args) const {
+    args1(a);
+    return a + eps;
+}
+
+vector<LazyBlobRef> EPS::grad(const Blob& grad, const vector<BlobRef>& args) const {
+    args1(a);
+    (void)a;
+    return {grad};
+}
+
+Shape EPS::computeDim(const vector<BlobRef>& args) const {
+    args1(a);
+    return a.shape();
+}
diff --git a/server/core/Operation.h b/server/core/Operation.h
index ad8b6e79..c4192592 100644
--- a/server/core/Operation.h
+++ b/server/core/Operation.h
@@ -44,6 +44,13 @@ struct Multiply: Operation {
     Shape computeDim(const std::vector<BlobRef>& args) const override;
 };
 
+struct Divide: Operation {
+    std::string name = "Divide";
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
 struct BiasSum: Operation {
     mutable std::optional<Shape> stretch = std::nullopt;
     std::string name = "BiasSum";
@@ -59,10 +66,37 @@ struct Square: Operation {
     Shape computeDim(const std::vector<BlobRef>& args) const override;
 };
 
+struct Root: Operation {
+    std::string name = "Root";
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
 struct Mean: Operation {
     std::vector<int> axis;
     std::string name = "Mean";
+    bool minusOne = false;
     Mean(std::vector<int> axis);
+    Mean(std::vector<int> axis, bool minusOne);
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
+struct SumAxis: Operation {
+    std::vector<int> axis;
+    std::string name = "SumAxis";
+    SumAxis(std::vector<int> axis);
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
+struct Var: Operation {
+    std::vector<int> axis;
+    std::string name = "Var";
+    Var(std::vector<int> axis);
     Blob compute(const std::vector<BlobRef>& args) const override;
     std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
     Shape computeDim(const std::vector<BlobRef>& args) const override;
 };
@@ -74,3 +108,26 @@ struct Substract: Operation {
     std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
     Shape computeDim(const std::vector<BlobRef>& args) const override;
 };
+
+struct Conv2D: Operation {
+    std::string name = "Conv2D";
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
+struct Fill: Operation {
+    std::string name = "Fill";
+    mutable std::optional<Shape> shape = std::nullopt;
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
+
+struct EPS: Operation {
+    std::string name = "EPS";
+    const float eps = 1e-5;
+    Blob compute(const std::vector<BlobRef>& args) const override;
+    std::vector<LazyBlobRef> grad(const Blob& gradient, const std::vector<BlobRef>& args) const override;
+    Shape computeDim(const std::vector<BlobRef>& args) const override;
+};
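
In `LayerNorm`, `EPS` adds its constant 1e-5 before `Root`, so the denominator stays differentiable even when the variance is zero. A plain-float sketch of the composed value and derivative (not part of the patch):

```cpp
#include <cmath>

// EPS::compute then Root::compute: sqrt(var + eps).
float normDenominator(float var, float eps = 1e-5f) {
    return std::sqrt(var + eps);
}

// Root::grad (g / (2 * sqrt(x))) with EPS::grad passing gradients through
// unchanged, so the chain's derivative w.r.t. var is:
float normDenominatorGrad(float var, float eps = 1e-5f) {
    return 1.f / (2.f * std::sqrt(var + eps));
}
```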
diff --git a/server/core/Parameters.h b/server/core/Parameters.h
index bb20cc20..4c3e82b5 100644
--- a/server/core/Parameters.h
+++ b/server/core/Parameters.h
@@ -1,12 +1,26 @@
 #pragma once
 
+#include <vector>
+
 struct LinearLayerParameters {
     std::size_t inFeatures;
     std::size_t outFeatures;
     bool bias;
 };
 
+struct Conv2DLayerParameters {
+    std::size_t kernelSize;
+    std::size_t inChannels;
+    std::size_t outChannels;
+};
+
 struct Data2dLayerParameters {
     std::size_t width;
     std::size_t height;
 };
+
+struct AxisParameters {
+    std::vector<int> axis;
+};
+
diff --git a/server/core/Tensor.cpp b/server/core/Tensor.cpp
index 525fad62..66ec35a0 100644
--- a/server/core/Tensor.cpp
+++ b/server/core/Tensor.cpp
@@ -9,7 +9,14 @@ using namespace std;
 static const Noop noop = Noop();
 
 void Tensor::getParentsData(vector<BlobRef> &datas) {
-    for (auto p: parents) datas.push_back(p.get().forward());
+    for (auto p: parents) {
+        p.get().forward();
+    }
+
+    for (auto p: parents) {
+        assert(p.get().output.has_value());
+        datas.push_back(p.get().output.value());
+    }
 }
 
 void Tensor::getParentsGrads(vector<LazyBlobRef> &grads) {
@@ -42,16 +49,16 @@ Tensor& Tensor::operator = (Tensor&& other) noexcept {
     return *this;
 };
 
-const Blob& Tensor::forward() {
+void Tensor::forward() {
     if (!output) {
         vector<BlobRef> datas;
+        datas.reserve(parents.size());
         getParentsData(datas);
         // here the blob's move constructor is used
         this->output = operation.compute(datas);
         // Don't need references to parents datas anymore
         Allocator::endSession();
     }
-    return *output;
 }
 
 void Tensor::backward() {
@@ -62,6 +69,7 @@ void Tensor::backward() {
     if (childrenGradReady < childrenCount) return;
 
     vector<BlobRef> datas;
+    datas.reserve(parents.size());
    getParentsData(datas);
     auto grads = operation.grad(*gradient, datas);
     assert(grads.size() == parents.size());
diff --git a/server/core/Tensor.h b/server/core/Tensor.h
index ad268031..ef0fd49e 100644
--- a/server/core/Tensor.h
+++ b/server/core/Tensor.h
@@ -30,7 +30,7 @@ class Tensor {
     Tensor& operator = (Tensor&& other) noexcept;
     // Tensor& operator=(const Tensor & other);
 
-    const Blob& forward();
+    void forward();
     void backward();
 
     void accumulate(const LazyBlob& gradient);
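
Since `Tensor::forward()` no longer returns the result, callers now read the cached `output` explicitly. A minimal usage sketch against the repo's own `Tensor`/`TensorRef`/`Blob` types (assumed from `Tensor.h`, not self-contained):

```cpp
#include <cassert>

void runForward(TensorRef node) {
    node.get().forward();                    // no return value any more
    assert(node.get().output.has_value());   // compute() filled the cache
    const Blob& result = node.get().output.value();
    (void)result;                            // e.g. print(result);
}
```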
diff --git a/server/core/main.cpp b/server/core/main.cpp
index 1fc1677c..19b21c2a 100644
--- a/server/core/main.cpp
+++ b/server/core/main.cpp
@@ -10,72 +10,193 @@
 
 using namespace std;
 
+// float input[] = {
+//     0, 0,
+//     0, 1,
+//     1, 0,
+//     1, 1
+// };
+
+// float output[] = { 0, 1, 1, 0 };
+
 float input[] = {
-    0, 0,
-    0, 1,
-    1, 0,
-    1, 1
+    0.f, 0.f, 1.f, 1.f, 1.f,
+    0.f, 0.f, 0.f, 1.f, 1.f,
+    1.f, 0.f, 0.f, 1.f, 0.f,
+    0.f, 1.f, 0.f, 0.f, 1.f,
+    0.f, 0.f, 1.f, 0.f, 1.f,
+
+    0.f, 0.f, 1.f, 0.f, 1.f,
+    1.f, 0.f, 0.f, 1.f, 0.f,
+    1.f, 1.f, 1.f, 0.f, 0.f,
+    0.f, 0.f, 1.f, 1.f, 0.f,
+    0.f, 0.f, 0.f, 1.f, 1.f,
+
+    0.f, 0.f, 0.f, 0.f, 1.f,
+    1.f, 0.f, 0.f, 0.f, 0.f,
+    1.f, 1.f, 0.f, 0.f, 0.f,
+    0.f, 1.f, 1.f, 0.f, 0.f,
+    0.f, 0.f, 1.f, 1.f, 1.f
 };
 
-float output[] = { 0, 1, 1, 0 };
+float output[] = { 0, 1, 1 };
 
 void print(const Blob &a) {
     cout << a << endl;
 }
 
+void print(std::optional<Blob> &a) {
+    if (a.has_value()) {
+        cout << a.value() << endl;
+    }
+}
+
 static const Multiply multOperation;
 static const BiasSum sumOperation;
 static const ReLU reluOperation;
 
 thread_local int a = 0;
 
+// int main() {
+//     Allocator::startVirtualMode();
+//     {
+//         LinearLayerParameters params1{2ull, 2ull, true};
+//         LinearLayerParameters params2{2ull, 1ull, true};
+//         Conv2DLayerParameters paramsConv{7, 3, 1};
+
+//         auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));
+
+//         auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));
+
+//         RandomObject initObject(0, 1, 17);
+//         OptimizerBase SGD = OptimizerBase(0.1);
+//         LinearLayer layer1 {params1, {inputNode}, &initObject};
+//         SGD.append(layer1.layerOperationParams);
+
+//         TensorRef res = layer1.result.value();
+//         ReLULayer reluLayer1 {{res}};
+
+//         res = reluLayer1.result.value();
+//         LinearLayer layer2 {params2, {res}, &initObject};
+//         res = layer2.result.value();
+//         SGD.append(layer2.layerOperationParams);
+
+//         MSELoss mseLoss {{res, trueNode}};
+
+//         auto &lastNode = mseLoss.result.value();
+//         lastNode.forward();
+//         lastNode.gradient = Blob::ones(Shape {{1}});
+//         lastNode.backward();
+//         Allocator::endSession();
+//         lastNode.clear();
+//         Allocator::endVirtualMode();
+
+//         for (int j = 0; j < 500; ++j) {
+//             auto &result = lastNode.forward();
+//             lastNode.gradient = Blob::ones(Shape {{1}});
+//             printf("%d: %f\n", j, result(0, 0));
+//             lastNode.backward();
+//             SGD.step();
+//             Allocator::endSession();
+//             lastNode.clear();
+//         }
+//         auto &result2 = res.get().forward();
+//         print(result2);
+//         Allocator::endSession();
+//     }
+
+//     Allocator::end();
+//     return 0;
+// }
+
+// int main() {
+//     Allocator::startVirtualMode();
+//     {
+//         Conv2DLayerParameters paramsConv{3, 3, 1};
+
+//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+
+//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+
+//         RandomObject initObject(0, 1, 17);
+//         OptimizerBase SGD = OptimizerBase(0.1);
+//         Conv2DLayer convL = {paramsConv, {inputNode}, &initObject};
+//         SGD.append(convL.layerOperationParams);
+
+//         TensorRef res = convL.result.value();
+//         res.get().forward();
+//         print(res.get().output);
+
+//         print(convL.kernel.output);
+
+//         res.get().gradient = Blob::ones(Shape {{1, 1, 5, 5}});
+//         res.get().backward();
+
+//         print(convL.kernel.gradient);
+
+//         print(inputNode.gradient);
+//         Allocator::endSession();
+//     }
+
+//     Allocator::end();
+//     return 0;
+// }
+
+// int main() {
+//     Allocator::startVirtualMode();
+//     {
+//         AxisParameters paramsConv{{1, 2, 3}};
+
+//         auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
+
+//         auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
+//         OptimizerBase SGD = OptimizerBase(0.1);
+//         VarLayer var = {paramsConv, {inputNode}};
+
+//         TensorRef res = var.result.value();
+//         res.get().forward();
+//         print(res.get().output);
+
+//         res.get().gradient = Blob::ones(res.get().output.value().shape);
+//         res.get().backward();
+
+//         // print(convL.kernel.gradient);
+
+//         print(inputNode.gradient);
+//         Allocator::endSession();
+//     }
+
+//     Allocator::end();
+//     return 0;
+// }
+
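
The active demo below normalizes a `{3, 5, 5}` input over axes `{2, 3}` (rows and columns), so mean and variance are taken per 5x5 map. A standalone sketch of the statistics it should produce (hypothetical helper, for checking by hand):

```cpp
#include <cstddef>

// Per-map mean and Bessel-corrected variance over each rows x cols slice,
// matching Mean over axes {2, 3} and VarLayer's meanMinusOne reduction.
void perMapStats(const float* x, std::size_t maps, std::size_t rows, std::size_t cols,
                 float* mean, float* var) {
    const std::size_t n = rows * cols;
    for (std::size_t m = 0; m < maps; ++m) {
        float s = 0.f;
        for (std::size_t i = 0; i < n; ++i) s += x[m * n + i];
        mean[m] = s / n;
        float q = 0.f;
        for (std::size_t i = 0; i < n; ++i) {
            float d = x[m * n + i] - mean[m];
            q += d * d;
        }
        var[m] = q / (n - 1);  // the minusOne denominator
    }
}
```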
 int main() {
     Allocator::startVirtualMode();
     {
-        LinearLayerParameters params1{2ull, 2ull, true};
-        LinearLayerParameters params2{2ull, 1ull, true};
-
-        auto inputNode = Tensor(Blob::constBlob(Shape {{4, 2}}, input));
+        AxisParameters paramsConv{{2, 3}};
 
-        auto trueNode = Tensor(Blob::constBlob(Shape {{4, 1}}, output));
+        auto inputNode = Tensor(Blob::constBlob(Shape {{3, 5, 5}}, input));
 
-        RandomObject initObject(0, 1, 17);
+        auto trueNode = Tensor(Blob::constBlob(Shape {{3, 1}}, output));
         OptimizerBase SGD = OptimizerBase(0.1);
-        LinearLayer layer1 {params1, {inputNode}, &initObject};
-        SGD.append(layer1.layerOperationParams);
+        LayerNorm norm = {paramsConv, {inputNode}};
 
-        TensorRef res = layer1.result.value();
-        ReLULayer reluLayer1 {{res}};
+        TensorRef res = norm.result.value();
+        res.get().forward();
+        print(res.get().output);
 
-        res = reluLayer1.result.value();
-        LinearLayer layer2 {params2, {res}, &initObject};
-        res = layer2.result.value();
-        SGD.append(layer2.layerOperationParams);
+        // res.get().gradient = Blob::ones(res.get().output.value().shape);
+        // res.get().backward();
 
-        MSELoss mseLoss {{res, trueNode}};
+        res.get().gradient = Blob::fill(res.get().output.value().shape, 100000000.f);
+        res.get().backward();
 
-        auto &lastNode = mseLoss.result.value();
-        lastNode.forward();
-        lastNode.gradient = Blob::ones(Shape {{1}});
-        lastNode.backward();
-        Allocator::endSession();
-        lastNode.clear();
-        Allocator::endVirtualMode();
-
-        for (int j = 0; j < 500; ++j) {
-            auto &result = lastNode.forward();
-            lastNode.gradient = Blob::ones(Shape {{1}});
-            printf("%d: %f\n", j, result(0, 0));
-            lastNode.backward();
-            SGD.step();
-            Allocator::endSession();
-            lastNode.clear();
-        }
-        auto &result2 = res.get().forward();
-        print(result2);
+        // print(convL.kernel.gradient);
+
+        print(inputNode.gradient);
         Allocator::endSession();
     }
 
     Allocator::end();
     return 0;
-}
+}
\ No newline at end of file
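
One possible follow-up check for the demo (a sketch, not in the patch): after `forward()`, each normalized 5x5 map should have mean approximately zero, and variance near one for variances well above the 1e-5 epsilon.

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>

// Verify a normalized map sums to ~0 (its mean after LayerNorm).
void checkNormalized(const float* y, std::size_t n) {
    float s = 0.f;
    for (std::size_t i = 0; i < n; ++i) s += y[i];
    assert(std::fabs(s / n) < 1e-4f);
}
```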