feat: cleaner code

UndefinedCpp · UndefinedCpp · commit 7ef72959f281 · 2025-07-18T09:58:00.000+08:00
diff --git a/gem.bin b/gem.bin
diff --git a/src/eval.cpp b/src/eval.cpp
@@ -3,6 +3,10 @@
 #include <array>
 #include <cmath>
 
+#define INPUT_SIZE 768 // 12 piece planes * 64 squares, the index range produced by getFeatureIndices
+#define FEATURE_SIZE 32 // number of first-layer sums kept per perspective in Accumulator
+#define LAYER1_SIZE 128 // 4 * FEATURE_SIZE: the four 32-wide slices of fc2_weight read by NNUEState::evaluate
+
 std::pair<bool, Value> checkGameStatus(Position& board) {
     // Generate legal moves to validate checkmate or stalemate
     Movelist moves = board.legalMoves();
@@ -27,124 +31,156 @@ std::pair<bool, Value> checkGameStatus(Position& board) {
 
 namespace {
 
-class EvaluatorNet {
+struct alignas(32) Accumulator { // first-layer sums, kept once per perspective
+    int white[FEATURE_SIZE]; // fc1 bias plus the fc1 weight rows selected by the white-side feature index
+    int black[FEATURE_SIZE]; // fc1 bias plus the fc1 weight rows selected by the black-side feature index
+};
+
+std::pair<size_t, size_t> getFeatureIndices(const Color color, const PieceType pt, Square sq) {
+    const bool   isWhite    = color == WHITE; // decides which block of 6 piece planes the piece lands in
+    const size_t whiteIndex = ((int) (!isWhite) * 6 + (int) pt) * 64 + sq.index(); // white pieces use planes 0-5, black 6-11 (assuming (int) pt is 0..5)
+    const size_t blackIndex = ((int) (isWhite) * 6 + (int) pt) * 64 + sq.flip().index(); // planes swapped, and the square is passed through flip()
+    return {whiteIndex, blackIndex}; // {index for the white accumulator, index for the black accumulator}
+}
+
+class NNUEState {
 private:
-    const nnue::Weight& w_;
+    std::vector<Accumulator> accumulatorStack;
+    const nnue::Weight&      w;
 
 public:
-    EvaluatorNet() : w_(*nnue::weight) {}
+    inline Accumulator& curr() { return accumulatorStack.back(); }
 
-    int operator()(const int8_t* __restrict__ x1, const int8_t* __restrict__ x2) const {
-        // Set first layer bias
-        int32_t accumulator1[32] = {0};
-        for (int i = 0; i < 32; ++i) {
-            accumulator1[i] = w_.fc1_bias[i]; // auto-vectorizable
-        }
-        int32_t accumulator2[32] = {0};
-        for (int i = 0; i < 32; ++i) {
-            accumulator2[i] = w_.fc1_bias[i];
-        }
-        // Accumulate with weight
-        for (int i = 0; i < 768; ++i) {
-            for (int j = 0; j < 32; ++j) {
-                accumulator1[j] += w_.fc1_weight[i * 32 + j] * x1[i];
-            }
-        }
-        for (int i = 0; i < 768; ++i) {
-            for (int j = 0; j < 32; ++j) {
-                accumulator2[j] += w_.fc1_weight[i * 32 + j] * x2[i];
-            }
-        }
-        // Clamp to 0 and 32767
-        for (int i = 0; i < 32; ++i) {
-            accumulator1[i] = std::clamp(accumulator1[i], 0, 32767);
+public:
+    NNUEState() : w(*nnue::weight) {}
+
+    void push() { // pushes a copy of the top accumulator onto the stack
+        Accumulator copy = curr(); // curr() calls back(), so the stack must be non-empty here (reset() pushes the first entry)
+        accumulatorStack.push_back(copy);
+    }
+    void pop() { accumulatorStack.pop_back(); } // discards the top accumulator; there is no emptiness check
+
+    void reset(const Position& pos) {
+        // Rebuild from scratch: start with a fresh accumulator
+        Accumulator accum;
+        // Both perspectives start from the same first-layer bias
+        for (int i = 0; i < FEATURE_SIZE; ++i) {
+            accum.white[i] = w.fc1_bias[i];
+            accum.black[i] = w.fc1_bias[i];
+        }
+        // Drop every earlier entry; the fresh accumulator becomes the only one on the stack
+        accumulatorStack.clear();
+        accumulatorStack.push_back(std::move(accum));
+        // Activate the feature of each piece on the squares yielded by pos.occ()
+        Bitboard occ = pos.occ();
+        while (occ) {
+            Square sq = occ.pop();
+            Piece  p  = pos.at(sq);
+            update<true>(p, sq); // adds this piece's fc1 weights to curr()
         }
-        for (int i = 0; i < 32; ++i) {
-            accumulator2[i] = std::clamp(accumulator2[i], 0, 32767);
+    }
+
+    template <bool activate> void update(const Piece piece, const Square square) { // convenience overload taking a Piece
+        update<activate>(piece.color(), piece.type(), square); // forwards to the (color, type, square) overload
+    }
+
+    template <bool activate> void update(const Color color, const PieceType pt, const Square sq) {
+        const auto [wi, bi]      = getFeatureIndices(color, pt, sq); // wi: row for the white sums, bi: row for the black sums
+        constexpr int multiplier = (activate ? 1 : -1); // activate adds the weight row, otherwise it is subtracted
+        for (int i = 0; i < FEATURE_SIZE; ++i) {
+            curr().white[i] += w.fc1_weight[wi * FEATURE_SIZE + i] * multiplier; // modifies the top of the stack in place
         }
-        // Also compute features with square
-        int32_t acc1_sqr[32] = {0};
-        for (int i = 0; i < 32; ++i) {
-            acc1_sqr[i] = accumulator1[i] * accumulator1[i];
+        for (int i = 0; i < FEATURE_SIZE; ++i) {
+            curr().black[i] += w.fc1_weight[bi * FEATURE_SIZE + i] * multiplier; // same row update for the black sums
         }
+    }
+
+    int evaluate(const Color color) {
+        const int* input1 = ((color == WHITE) ? curr().white : curr().black); // sums for the side given by `color`
+        const int* input2 = ((color == WHITE) ? curr().black : curr().white); // sums for the other side
+        // Clipped ReLU activation: clamp every sum to [0, 32767]
+        int v1[FEATURE_SIZE], v2[FEATURE_SIZE]; // NOTE(review): sized by FEATURE_SIZE, but the loops below hard-code 32 — keep in sync
         for (int i = 0; i < 32; ++i) {
-            acc1_sqr[i] >>= 15;
+            v1[i] = std::clamp(input1[i], 0, 32767);
+            v2[i] = std::clamp(input2[i], 0, 32767);
         }
-        int32_t acc2_sqr[32] = {0};
+        // Clipped square activation: square the clamped value, then shift right by 15
+        int v1s[FEATURE_SIZE], v2s[FEATURE_SIZE];
         for (int i = 0; i < 32; ++i) {
-            acc2_sqr[i] = accumulator2[i] * accumulator2[i];
+            v1s[i] = v1[i] * v1[i]; // at most 32767 * 32767, which still fits in a 32-bit int
+            v2s[i] = v2[i] * v2[i];
         }
         for (int i = 0; i < 32; ++i) {
-            acc2_sqr[i] >>= 15;
+            v1s[i] >>= 15;
+            v2s[i] >>= 15;
         }
-
-        // Values are ready to pass through dense layer.
-        int32_t acc  = w_.fc2_bias; // set bias
-        int32_t temp = 0;
+        // Pass through second layer: four dot products, one per 32-wide slice of fc2_weight
+        int temp[4] = {0}; // order: v1, v1 squared, v2, v2 squared
         for (int i = 0; i < 32; ++i) {
-            temp += accumulator1[i] * w_.fc2_weight[i];
+            temp[0] += v1[i] * w.fc2_weight[i];
         }
-        acc += temp / 127;
-        temp = 0;
         for (int i = 0; i < 32; ++i) {
-            temp += acc1_sqr[i] * w_.fc2_weight[i + 32];
+            temp[1] += v1s[i] * w.fc2_weight[i + FEATURE_SIZE];
         }
-        acc += temp / 127;
-        temp = 0;
         for (int i = 0; i < 32; ++i) {
-            temp += accumulator2[i] * w_.fc2_weight[i + 64];
+            temp[2] += v2[i] * w.fc2_weight[i + FEATURE_SIZE * 2];
         }
-        acc += temp / 127;
-        temp = 0;
         for (int i = 0; i < 32; ++i) {
-            temp += acc2_sqr[i] * w_.fc2_weight[i + 96];
+            temp[3] += v2s[i] * w.fc2_weight[i + FEATURE_SIZE * 3];
         }
-        acc += temp / 127;
-
-        return acc / 152;
+        // Accumulate: each partial sum is divided by 127 on its own before being added to the bias
+        int y = w.fc2_bias + temp[0] / 127 + temp[1] / 127 + temp[2] / 127 + temp[3] / 127;
+        y     = y / 152; // final integer scaling of the network output
+        return y;
     }
 };
 
-void getInputRepresentationFor(const Board& pos, int8_t* v1, int8_t* v2) {
-    int8_t white[768] = {0};
-    int8_t black[768] = {0};
+// global instance; NOTE(review): its constructor dereferences nnue::weight when this object is initialized — confirm the pointer is already set by then
+NNUEState gNNUE;
 
-    const bool stm_white = (pos.sideToMove() == WHITE);
-
-    auto scan = [&](Bitboard bb, bool isWhite, int idx) {
-        while (bb) {
-            int sq            = bb.pop();
-            int whiteIndex    = ((int) (!isWhite) * 6 + idx) * 64 + sq;
-            white[whiteIndex] = 1;
-            int blackIndex    = ((int) (isWhite) * 6 + idx) * 64 + sq;
-            black[blackIndex] = 1;
-        }
-    };
+} // namespace
 
-    for (int p_index = 0; p_index < 6; ++p_index) {
-        PieceType pt = (PieceType::underlying) p_index;
-        scan(pos.pieces(pt, WHITE), true, p_index);
-        scan(pos.pieces(pt, BLACK), false, p_index);
-    }
+/**
+ * Main evaluation function: rebuilds the accumulator from `pos`, then scores it for the side to move.
+ */
+Value evaluate(Position& pos) {
+    gNNUE.reset(pos); // full rebuild on every call; reset() clears the accumulator stack first
+    return gNNUE.evaluate(pos.sideToMove());
+}
 
-    if (stm_white) {
-        memcpy(v1, white, 768);
-        memcpy(v2, black, 768);
-    } else {
-        memcpy(v2, white, 768);
-        memcpy(v1, black, 768);
-    }
+/**
+ * Update evaluator state: tells the net to update incrementally for a move.
+ * Currently a no-op: the body below is commented out.
+ */
+void updateEvaluatorState(const Position& pos, const Move& move) {
+    // const Piece pFrom = pos.at(move.from());
+    // const Square sFrom = move.from();
+    // const Piece pTo = pos.at(move.to());
+    // const Square sTo = move.to();
 }
 
-} // namespace
+/**
+ * Tells the net to refresh all accumulators (currently a no-op: the reset call is commented out).
+ */
+void updateEvaluatorState(const Position& pos) {
+    // gNNUE.reset(pos);
+}
 
 /**
- * Main evaluation function.
+ * Tells the net that a move has been undone (currently a no-op: the pop call is commented out).
  */
-Value evaluate(Position& pos) {
-    static EvaluatorNet net;
-    int8_t              vec1[768];
-    int8_t              vec2[768];
-    getInputRepresentationFor(pos, vec1, vec2);
-    return net(vec1, vec2);
+void updateEvaluatorState() {
+    // gNNUE.pop();
 }
+
+/**
+ * go depth 8
+info string tc 0 0
+info depth 1 score cp 32 nodes 21 seldepth 1 time 7 pv d2d4
+info depth 2 score cp 39 nodes 78 seldepth 2 time 10 pv d2d4
+info depth 3 score cp 21 nodes 693 seldepth 8 time 15 pv e2e4
+info depth 4 score cp 35 nodes 2027 seldepth 8 time 20 pv d2d4
+info depth 5 score cp 37 nodes 9006 seldepth 10 time 46 pv d2d4
+info depth 6 score cp 40 nodes 22119 seldepth 15 time 125 pv d2d4
+info depth 7 score cp 29 nodes 68574 seldepth 15 time 262 pv d2d4
+ */
diff --git a/src/eval.h b/src/eval.h
@@ -6,10 +6,10 @@
 /**
  * Checks if the game is over and returns the appropriate score.
  */
-std::pair<bool, Value> checkGameStatus(Position &board);
+std::pair<bool, Value> checkGameStatus(Position& board);
 
 // Override `operator<<` for fast printing of Values and Scores
-inline std::ostream &operator<<(std::ostream &os, const Value &s) {
+inline std::ostream& operator<<(std::ostream& os, const Value& s) {
     if (!s.isValid()) {
         os << "(invalid score)";
     } else {
@@ -22,9 +22,16 @@ inline std::ostream &operator<<(std::ostream &os, const Value &s) {
     return os;
 }
 // This is mostly for debugging purposes
-inline std::ostream &operator<<(std::ostream &os, const Score &s) {
-    os << "S(" << (int)s.mg << ", " << (int)s.eg << ")";
+inline std::ostream& operator<<(std::ostream& os, const Score& s) {
+    os << "S(" << (int) s.mg << ", " << (int) s.eg << ")";
     return os;
 }
 
-Value evaluate(Position &pos);
+Value evaluate(Position& pos);
+
+/**
+ * Update evaluator network state.
+ */
+void updateEvaluatorState(const Position& pos, const Move& move);
+void updateEvaluatorState(const Position& pos);
+void updateEvaluatorState();
diff --git a/src/search.cpp b/src/search.cpp
@@ -113,6 +113,7 @@ class Searcher {
             stack[i] = Scratchpad();
         }
         searchInterrupted = false;
+        updateEvaluatorState(pos);
 
         this->tc = timeControl;
 
@@ -128,14 +129,15 @@ class Searcher {
         // We don't have to waste time searching if there is only one reply in
         // competition
         if (tc.competitionMode && mp.size() == 1) {
+            updateEvaluatorState(pos); // refresh evaluator
             Value staticEval = evaluate(pos);
             result           = SearchResult::from(stats, tc, staticEval, mp.pick(), pvTable);
             return;
         }
 
         Value alpha        = Value::matedIn(0);
         Value beta         = Value::mateIn(0);
-        Value window       = 30;
+        Value window       = 18;
         Value bestEvalRoot = Value::none();
         Move  bestMoveRoot = Move::NO_MOVE;
 
@@ -210,6 +212,10 @@ class Searcher {
         if (hasReachedHardLimit()) {
             return alpha;
         }
+        // At root node, refresh evaluator
+        if (isRootNode) {
+            updateEvaluatorState(pos);
+        }
         // Go into quiescence search if no more plys are left to search
         if (depth <= 0) {
             return qsearch<NT>(alpha, beta, 10, ply);
@@ -345,6 +351,7 @@ class Searcher {
                 continue;
             }
 
+            updateEvaluatorState(pos, m);
             pos.makeMove(m);
             stack[ply].currentMove = m.move();
             Value score            = VALUE_NONE;
@@ -362,6 +369,7 @@ class Searcher {
             }
 
             pos.unmakeMove(m);
+            updateEvaluatorState();
             stack[ply].currentMove = 0;
 
             if (score > bestValue) {
@@ -483,12 +491,14 @@ class Searcher {
                 continue;
             }
 
+            updateEvaluatorState(pos, m);
             pos.makeMove(m);
             stack[ply].currentMove = m.move();
 
             const Value v = -qsearch<NT>(-beta, -alpha, depth - 1, ply + 1);
 
             pos.unmakeMove(m);
+            updateEvaluatorState();
             stack[ply].currentMove = 0;
 
             if (v > bestValue) {
diff --git a/src/weight.h b/src/weight.h
@@ -8,7 +8,7 @@ INCBIN(netWeight, "gem.bin");
 
 namespace nnue {
 
-struct Weight {
+struct alignas(32) Weight {
     int16_t fc1_weight[768 * 32];
     int16_t fc1_bias[32];
     int16_t fc2_weight[128];