|
5 | 5 |
|
6 | 6 | namespace BDT{
|
7 | 7 |
|
| 8 | +/* --- |
| 9 | +* Balanced tree reduce implementation. |
| 10 | +* Reduces an array of inputs to a single value using the template binary operator 'Op', |
| 11 | +* for example summing all elements with OpAdd, or finding the maximum with OpMax |
| 12 | +* Use only when the input array is fully unrolled; otherwise, slice out a fully unrolled |
| 13 | +* section, apply the reduction to it, and accumulate the result over the rolled dimension. |
| 14 | +* Required for emulation to guarantee equality of ordering. |
| 15 | +* --- */ |
/**
 * Integer floor of log2(x), usable in constant expressions.
 *
 * Every x below 2 (including zero and negative values) yields 0; otherwise the
 * result is one more than the result for x / 2.
 * Written as a single return expression so it stays a valid constexpr function
 * under the stricter C++11 rules as well.
 */
constexpr int floorlog2(int x) { return (x >= 2) ? floorlog2(x / 2) + 1 : 0; }
| 17 | + |
/**
 * Returns 2 raised to the power x for x >= 0 (pow2(0) == 1), usable in
 * constant expressions.
 *
 * NOTE(review): a negative x never reaches the x == 0 base case, and a large x
 * overflows int; callers are expected to pass small non-negative exponents.
 */
constexpr int pow2(int x) { return (x != 0) ? pow2(x - 1) * 2 : 1; }
| 19 | + |
| 20 | +template <class T, int N, class Op> T reduce(const T *x, Op op) { |
| 21 | + static constexpr int leftN = pow2(floorlog2(N - 1)) > 0 ? pow2(floorlog2(N - 1)) : 0; |
| 22 | + static constexpr int rightN = N - leftN > 0 ? N - leftN : 0; |
| 23 | + if (N == 1) { |
| 24 | + return x[0]; |
| 25 | + } |
| 26 | + if (N == 2) { |
| 27 | + return op(x[0], x[1]); |
| 28 | + } |
| 29 | + return op(reduce<T, leftN, Op>(x, op), reduce<T, rightN, Op>(x + leftN, op)); |
| 30 | +} |
| 31 | + |
/**
 * Binary addition functor: calling it with (a, b) returns a + b.
 * Intended as the 'Op' argument of reduce() to sum all elements of an array.
 *
 * @tparam T  operand and result type; must support operator+
 */
template <class T> class OpAdd {
  public:
    // const-qualified: the functor holds no state, so it must also be callable
    // through a const object or const reference (e.g. from a const method).
    T operator()(T a, T b) const { return a + b; }
};
| 36 | + |
| 37 | +// Number of trees given number of classes |
/**
 * Maps the number of classes to the count used to size the per-class score
 * arrays: exactly two classes collapse to 1, and any other value is returned
 * unchanged.
 *
 * @param n_classes  number of classes
 * @return 1 if n_classes == 2, otherwise n_classes
 */
constexpr int fn_classes(int n_classes){
  return (n_classes != 2) ? n_classes : 1;
}
|
12 | 41 |
|
@@ -99,23 +128,24 @@ struct BDT{
|
99 | 128 | public:
|
100 | 129 | score_t normalisation;
|
101 | 130 | score_t init_predict[fn_classes(n_classes)];
|
| 131 | + OpAdd<score_t> op_add; |
102 | 132 |
|
103 |
| - void tree_scores(input_t x, score_t scores[n_trees][fn_classes(n_classes)]) const; |
| 133 | + void tree_scores(input_t x, score_t scores[fn_classes(n_classes)][n_trees]) const; |
104 | 134 |
|
105 | 135 | void decision_function(input_t x, score_t score[fn_classes(n_classes)]) const{
|
106 |
| - score_t scores[n_trees][fn_classes(n_classes)]; |
| 136 | + score_t scores[fn_classes(n_classes)][n_trees]; |
107 | 137 | #pragma HLS ARRAY_PARTITION variable=scores dim=0
|
| 138 | + // Get predictions scores |
| 139 | + tree_scores(x, scores); |
| 140 | + // Reduce |
| 141 | + Reduce: |
108 | 142 | for(int j = 0; j < fn_classes(n_classes); j++){
|
| 143 | + // Init predictions |
109 | 144 | score[j] = init_predict[j];
|
| 145 | + // Sum predictions from trees via "reduce" method |
| 146 | + score[j] += reduce<score_t, n_trees, OpAdd<score_t>>(scores[j], op_add); |
110 | 147 | }
|
111 |
| - tree_scores(x, scores); |
112 |
| - Trees: |
113 |
| - for(int i = 0; i < n_trees; i++){ |
114 |
| - Classes: |
115 |
| - for(int j = 0; j < fn_classes(n_classes); j++){ |
116 |
| - score[j] += scores[i][j]; |
117 |
| - } |
118 |
| - } |
| 148 | + // Normalize predictions |
119 | 149 | for(int j = 0; j < fn_classes(n_classes); j++){
|
120 | 150 | score[j] *= normalisation;
|
121 | 151 | }
|
|
0 commit comments