Skip to content

Commit 5ef5791

Browse files
committed
refactor: reimplement Sollya generator using only Sollya scripts
This patch removes the Python dependency by rewriting the generator using pure Sollya scripting. It also applies general code cleanups and adds support for non-FMA targets.
1 parent 1fb0c2c commit 5ef5791

34 files changed

+4985
-17771
lines changed

npsr/common.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,29 @@
99
#include "precise.h"
1010

1111
#endif // NUMPY_SIMD_ROUTINES_NPSR_COMMON_H_
12+
13+
// Per-target section. The test below is true when this guard macro and
// HWY_TARGET_TOGGLE are either both defined or both undefined; the guard is
// then flipped, so the test gives the opposite result the next time this file
// is included with HWY_TARGET_TOGGLE in the same state.
// NOTE(review): this looks like Highway's per-target include-guard idiom
// (the section being re-included once per SIMD target) - confirm against the
// project's foreach_target.h usage.
#if defined(NUMPY_SIMD_ROUTINES_NPSR_COMMON_FOREACH_H_) == \
14+
defined(HWY_TARGET_TOGGLE) // NOLINT
15+
#ifdef NUMPY_SIMD_ROUTINES_NPSR_COMMON_FOREACH_H_
16+
#undef NUMPY_SIMD_ROUTINES_NPSR_COMMON_FOREACH_H_
17+
#else
18+
#define NUMPY_SIMD_ROUTINES_NPSR_COMMON_FOREACH_H_
19+
#endif
20+
21+
HWY_BEFORE_NAMESPACE();
22+
namespace npsr::HWY_NAMESPACE {
23+
// Short alias for the Highway namespace, plus the Highway type helpers that
// are re-exported into npsr::HWY_NAMESPACE by the using-declarations below.
namespace hn = hwy::HWY_NAMESPACE;
24+
using hn::DFromV;
25+
using hn::MFromD;
26+
using hn::Rebind;
27+
using hn::RebindToUnsigned;
28+
using hn::TFromD;
29+
using hn::TFromV;
30+
using hn::VFromD;
31+
// True when HWY_NATIVE_FMA expands to a non-zero value.
// NOTE(review): presumably this reflects hardware FMA support on the target
// currently being compiled - verify against Highway's documentation.
constexpr bool kNativeFMA = HWY_NATIVE_FMA != 0;
32+
33+
// Empty function; going by its name it exists only to avoid an "unused"
// warning for this per-target section.
HWY_ATTR void DummyToSuppressUnusedWarning() {}
34+
} // namespace npsr::HWY_NAMESPACE
35+
HWY_AFTER_NAMESPACE();
36+
37+
#endif // NUMPY_SIMD_ROUTINES_NPSR_COMMON_FOREACH_H_

npsr/precise.h

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,15 @@ constexpr auto kLowAccuracy = _LowAccuracy{};
1818

1919
// Tag types (and matching constants) for rounding options.
// In this diff hunk the lines whose gutter number ends in '-' are deleted:
// the _Nearest/_Down/_Up/_Zero tags, kNearest, and the `#if 0` block holding
// kDown/kUp/kZero. After the commit only _Force and kForce remain.
struct Round {
2020
struct _Force {};
21-
struct _Nearest {};
22-
struct _Down {};
23-
struct _Up {};
24-
struct _Zero {};
2521
static constexpr auto kForce = _Force{};
26-
static constexpr auto kNearest = _Nearest{};
27-
#if 0 // not used yet
28-
static constexpr auto kDown = _Down{};
29-
static constexpr auto kUp = _Up{};
30-
static constexpr auto kZero = _Zero{};
31-
#endif
3222
};
3323

3424
// Tag types (and matching constants) for subnormal-handling options.
// In this diff hunk the `#if 0` / `#endif` pair (gutter numbers ending in '-')
// is deleted, so kDAZ and kFTZ are no longer compiled out and sit alongside
// kIEEE754 after the commit.
struct Subnormal {
3525
struct _DAZ {};
3626
struct _FTZ {};
3727
struct _IEEE754 {};
38-
#if 0 // not used yet
3928
static constexpr auto kDAZ = _DAZ{};
4029
static constexpr auto kFTZ = _FTZ{};
41-
#endif
4230
static constexpr auto kIEEE754 = _IEEE754{};
4331
};
4432

@@ -137,19 +125,6 @@ class Precise {
137125

138126
static constexpr bool kRoundForce =
139127
(std::is_same_v<Round::_Force, Args> || ...);
140-
static constexpr bool _kRoundNearest =
141-
(std::is_same_v<Round::_Nearest, Args> || ...);
142-
static constexpr bool kRoundZero =
143-
(std::is_same_v<Round::_Zero, Args> || ...);
144-
static constexpr bool kRoundDown =
145-
(std::is_same_v<Round::_Down, Args> || ...);
146-
static constexpr bool kRoundUp = (std::is_same_v<Round::_Up, Args> || ...);
147-
// only one rounding mode can be set
148-
static_assert((_kRoundNearest + kRoundDown + kRoundUp + kRoundZero) <= 1,
149-
"Only one rounding mode can be set at a time");
150-
// if no rounding mode is set, default to round nearest
151-
static constexpr bool kRoundNearest =
152-
_kRoundNearest || (!kRoundDown && !kRoundUp && !kRoundZero);
153128

154129
static constexpr bool kDAZ = (std::is_same_v<Subnormal::_DAZ, Args> || ...);
155130
static constexpr bool kFTZ = (std::is_same_v<Subnormal::_FTZ, Args> || ...);
@@ -162,17 +137,7 @@ class Precise {
162137
static constexpr bool kIEEE754 = _kIEEE754 || !(kDAZ || kFTZ);
163138

164139
private:
165-
int _NewRoundingMode() const {
166-
if constexpr (kRoundDown) {
167-
return FE_DOWNWARD;
168-
} else if constexpr (kRoundUp) {
169-
return FE_UPWARD;
170-
} else if constexpr (kRoundZero) {
171-
return FE_TOWARDZERO;
172-
} else {
173-
return FE_TONEAREST;
174-
}
175-
}
140+
int _NewRoundingMode() const { return FE_TONEAREST; }
176141
int _rounding_mode = 0;
177142
bool _retrieve_rounding_mode = false;
178143
fexcept_t _exceptions;

npsr/trig/data/approx.h

Lines changed: 1563 additions & 0 deletions
Large diffs are not rendered by default.

npsr/trig/data/approx.h.sol

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
suppressmessage(186, 185, 184);
2+
3+
// Builds a lookup table for pFunc sampled at num_lut equally spaced angles
// i * (2*pi / num_lut), i = 0 .. num_lut-1, and returns it as C array text.
//   pT        - type descriptor; this procedure reads kSize, kDigits, kRound
//               and kCSFX from it (callers pass Float32 / Float64).
//   pFunc     - function to tabulate.
//   pFuncDriv - its derivative.
// Four values are appended per sample, in this order:
//   deriv (derivative minus sigma, rounded), sigma, high, low.
procedure ApproxLut4_(pT, pFunc, pFuncDriv) {
4+
var r, i, $;
5+
6+
// Table size: 2^9 samples when pT.kSize is 64, otherwise 2^8.
$.num_lut = match pT.kSize
7+
with 64: (2^9)
8+
default: (2^8);
9+
10+
// [precision in bits, rounding mode] used for the low part below:
// 24 bits toward zero when pT.kSize is 64, otherwise pT.kDigits to nearest.
$.low_round = match pT.kSize
11+
with 64: ([|24, RZ|])
12+
default: ([|pT.kDigits, RN|]);
13+
// Angle step between consecutive samples.
$.scale = 2.0 * pi / $.num_lut;
14+
15+
r = [||];
16+
for i from 0 to $.num_lut - 1 do {
17+
$.angle = i * $.scale;
18+
$.exact = pFunc($.angle);
19+
// high = pFunc(angle) rounded with pT.kRound; low = the remainder
// (exact - high) rounded per low_round and then passed through pT.kRound.
$.high = pT.kRound($.exact);
20+
$.low = pT.kRound(round($.exact - $.high, $.low_round[0], $.low_round[1]));
21+
22+
$.deriv_exact = pFuncDriv($.angle);
23+
// k = exponent of the smallest power of two >= |derivative|.
// NOTE(review): when the derivative is exactly 0 this evaluates log2(0);
// confirm the resulting sigma/deriv entries are what the kernels expect.
$.k = ceil(log2(abs($.deriv_exact)));
24+
// NOTE(review): for a negative derivative this negates the exponent k, so
// sigma = 2^(-k) stays positive rather than becoming -2^k. Confirm this is
// intended and matches the previously generated tables.
if ($.deriv_exact < 0) then $.k = -$.k;
25+
26+
$.sigma = 2.0^$.k;
27+
$.deriv = pT.kRound($.deriv_exact - $.sigma);
28+
r = r @ [|$.deriv, $.sigma, $.high, $.low|];
29+
};
30+
// NOTE(review): the trailing 4 is presumably the number of values per output
// row - verify against the ToStringCArray helper.
return ToStringCArray(r, pT.kCSFX, 4);
31+
};
32+
33+
// Emit kSinApproxTable: a primary variable template declared as an empty char
// array, followed by float and double specializations whose initializers come
// from ApproxLut4_ with sin(x) as the function and cos(x) as its derivative.
Append(
34+
"template <typename T> constexpr char kSinApproxTable[] = {};",
35+
"template <> constexpr float kSinApproxTable<float>[] = ",
36+
ApproxLut4_(Float32, sin(x), cos(x)),
37+
"",
38+
"template <> constexpr double kSinApproxTable<double>[] = ",
39+
ApproxLut4_(Float64, sin(x), cos(x)),
40+
""
41+
);
42+
// Same layout for kCosApproxTable, using cos(x) and its derivative -sin(x).
Append(
43+
"template <typename T> constexpr char kCosApproxTable[] = {};",
44+
"template <> constexpr float kCosApproxTable<float>[] = ",
45+
ApproxLut4_(Float32, cos(x), -sin(x)),
46+
"",
47+
"template <> constexpr double kCosApproxTable<double>[] = ",
48+
ApproxLut4_(Float64, cos(x), -sin(x)),
49+
""
50+
);
51+
52+
// Write everything appended so far as a C++ header; the argument is the
// namespace string passed to the WriteCPPHeader helper.
WriteCPPHeader("npsr::trig::data");

npsr/trig/data/constants.h

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Auto-generated by npsr/trig/data/constants.h.sol
2+
// Use `spin generate -f` to force regeneration
3+
#ifndef NPSR_TRIG_DATA_CONSTANTS_H
4+
#define NPSR_TRIG_DATA_CONSTANTS_H
5+
6+
// Generated constants for the trig routines: multi-term splits of pi and
// related values, selected by element type and, where present, by an FMA flag.
// The primary templates that are declared as `char` (empty array or '_') act
// as placeholders; the usable values live in the float/double specializations.
// NOTE(review): the explicit specializations below are defined in a header
// without `inline`; check that including this header from several translation
// units cannot produce duplicate definitions on every supported compiler.
namespace npsr::trig::data {
7+
// pi as a sum of terms: 3 terms when FMA is true, 4 terms when FMA is false.
template <typename T, bool FMA> constexpr char kPi[] = {};
8+
template <> constexpr float kPi<float, true>[] = {
9+
0x1.921fb6p1f, -0x1.777a5cp-24f, -0x1.ee59dap-49f,
10+
};
11+
template <> constexpr float kPi<float, false>[] = {
12+
0x1.92p1f, 0x1.fb4p-11f, 0x1.444p-23f, 0x1.68c234p-38f,
13+
};
14+
template <> constexpr double kPi<double, true>[] = {
15+
0x1.921fb54442d18p1, 0x1.1a62633145c06p-53, 0x1.c1cd129024e09p-106,
16+
};
17+
template <> constexpr double kPi<double, false>[] = {
18+
0x1.921fb6p1, -0x1.777a5cp-24, -0x1.ee59dap-49, 0x1.98a2e03707345p-76,
19+
};
20+
21+
// pi split for double only. The primary template (2 terms) is what any FMA
// value other than `false` resolves to; `false` has its own 3-term split.
template <bool FMA> constexpr double kPiPrec35[] = {
22+
0x1.921fb5444p1, 0x1.68c234c4c6628p-38,
23+
};
24+
template <> constexpr double kPiPrec35<false>[] = {
25+
0x1.921fb6p1, -0x1.777a5cp-24, -0x1.ee59dap-49,
26+
};
27+
28+
// 2*pi as two terms per element type.
template <typename T> constexpr char kPiMul2[] = {};
29+
template <> constexpr float kPiMul2<float>[] = {
30+
0x1.921fb6p2f, -0x1.777a5cp-23f,
31+
};
32+
template <> constexpr double kPiMul2<double>[] = {
33+
0x1.921fb54442d18p2, 0x1.1a62633145c07p-52,
34+
};
35+
36+
// pi/16 split for double. The `false` specialization has 4 terms and is not
// in descending magnitude: its last element (p-33) is larger than the two
// before it. That order comes from the generator script, which emits its
// terms as [0], [2], [3], [1].
template <bool FMA> constexpr double kPiDiv16Prec29[] = {
37+
0x1.921fb54442d18p-3, 0x1.1a62633p-57, 0x1.45c06e0e68948p-89,
38+
};
39+
template <> constexpr double kPiDiv16Prec29<false>[] = {
40+
0x1.921fb54p-3, 0x1.a626331p-61, 0x1.1701b839a252p-91, 0x1.10b461p-33,
41+
};
42+
43+
// Scalar constants 1/pi, pi/2 and 16/pi in float and double.
template <typename T> constexpr char kInvPi = '_';
44+
template <> constexpr float kInvPi<float> = 0x1.45f306p-2f;
45+
template <> constexpr double kInvPi<double> = 0x1.45f306dc9c883p-2;
46+
47+
template <typename T> constexpr char kHalfPi = '_';
48+
template <> constexpr float kHalfPi<float> = 0x1.921fb6p0f;
49+
template <> constexpr double kHalfPi<double> = 0x1.921fb54442d18p0;
50+
51+
template <typename T> constexpr char k16DivPi = '_';
52+
template <> constexpr float k16DivPi<float> = 0x1.45f306p2f;
53+
template <> constexpr double k16DivPi<double> = 0x1.45f306dc9c883p2;
54+
55+
} // namespace npsr::trig::data
56+
57+
#endif // NPSR_TRIG_DATA_CONSTANTS_H

npsr/trig/data/constants.h.sol

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
// Variadic wrapper: forwards its argument list to ConstantsFromArray and
// formats the result with ToStringCArray using the literal suffix "f"
// (float literals). The trailing 4 is passed through to ToStringCArray.
procedure ConstantsToArrayF32_(pArgs = ...) {
2+
return ToStringCArray(ConstantsFromArray(pArgs), "f", 4);
3+
};
4+
// Same as ConstantsToArrayF32_ but with an empty literal suffix (double
// literals).
procedure ConstantsToArrayF64_(pArgs = ...) {
5+
return ToStringCArray(ConstantsFromArray(pArgs), "", 4);
6+
};
7+
8+
// Emit kPi<T, FMA>: an empty char-array primary template plus four
// specializations. Each [|mode, bits...|] group lists a rounding mode followed
// by bit widths; in the generated header every bit width yields one array
// element (e.g. [|RN, 24, 24, 24|] gives 3 terms, and
// [|RD, 11, 11, 11|], [|RN, 24|] gives 4).
// NOTE(review): presumably each term is the rounding of whatever remains after
// subtracting the previous terms - verify against ConstantsFromArray.
Append(
9+
"template <typename T, bool FMA> constexpr char kPi[] = {};",
10+
11+
"template <> constexpr float kPi<float, true>[] = " @
12+
ConstantsToArrayF32_(pi, [|RN, 24, 24, 24|]),
13+
"template <> constexpr float kPi<float, false>[] = " @
14+
ConstantsToArrayF32_(pi, [|RD, 11, 11, 11|], [|RN, 24|]), // no FMA
15+
16+
17+
"template <> constexpr double kPi<double, true>[] = " @
18+
ConstantsToArrayF64_(pi, [|RN, 53|], [|RD, 53|], [|RU, 53|]),
19+
"template <> constexpr double kPi<double, false>[] = " @
20+
ConstantsToArrayF64_(pi, [|RN, 24, 24, 24|], [|RN, 53|]), // no FMA
21+
22+
""
23+
);
24+
25+
// Emit kPiPrec35<FMA> (double only). The primary template itself carries the
// values (a 35-bit leading term followed by one 53-bit term), and only
// FMA == false gets a specialization, built from three 24-bit terms.
Append(
26+
"template <bool FMA> constexpr double kPiPrec35[] = " @
27+
ConstantsToArrayF64_(pi, [|RN, 35|], [|RD, 53|]),
28+
"template <> constexpr double kPiPrec35<false>[] = " @
29+
ConstantsToArrayF64_(pi, [|RN, 24, 24, 24|]),
30+
""
31+
);
32+
33+
// Emit kPiMul2<T>: 2*pi as two round-to-nearest terms, 24 bits each for float
// and 53 bits each for double. The char-array primary template is a
// placeholder.
Append(
34+
"template <typename T> constexpr char kPiMul2[] = {};",
35+
36+
"template <> constexpr float kPiMul2<float>[] = " @
37+
ConstantsToArrayF32_(pi*2, [|RN, 24, 24|]),
38+
"template <> constexpr double kPiMul2<double>[] = " @
39+
ConstantsToArrayF64_(pi*2, [|RN, 53, 53|]),
40+
""
41+
);
42+
43+
// Non-FMA split of pi/16: four terms (27, 27, 29 and 53 bits, all RN).
vNFma = Constants(pi/16, [|RN, 27, 27|], [|RN, 29|], [|RN, 53|]);
44+
// Emit kPiDiv16Prec29<FMA> (double only). The primary template carries the
// three-term FMA split. The `false` specialization emits the four vNFma terms
// reordered as [0], [2], [3], [1], i.e. the second term is moved to the end.
// NOTE(review): the reorder is deliberate in this script; the reason is not
// visible here - confirm against the kernel that indexes this array.
Append(
45+
"template <bool FMA> constexpr double kPiDiv16Prec29[] = " @
46+
ConstantsToArrayF64_(pi/16, [|RN, 53|], [|RN, 29|], [|RN, 53|]),
47+
"template <> constexpr double kPiDiv16Prec29<false>[] = " @
48+
ToStringCArray([|vNFma[0], vNFma[2], vNFma[3], vNFma[1]|], "", 4),
49+
""
50+
);
51+
52+
// Emit scalar constants. Each group declares a char placeholder primary
// template ('_') and then float/double specializations whose values are the
// single(...) / double(...) roundings of the expression; the float literal
// gets an "f" suffix appended.

// kInvPi = 1/pi
Append(
53+
"template <typename T> constexpr char kInvPi = '_';",
54+
"template <> constexpr float kInvPi<float> = " @
55+
single(1/pi) @ "f;",
56+
57+
"template <> constexpr double kInvPi<double> = " @
58+
double(1/pi) @ ";",
59+
""
60+
);
61+
62+
// kHalfPi = pi/2
Append(
63+
"template <typename T> constexpr char kHalfPi = '_';",
64+
65+
"template <> constexpr float kHalfPi<float> = " @
66+
single(pi/2) @ "f;",
67+
68+
"template <> constexpr double kHalfPi<double> = " @
69+
double(pi/2) @ ";",
70+
""
71+
);
72+
73+
// k16DivPi = 16/pi
Append(
74+
"template <typename T> constexpr char k16DivPi = '_';",
75+
76+
"template <> constexpr float k16DivPi<float> = " @
77+
single(16/pi) @ "f;",
78+
79+
"template <> constexpr double k16DivPi<double> = " @
80+
double(16/pi) @ ";",
81+
""
82+
);
83+
84+
// Dump();
85+
86+
WriteCPPHeader("npsr::trig::data");
87+

npsr/trig/data/data.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Auto-generated by npsr/trig/data/data.h.sol
2+
// Use `spin generate -f` to force regeneration
3+
#ifndef NPSR_TRIG_DATA_DATA_H
4+
#define NPSR_TRIG_DATA_DATA_H
5+
6+
#include "npsr/trig/data/constants.h"
7+
#include "npsr/trig/data/high.h"
8+
#include "npsr/trig/data/approx.h"
9+
#include "npsr/trig/data/reduction.h"
10+
11+
#endif // NPSR_TRIG_DATA_DATA_H

npsr/trig/data/data.h.sol

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// Generates the umbrella header: one #include line per listed data header
// (constants, high, approx, reduction - in that order), then writes the file.
var header;
2+
for header in [|"constants", "high", "approx", "reduction"|] do {
3+
Append(
4+
"#include \"npsr/trig/data/" @ header @ ".h\""
5+
);
6+
};
7+
8+
// Called without a namespace argument, unlike the other generator scripts.
WriteCPPHeader();
9+
10+

npsr/trig/data/high.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Auto-generated by npsr/trig/data/high.h.sol
2+
// Use `spin generate -f` to force regeneration
3+
#ifndef NPSR_TRIG_DATA_HIGH_H
4+
#define NPSR_TRIG_DATA_HIGH_H
5+
6+
namespace npsr::trig::data {
7+
// 16 doubles whose values are consistent with sin(k*pi/16) for k = 0..15
// (index 4 is sqrt(2)/2, index 8 is exactly 1, and the table is symmetric
// around index 8).
// NOTE(review): "Hi" presumably marks the high part of a high/low pair whose
// low parts live in kPackedLowSinCosKPi16Table - verify against high.h.sol.
constexpr double kHiSinKPi16Table[] = {
8+
0,
9+
0x1.8f8b83c69a60bp-3,
10+
0x1.87de2a6aea963p-2,
11+
0x1.1c73b39ae68c8p-1,
12+
0x1.6a09e667f3bcdp-1,
13+
0x1.a9b66290ea1a3p-1,
14+
0x1.d906bcf328d46p-1,
15+
0x1.f6297cff75cbp-1,
16+
0x1p0,
17+
0x1.f6297cff75cbp-1,
18+
0x1.d906bcf328d46p-1,
19+
0x1.a9b66290ea1a3p-1,
20+
0x1.6a09e667f3bcdp-1,
21+
0x1.1c73b39ae68c8p-1,
22+
0x1.87de2a6aea963p-2,
23+
0x1.8f8b83c69a60bp-3,
24+
};
25+
26+
// 16 doubles whose values are consistent with cos(k*pi/16) for k = 0..15
// (index 0 is exactly 1, index 8 is 0, and entries 9..15 are the negated
// mirror of entries 7..1).
// NOTE(review): "Hi" presumably marks the high part of a high/low pair whose
// low parts live in kPackedLowSinCosKPi16Table - verify against high.h.sol.
constexpr double kHiCosKPi16Table[] = {
27+
0x1p0,
28+
0x1.f6297cff75cbp-1,
29+
0x1.d906bcf328d46p-1,
30+
0x1.a9b66290ea1a3p-1,
31+
0x1.6a09e667f3bcdp-1,
32+
0x1.1c73b39ae68c8p-1,
33+
0x1.87de2a6aea963p-2,
34+
0x1.8f8b83c69a60bp-3,
35+
0,
36+
-0x1.8f8b83c69a60bp-3,
37+
-0x1.87de2a6aea963p-2,
38+
-0x1.1c73b39ae68c8p-1,
39+
-0x1.6a09e667f3bcdp-1,
40+
-0x1.a9b66290ea1a3p-1,
41+
-0x1.d906bcf328d46p-1,
42+
-0x1.f6297cff75cbp-1,
43+
};
44+
45+
// 16 doubles, one per k = 0..15, all of magnitude around 2^-55 .. 2^-60 or 0.
// NOTE(review): going by the name, each entry presumably packs the low-order
// correction terms of both sin(k*pi/16) and cos(k*pi/16) into a single double;
// the packing scheme is not visible here - verify against high.h.sol and the
// kernel that unpacks it before relying on the layout.
constexpr double kPackedLowSinCosKPi16Table[] = {
46+
0, 0x1.56217bc626d19p-56, 0x1.457e6bc672cedp-56, 0x1.9f6303c8b25ddp-60,
47+
-0x1.bdd34bc8bdd34p-55, 0x1.b25dd3c39f63p-55, -0x1.72ced3c7457e6p-57, -0x1.26d193c756217p-57,
48+
0, 0x1.26d193c756217p-57, 0x1.72ced3c7457e6p-57, -0x1.b25dd3c39f63p-55,
49+
0x1.bdd34bc8bdd34p-55, -0x1.9f6303c8b25ddp-60, -0x1.457e6bc672cedp-56, -0x1.56217bc626d19p-56,
50+
};
51+
52+
} // namespace npsr::trig::data
53+
54+
#endif // NPSR_TRIG_DATA_HIGH_H

0 commit comments

Comments
 (0)