Skip to content

Commit 7249418

Browse files
committed
(u)lltod can now raise FE_INEXACT (disabled by default). Removed (u)itod for now
1 parent 192d97f commit 7249418

File tree

4 files changed

+194
-110
lines changed

4 files changed

+194
-110
lines changed

src/crt/ltod.src

Lines changed: 52 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
assume adl=1
22

3+
__lltod_signal_FE_INEXACT := 0
4+
35
;-------------------------------------------------------------------------------
46

57
section .text
@@ -49,54 +51,74 @@ __lltod_common:
4951
push hl
5052
push bc
5153
ld b, a
52-
ld c, 1
54+
ld c, a
55+
xor a, a
5356
.shift_loop:
54-
jr nc, .no_carry
55-
inc c
56-
.no_carry:
57+
adc a, 0
5758
srl h
5859
rr l
5960
djnz .shift_loop
60-
; test round bit
61+
; round upwards to even if (round && (guard || sticky))
6162
jr nc, .no_round
62-
; test sticky bits
63-
dec c
63+
; we must ensure that FE_INEXACT is raised since rounding has occurred
64+
or a, a ; test sticky bits
6465
jr nz, .round_up
65-
; test guard bit
66-
bit 0, l
67-
jr nc, .no_round
66+
inc a ; ld a, 1
67+
and a, l ; test guard bit
68+
jr z, .no_round_inexact
6869
.round_up:
6970
inc b ; round up after shifting
7071
.no_round:
72+
if __lltod_signal_FE_INEXACT
73+
adc a, a ; test sticky and round bits
74+
jr z, .result_is_exact
75+
.no_round_inexact:
76+
ld hl, ___fe_cur_env
77+
set 5, (hl) ; FE_INEXACT
78+
.result_is_exact:
79+
else
80+
.no_round_inexact:
81+
end if
7182
ld h, b
83+
ld a, c
84+
ld l, c
7285
pop bc
7386

74-
ld l, a
7587
ex (sp), hl ; (SP) = shift
7688
call __llshru
7789
ex (sp), hl ; (SP) = shifted HL, H = rounding, L = shift
7890
add a, 51
7991

8092
dec h
81-
push af
82-
; exponent = ($400 + (base2_logarithm - 1)) << 4
83-
; BC = $4EEM
84-
ld l, a
85-
ld h, $04
86-
; clear the implicit mantissa bit
87-
res 4, c ; 52 % 8 == 4
88-
add hl, hl
89-
add hl, hl
90-
add hl, hl
91-
add hl, hl
92-
ld a, l
93-
or a, c
93+
jr nz, __int_to_f64_shl.no_rounding
94+
95+
dec a ; compensate for the implicit mantissa bit
96+
; BC/exponent = [$434*, $43E*]
97+
add a, a
98+
add a, a
99+
add a, a
100+
add a, a
101+
add a, c
94102
ld c, a
95-
ld b, h
96-
pop af
97103
pop hl ; restore shifted HL
98-
call z, __lladd_1 ; round up to even
104+
ld b, $43
105+
if 0
106+
; inlined __lladd_1
107+
inc hl
108+
add hl, de
109+
or a, a
110+
sbc hl, de
111+
jr nz, __int_to_f64_shl.finish
112+
inc de
113+
sbc hl, de
114+
add hl, de
115+
jr nz, __int_to_f64_shl.finish
116+
inc bc
99117
jr __int_to_f64_shl.finish
118+
else
119+
call __lladd_1 ; round up to even
120+
jr __int_to_f64_shl.finish
121+
end if
100122

101123
;-------------------------------------------------------------------------------
102124

@@ -116,31 +138,6 @@ __int_to_f64_zero_or_one:
116138
sbc hl, hl
117139
jr __int_to_f64_shl.finish
118140

119-
;-------------------------------------------------------------------------------
120-
121-
section .text
122-
123-
public __itod
124-
; (long double)int
125-
__itod:
126-
push hl
127-
add hl, hl ; extract signbit
128-
sbc hl, hl ; set Z flag
129-
ld e, l ; sign extend UHL to E:UHL
130-
pop hl
131-
jq __ltod
132-
133-
;-------------------------------------------------------------------------------
134-
135-
section .text
136-
137-
public __utod
138-
; (long double)unsigned int
139-
__utod:
140-
ld e, 0
141-
142-
require __ultod
143-
144141
;-------------------------------------------------------------------------------
145142

146143
section .text
@@ -160,16 +157,6 @@ __ultod:
160157
; (long double)long
161158
__ltod:
162159
bit 7, e
163-
164-
require __ltod.hijack_itod
165-
166-
;-------------------------------------------------------------------------------
167-
168-
section .text
169-
170-
private __ltod.hijack_itod
171-
__ltod.hijack_itod:
172-
173160
push af
174161
call nz, __lneg ; abs(E:UHL)
175162

@@ -211,17 +198,18 @@ __int_to_f64_shl:
211198
ld a, 51
212199
sub a, l
213200

201+
.no_rounding:
214202
; exponent = ($400 + (base2_logarithm - 1)) << 4
215203
; BC = $4EEM
216204
ld l, a
217205
ld h, $04
218206
; clear the implicit mantissa bit
219-
res 4, c ; 52 % 8 == 4
220207
add hl, hl
221208
add hl, hl
222209
add hl, hl
223210
add hl, hl
224211
ld a, l
212+
res 4, c ; 52 % 8 == 4
225213
or a, c
226214
ld c, a
227215
ld b, h
@@ -234,11 +222,11 @@ __int_to_f64_shl:
234222

235223
;-------------------------------------------------------------------------------
236224

237-
extern __ineg
238225
extern __lneg
239226
extern __lctlz
240227
extern __llctlz
241228
extern __llshl
242229
extern __llshru
243230
extern __llneg
244231
extern __lladd_1
232+
extern ___fe_cur_env
Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,44 @@
11
assume adl=1
22

3+
;-------------------------------------------------------------------------------
4+
5+
section .text
6+
7+
public _clear_fe_cur_env ; exported: clears exception-flag bits in the byte at ___fe_cur_env
8+
_clear_fe_cur_env:
9+
ld a, (___fe_cur_env) ; A = current environment byte
10+
and a, -125 ; feclearexcept(FE_ALL_EXCEPT): -125 = $83, so bits 6..2 are cleared and bits 7, 1, 0 are kept
11+
ld (___fe_cur_env), a ; store the masked byte back
12+
ret ; A still holds the masked byte on return
13+
14+
public _get_fe_cur_env ; exported: reads the byte at ___fe_cur_env without modifying it
15+
_get_fe_cur_env:
16+
ld a, (___fe_cur_env) ; A = current environment byte
17+
ret ; NOTE(review): presumably A is the 8-bit return register for C callers - confirm against the ABI
18+
19+
;-------------------------------------------------------------------------------
20+
321
section .text
422

5-
public _CRT_utod, _CRT_itod
23+
public _CRT_uitod, _CRT_itod
624

7-
_CRT_utod:
25+
_CRT_uitod:
826
ld hl, 3
927
add hl, sp
1028
ld hl, (hl)
11-
jp __utod
29+
jp __uitod
1230

1331
_CRT_itod: ; stack-argument shim: loads the word at SP+3 into HL, then tail-jumps to __itod
1432
ld hl, 3 ; offset from SP to the word to load
1533
add hl, sp ; HL = SP + 3 (NOTE(review): presumably skips the 3-byte ADL return address - confirm)
1634
ld hl, (hl) ; HL = word stored at that address
1735
jp __itod ; tail call: __itod returns directly to this routine's caller
1836

19-
extern __utod
37+
;-------------------------------------------------------------------------------
38+
39+
extern __ultod
40+
extern __ltod
41+
extern ___fe_cur_env
42+
43+
extern __uitod
2044
extern __itod

test/floating_point/float64_from_integer/src/f64_from_integer_LUT.h

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ typedef struct { uint32_t u32; uint64_t u64; } input_type;
99

1010
typedef struct { uint64_t fu32; uint64_t fi32; uint64_t fu64; uint64_t fi64; } output_type;
1111

12-
static const input_type f64_from_integer_LUT_input[256] = {
12+
static const input_type f64_from_integer_LUT_input[259] = {
1313
/* 0 */ {UINT32_C(0x00000000), UINT64_C(0x0000000000000000)},
1414
/* 1 */ {UINT32_C(0x00000001), UINT64_C(0x0000000000000001)},
1515
/* 2 */ {UINT32_C(0xFFFFFFFF), UINT64_C(0xFFFFFFFFFFFFFFFF)},
1616
/* 3 */ {UINT32_C(0x7FFFFFFF), UINT64_C(0x7FFFFFFFFFFFFFFF)},
1717
/* 4 */ {UINT32_C(0x80000000), UINT64_C(0x8000000000000000)},
18-
/* 5 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)},
19-
/* 6 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)},
20-
/* 7 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)},
18+
/* 5 */ {UINT32_C(0x80000001), UINT64_C(0x8000000000000001)},
19+
/* 6 */ {UINT32_C(0x00000002), UINT64_C(0x0000000000000002)},
20+
/* 7 */ {UINT32_C(0xFFFFFFFE), UINT64_C(0xFFFFFFFFFFFFFFFE)},
2121
/* 8 */ {UINT32_C(0xC66AAAFC), UINT64_C(0x8B8B8D6D3691C649)},
2222
/* 9 */ {UINT32_C(0xB3FE2104), UINT64_C(0xA32AC22CB1C97A60)},
2323
/* 10 */ {UINT32_C(0xE02F635F), UINT64_C(0xB36FE887C58B1EC0)},
@@ -266,17 +266,20 @@ static const input_type f64_from_integer_LUT_input[256] = {
266266
/* 253 */ {UINT32_C(0x89FE6A31), UINT64_C(0x0B23A5C0041A0FEA)},
267267
/* 254 */ {UINT32_C(0x1469770E), UINT64_C(0xCDB4EDD42210BA66)},
268268
/* 255 */ {UINT32_C(0xD8B6EA42), UINT64_C(0x34931BF01A51A099)},
269+
/* 256 */ {UINT32_C(0xCFA72379), UINT64_C(0x9022BDBCE12368EA)},
270+
/* 257 */ {UINT32_C(0xBCFC9E4C), UINT64_C(0xC53B5C41E4F559D2)},
271+
/* 258 */ {UINT32_C(0x83930797), UINT64_C(0x2F954ADDBC9A079B)},
269272
};
270273

271-
const output_type f64_from_integer_LUT_output[256] = {
274+
const output_type f64_from_integer_LUT_output[259] = {
272275
/* 0 */ {UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000), UINT64_C(0x0000000000000000)},
273276
/* 1 */ {UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000), UINT64_C(0x3FF0000000000000)},
274277
/* 2 */ {UINT64_C(0x41EFFFFFFFE00000), UINT64_C(0xBFF0000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xBFF0000000000000)},
275278
/* 3 */ {UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x41DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0x43E0000000000000)},
276279
/* 4 */ {UINT64_C(0x41E0000000000000), UINT64_C(0xC1E0000000000000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)},
277-
/* 5 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)},
278-
/* 6 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)},
279-
/* 7 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)},
280+
/* 5 */ {UINT64_C(0x41E0000000200000), UINT64_C(0xC1DFFFFFFFC00000), UINT64_C(0x43E0000000000000), UINT64_C(0xC3E0000000000000)},
281+
/* 6 */ {UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000), UINT64_C(0x4000000000000000)},
282+
/* 7 */ {UINT64_C(0x41EFFFFFFFC00000), UINT64_C(0xC000000000000000), UINT64_C(0x43F0000000000000), UINT64_C(0xC000000000000000)},
280283
/* 8 */ {UINT64_C(0x41E8CD555F800000), UINT64_C(0xC1CCCAAA82000000), UINT64_C(0x43E17171ADA6D239), UINT64_C(0xC3DD1D1CA4B25B8E)},
281284
/* 9 */ {UINT64_C(0x41E67FC420800000), UINT64_C(0xC1D30077BF000000), UINT64_C(0x43E465584596392F), UINT64_C(0xC3D7354F74D38DA1)},
282285
/* 10 */ {UINT64_C(0x41EC05EC6BE00000), UINT64_C(0xC1BFD09CA1000000), UINT64_C(0x43E66DFD10F8B164), UINT64_C(0xC3D32405DE0E9D38)},
@@ -525,6 +528,9 @@ const output_type f64_from_integer_LUT_output[256] = {
525528
/* 253 */ {UINT64_C(0x41E13FCD46200000), UINT64_C(0xC1DD806573C00000), UINT64_C(0x43A6474B80083420), UINT64_C(0x43A6474B80083420)},
526529
/* 254 */ {UINT64_C(0x41B469770E000000), UINT64_C(0x41B469770E000000), UINT64_C(0x43E9B69DBA844217), UINT64_C(0xC3C9258915EEF7A3)},
527530
/* 255 */ {UINT64_C(0x41EB16DD48400000), UINT64_C(0xC1C3A48ADF000000), UINT64_C(0x43CA498DF80D28D0), UINT64_C(0x43CA498DF80D28D0)},
531+
/* 256 */ {UINT64_C(0x41E9F4E46F200000), UINT64_C(0xC1C82C6E43800000), UINT64_C(0x43E20457B79C246D), UINT64_C(0xC3DBF75090C7B726)},
532+
/* 257 */ {UINT64_C(0x41E79F93C9800000), UINT64_C(0xC1D0C0D86D000000), UINT64_C(0x43E8A76B883C9EAB), UINT64_C(0xC3CD6251DF0D8553)},
533+
/* 258 */ {UINT64_C(0x41E07260F2E00000), UINT64_C(0xC1DF1B3E1A400000), UINT64_C(0x43C7CAA56EDE4D04), UINT64_C(0x43C7CAA56EDE4D04)},
528534
};
529535

530536
#endif /* F64_FROM_INTEGER_LUT_H */

0 commit comments

Comments
 (0)