Skip to content

Commit 06f699f

Browse files
committed
Change the memory representation of 64-bit, dual-limbed values to be little-endian.
In memory, the low (least significant) 32-bit limb is now loaded from or stored to `addr`, and the high 32-bit limb is loaded from or stored to `addr+1`. On the stack, the limb order is still big-endian.
1 parent 47bb21f commit 06f699f

File tree

62 files changed

+1241
-66
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+1241
-66
lines changed

codegen/masm/intrinsics/mem.masm

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ export.realign_dw # [chunk_hi, chunk_mid, chunk_lo, offset]
288288
swap.1 # [x_hi, x_lo]
289289
end
290290

291-
# Shift a double-word (64-bit, in two 32-bit chunks) value by the given offset.
291+
# Shift two 32-bit words by the given offset.
292292
#
293293
# Returns three 32-bit chunks [chunk_lo, chunk_mid, chunk_hi]
294294
export.offset_dw # [value_hi, value_lo, offset]
@@ -308,7 +308,7 @@ export.offset_dw # [value_hi, value_lo, offset]
308308
u32shr # [ chunk_lo, chunk_mid, chunk_hi]
309309
end
310310

311-
# Load a machine double-word (64-bit value, in two 32-bit chunks) to the operand stack
311+
# Load two 32-bit words to the operand stack
312312
export.load_dw # [addr, offset]
313313
# check for alignment and offset validity
314314
dup.1 eq.0 # [offset == 0, addr, offset]
@@ -326,16 +326,17 @@ export.load_dw # [addr, offset]
326326
else
327327
# unaligned; an unaligned double-word spans three elements
328328
#
329-
# convert offset from bytes to bits
330-
swap.1 push.8 u32wrapping_mul swap.1 # [addr, bit_offset]
331-
332-
# load the three elements containing the double-word on the stack
333-
dup.0 push.2 u32overflowing_add assertz mem_load # [e2, addr, bit_offset]
334-
dup.1 push.1 add mem_load # [e1, e2, addr, bit_offset]
335-
movup.2 mem_load # [e0, e1, e2, bit_offset]
336-
337-
# re-align it, and we're done
329+
# convert the byte offset to a bit count measured from the right-hand side of the triplet (32 - offset * 8)
330+
swap.1 push.8 mul push.32 swap.1 sub swap.1 # [addr, bit_offset]
331+
332+
# load the three elements containing the double-word on the stack and re-align
333+
# NOTE: realign_dw expects the hi, mid, lo word inputs and returns hi, lo (i.e., everything
334+
# is 64-bit big-endian limb oriented), so we must swap the output to be in the correct order
335+
dup.0 mem_load # [e0, addr, bit_offset]
336+
dup.1 push.1 u32overflowing_add assertz mem_load # [e1, e0, addr, bit_offset]
337+
movup.2 push.2 u32overflowing_add assertz mem_load # [e2, e1, e0, bit_offset]
338338
exec.realign_dw
339+
swap.1
339340
end
340341
end
341342

@@ -434,7 +435,7 @@ export.store_sw # [addr, offset, value]
434435
end
435436
end
436437

437-
# Store a 64-bit value, i.e. two 32-bit machine words from the given native pointer tuple.
438+
# Store two 32-bit words to the given native pointer tuple.
438439
#
439440
# A native pointer tuple consists of an element address where the data begins, and a byte offset,
440441
# which is the offset of the first byte, in the 32-bit representation of that element.
@@ -536,7 +537,7 @@ end
536537
# Write `count` copies of `value` to memory, starting at `dst`.
537538
#
538539
# * `dst` is expected to be an address in byte-addressable space, _not_ an element address.
539-
# * `value` must be a 64-bit value or smaller
540+
# * `value` must be two 32-bit words.
540541
export.memset_dw # [size, dst, count, value_hi, value_lo]
541542
# prepare to loop until `count` iterations have been performed
542543
push.0 # [i, dst, size, count, value_hi, value_lo]

codegen/masm/src/emit/mem.rs

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ impl OpEmitter<'_> {
6262
);
6363
match &ty {
6464
Type::I128 => self.load_quad_word(None, span),
65-
Type::I64 | Type::U64 => self.load_double_word(None, span),
65+
Type::I64 | Type::U64 => self.load_double_word_int(None, span),
6666
Type::Felt => self.load_felt(None, span),
6767
Type::I32 | Type::U32 => self.load_word(None, span),
6868
ty @ (Type::I16 | Type::U16 | Type::U8 | Type::I8 | Type::I1) => {
@@ -87,7 +87,7 @@ impl OpEmitter<'_> {
8787
let ptr = NativePtr::from_ptr(addr);
8888
match &ty {
8989
Type::I128 => self.load_quad_word(Some(ptr), span),
90-
Type::I64 | Type::U64 => self.load_double_word(Some(ptr), span),
90+
Type::I64 | Type::U64 => self.load_double_word_int(Some(ptr), span),
9191
Type::Felt => self.load_felt(Some(ptr), span),
9292
Type::I32 | Type::U32 => self.load_word(Some(ptr), span),
9393
Type::I16 | Type::U16 | Type::U8 | Type::I8 | Type::I1 => {
@@ -170,13 +170,17 @@ impl OpEmitter<'_> {
170170
}
171171
}
172172

173-
/// Load a pair of machine words (32-bit elements) to the operand stack
174-
fn load_double_word(&mut self, ptr: Option<NativePtr>, span: SourceSpan) {
173+
/// Load a 64-bit word from the given address.
174+
fn load_double_word_int(&mut self, ptr: Option<NativePtr>, span: SourceSpan) {
175175
if let Some(imm) = ptr {
176-
return self.load_double_word_imm(imm, span);
176+
self.load_double_word_imm(imm, span);
177+
} else {
178+
self.raw_exec("intrinsics::mem::load_dw", span);
177179
}
178180

179-
self.raw_exec("intrinsics::mem::load_dw", span);
181+
// The `intrinsics::mem` procedure loads two 32-bit words with the first at the top of the stack. Swap
182+
// them to make a big-endian-limbed stack value.
183+
self.emit(masm::Instruction::Swap1, span);
180184
}
181185

182186
/// Load a sub-word value (u8, u16, etc.) from memory
@@ -538,7 +542,7 @@ impl OpEmitter<'_> {
538542
);
539543
match value_ty {
540544
Type::I128 => self.store_quad_word(None, span),
541-
Type::I64 | Type::U64 => self.store_double_word(None, span),
545+
Type::I64 | Type::U64 => self.store_double_word_int(None, span),
542546
Type::Felt => self.store_felt(None, span),
543547
Type::I32 | Type::U32 => self.store_word(None, span),
544548
ref ty if ty.size_in_bytes() <= 4 => self.store_small(ty, None, span),
@@ -566,7 +570,7 @@ impl OpEmitter<'_> {
566570
let ptr = NativePtr::from_ptr(addr);
567571
match value_ty {
568572
Type::I128 => self.store_quad_word(Some(ptr), span),
569-
Type::I64 | Type::U64 => self.store_double_word(Some(ptr), span),
573+
Type::I64 | Type::U64 => self.store_double_word_int(Some(ptr), span),
570574
Type::Felt => self.store_felt(Some(ptr), span),
571575
Type::I32 | Type::U32 => self.store_word(Some(ptr), span),
572576
ref ty if ty.size_in_bytes() <= 4 => self.store_small(ty, Some(ptr), span),
@@ -853,13 +857,18 @@ impl OpEmitter<'_> {
853857
}
854858
}
855859

856-
/// Store a pair of machine words (32-bit elements) to the operand stack
857-
fn store_double_word(&mut self, ptr: Option<NativePtr>, span: SourceSpan) {
860+
/// Store a 64-bit word from the operand stack to memory
861+
fn store_double_word_int(&mut self, ptr: Option<NativePtr>, span: SourceSpan) {
862+
// The `intrinsics::mem` procedure stores two 32-bit words in stack order. Swap them (the 3rd and 4th
863+
// params) first to make a little-endian-limbed memory value.
864+
self.emit(masm::Instruction::MovUp2, span);
865+
self.emit(masm::Instruction::MovDn3, span);
866+
858867
if let Some(imm) = ptr {
859-
return self.store_double_word_imm(imm, span);
868+
self.store_double_word_imm(imm, span);
869+
} else {
870+
self.raw_exec("intrinsics::mem::store_dw", span);
860871
}
861-
862-
self.raw_exec("intrinsics::mem::store_dw", span);
863872
}
864873

865874
fn store_double_word_imm(&mut self, ptr: NativePtr, span: SourceSpan) {

tests/integration/expected/abi_transform_stdlib_blake3_hash.masm

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,11 +156,14 @@ pub proc entrypoint(i32, i32)
156156
exec.::intrinsics::mem::load_dw
157157
trace.252
158158
nop
159+
swap.1
159160
push.24
160161
dup.5
161162
u32wrapping_add
162163
u32divmod.4
163164
swap.1
165+
movup.2
166+
movdn.3
164167
trace.240
165168
nop
166169
exec.::intrinsics::mem::store_dw
@@ -183,11 +186,14 @@ pub proc entrypoint(i32, i32)
183186
exec.::intrinsics::mem::load_dw
184187
trace.252
185188
nop
189+
swap.1
186190
push.16
187191
dup.5
188192
u32wrapping_add
189193
u32divmod.4
190194
swap.1
195+
movup.2
196+
movdn.3
191197
trace.240
192198
nop
193199
exec.::intrinsics::mem::store_dw
@@ -210,11 +216,14 @@ pub proc entrypoint(i32, i32)
210216
exec.::intrinsics::mem::load_dw
211217
trace.252
212218
nop
219+
swap.1
213220
push.8
214221
dup.5
215222
u32wrapping_add
216223
u32divmod.4
217224
swap.1
225+
movup.2
226+
movdn.3
218227
trace.240
219228
nop
220229
exec.::intrinsics::mem::store_dw
@@ -233,9 +242,12 @@ pub proc entrypoint(i32, i32)
233242
exec.::intrinsics::mem::load_dw
234243
trace.252
235244
nop
245+
swap.1
236246
movup.3
237247
u32divmod.4
238248
swap.1
249+
movup.2
250+
movdn.3
239251
trace.240
240252
nop
241253
exec.::intrinsics::mem::store_dw

tests/integration/expected/add_i128.masm

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ pub proc entrypoint(i32, [u32; 2], [u32; 2], [u32; 2], [u32; 2])
4444
assertz
4545
u32divmod.4
4646
swap.1
47+
movup.2
48+
movdn.3
4749
trace.240
4850
nop
4951
exec.::intrinsics::mem::store_dw
@@ -58,6 +60,8 @@ pub proc entrypoint(i32, [u32; 2], [u32; 2], [u32; 2], [u32; 2])
5860
assertz
5961
u32divmod.4
6062
swap.1
63+
movup.2
64+
movdn.3
6165
trace.240
6266
nop
6367
exec.::intrinsics::mem::store_dw

tests/integration/expected/add_u128.masm

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ pub proc entrypoint(i32, [u32; 2], [u32; 2], [u32; 2], [u32; 2])
4444
assertz
4545
u32divmod.4
4646
swap.1
47+
movup.2
48+
movdn.3
4749
trace.240
4850
nop
4951
exec.::intrinsics::mem::store_dw
@@ -58,6 +60,8 @@ pub proc entrypoint(i32, [u32; 2], [u32; 2], [u32; 2], [u32; 2])
5860
assertz
5961
u32divmod.4
6062
swap.1
63+
movup.2
64+
movdn.3
6165
trace.240
6266
nop
6367
exec.::intrinsics::mem::store_dw

tests/integration/expected/examples/auth_component_no_auth.masm

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
109109
exec.::intrinsics::mem::load_dw
110110
trace.252
111111
nop
112+
swap.1
112113
push.56
113114
dup.3
114115
add
@@ -121,6 +122,8 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
121122
assertz
122123
u32divmod.4
123124
swap.1
125+
movup.2
126+
movdn.3
124127
trace.240
125128
nop
126129
exec.::intrinsics::mem::store_dw
@@ -143,6 +146,7 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
143146
exec.::intrinsics::mem::load_dw
144147
trace.252
145148
nop
149+
swap.1
146150
push.48
147151
dup.3
148152
add
@@ -155,6 +159,8 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
155159
assertz
156160
u32divmod.4
157161
swap.1
162+
movup.2
163+
movdn.3
158164
trace.240
159165
nop
160166
exec.::intrinsics::mem::store_dw
@@ -194,6 +200,7 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
194200
exec.::intrinsics::mem::load_dw
195201
trace.252
196202
nop
203+
swap.1
197204
push.56
198205
dup.3
199206
add
@@ -206,6 +213,8 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
206213
assertz
207214
u32divmod.4
208215
swap.1
216+
movup.2
217+
movdn.3
209218
trace.240
210219
nop
211220
exec.::intrinsics::mem::store_dw
@@ -228,6 +237,7 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
228237
exec.::intrinsics::mem::load_dw
229238
trace.252
230239
nop
240+
swap.1
231241
push.48
232242
dup.3
233243
add
@@ -240,6 +250,8 @@ proc miden:base/authentication-component@1.0.0#auth-procedure(
240250
assertz
241251
u32divmod.4
242252
swap.1
253+
movup.2
254+
movdn.3
243255
trace.240
244256
nop
245257
exec.::intrinsics::mem::store_dw
@@ -599,6 +611,7 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
599611
exec.::intrinsics::mem::load_dw
600612
trace.252
601613
nop
614+
swap.1
602615
push.8
603616
dup.3
604617
add
@@ -611,6 +624,8 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
611624
assertz
612625
u32divmod.4
613626
swap.1
627+
movup.2
628+
movdn.3
614629
trace.240
615630
nop
616631
exec.::intrinsics::mem::store_dw
@@ -630,6 +645,7 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
630645
exec.::intrinsics::mem::load_dw
631646
trace.252
632647
nop
648+
swap.1
633649
dup.2
634650
push.4
635651
dup.1
@@ -639,6 +655,8 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
639655
assertz
640656
u32divmod.4
641657
swap.1
658+
movup.2
659+
movdn.3
642660
trace.240
643661
nop
644662
exec.::intrinsics::mem::store_dw
@@ -786,6 +804,7 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
786804
exec.::intrinsics::mem::load_dw
787805
trace.252
788806
nop
807+
swap.1
789808
push.8
790809
dup.4
791810
add
@@ -798,6 +817,8 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
798817
assertz
799818
u32divmod.4
800819
swap.1
820+
movup.2
821+
movdn.3
801822
trace.240
802823
nop
803824
exec.::intrinsics::mem::store_dw
@@ -816,6 +837,7 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
816837
exec.::intrinsics::mem::load_dw
817838
trace.252
818839
nop
840+
swap.1
819841
movup.2
820842
push.8
821843
dup.1
@@ -825,6 +847,8 @@ proc miden_stdlib_sys::intrinsics::word::Word::reverse(
825847
assertz
826848
u32divmod.4
827849
swap.1
850+
movup.2
851+
movdn.3
828852
trace.240
829853
nop
830854
exec.::intrinsics::mem::store_dw

0 commit comments

Comments
 (0)