-
Notifications
You must be signed in to change notification settings - Fork 50
Open
Milestone
Description
Discovered in #808
To reproduce add the following test to ests/integration/src/rust_masm_tests/misc.rs:
/// Test that Vec reallocation correctly copies existing elements to the new buffer.
///
/// This test reproduces a bug in Vec reallocation. The test pushes 5 elements to a Vec (which
/// triggers reallocation from capacity 4 to 8), then verifies all elements are accessible.
#[test]
fn test_vec_realloc_copies_data() {
let main_fn = r#"() -> Felt {
extern crate alloc;
use alloc::vec::Vec;
// Create a Vec and push 5 Felt elements
// Vec typically starts with capacity 4, so the 5th push triggers reallocation
let mut v: Vec<Felt> = Vec::new();
v.push(felt!(11111));
v.push(felt!(22222));
v.push(felt!(33333));
v.push(felt!(44444));
v.push(felt!(55555)); // This triggers reallocation
// Sum all elements - if realloc doesn't copy, first 4 elements will be garbage
let sum = v[0] + v[1] + v[2] + v[3] + v[4];
// Expected: 11111 + 22222 + 33333 + 44444 + 55555 = 166665
sum
}"#;
let config = WasmTranslationConfig::default();
let mut test = CompilerTestBuilder::rust_fn_body_with_stdlib_sys(
"vec_realloc_copies_data",
main_fn,
config,
[],
)
.build();
test.expect_wasm(expect_file!["../../expected/vec_realloc_copies_data.wat"]);
test.expect_ir(expect_file!["../../expected/vec_realloc_copies_data.hir"]);
test.expect_masm(expect_file!["../../expected/vec_realloc_copies_data.masm"]);
let package = test.compiled_package();
let args: [Felt; 0] = [];
eval_package::<Felt, _, _>(&package, [], &args, &test.session, |trace| {
let result: u64 = trace.parse_result::<Felt>().unwrap().as_int();
// Expected sum: 11111 + 22222 + 33333 + 44444 + 55555 = 166665
// Result: The test fails with result 55658 instead of 166665, confirming that:
// The 5th element (55555) is stored correctly (55658 - 55555 = 103 came from some values
// at the first four elements)
assert_eq!(result, 166665, "Vec reallocation failed to copy existing elements");
Ok(())
})
.unwrap();
}Our memcpy is at
compiler/codegen/masm/src/emit/mem.rs
Lines 670 to 816 in ba60fd6
| pub fn memcpy(&mut self, span: SourceSpan) { | |
| let src = self.stack.pop().expect("operand stack is empty"); | |
| let dst = self.stack.pop().expect("operand stack is empty"); | |
| let count = self.stack.pop().expect("operand stack is empty"); | |
| assert_eq!(count.ty(), Type::U32, "expected count operand to be a u32"); | |
| let ty = src.ty(); | |
| assert!(ty.is_pointer()); | |
| assert_eq!(ty, dst.ty(), "expected src and dst operands to have the same type"); | |
| let value_ty = ty.pointee().unwrap(); | |
| let value_size = u32::try_from(value_ty.size_in_bytes()).expect("invalid value size"); | |
| // Use optimized intrinsics when available | |
| match value_size { | |
| // Word-sized values have an optimized intrinsic we can lean on | |
| 16 => { | |
| // We have to convert byte addresses to element addresses | |
| self.emit_all( | |
| [ | |
| // Convert `src` to element address, and assert aligned to an element address | |
| // | |
| // TODO: We should probably also assert that the address is word-aligned, but | |
| // that is going to happen anyway. That said, the closer to the source the | |
| // better for debugging. | |
| masm::Instruction::U32DivModImm(4.into()), | |
| masm::Instruction::Assertz, | |
| // Convert `dst` to an element address the same way | |
| masm::Instruction::Swap1, | |
| masm::Instruction::U32DivModImm(4.into()), | |
| masm::Instruction::Assertz, | |
| // Swap with `count` to get us into the correct ordering: [count, src, dst] | |
| masm::Instruction::Swap2, | |
| ], | |
| span, | |
| ); | |
| self.raw_exec("std::mem::memcopy_words", span); | |
| return; | |
| } | |
| // Values which can be broken up into word-sized chunks can piggy-back on the | |
| // intrinsic for word-sized values, but we have to compute a new `count` by | |
| // multiplying `count` by the number of words in each value | |
| size if size > 16 && size.is_multiple_of(16) => { | |
| let factor = size / 16; | |
| self.emit_all( | |
| [ | |
| // Convert `src` to element address, and assert aligned to an element address | |
| // | |
| // TODO: We should probably also assert that the address is word-aligned, but | |
| // that is going to happen anyway. That said, the closer to the source the | |
| // better for debugging. | |
| masm::Instruction::U32DivModImm(4.into()), | |
| masm::Instruction::Assertz, | |
| // Convert `dst` to an element address the same way | |
| masm::Instruction::Swap1, | |
| masm::Instruction::U32DivModImm(4.into()), | |
| masm::Instruction::Assertz, | |
| // Swap with `count` to get us into the correct ordering: [count, src, dst] | |
| masm::Instruction::Swap2, | |
| // Compute the corrected count | |
| masm::Instruction::U32OverflowingMulImm(factor.into()), | |
| masm::Instruction::Assertz, // [count * (size / 16), src, dst] | |
| ], | |
| span, | |
| ); | |
| self.raw_exec("std::mem::memcopy_words", span); | |
| return; | |
| } | |
| // For now, all other values fallback to the default implementation | |
| _ => (), | |
| } | |
| // Create new block for loop body and switch to it temporarily | |
| let mut body = Vec::default(); | |
| let mut body_emitter = OpEmitter::new(self.invoked, &mut body, self.stack); | |
| // Loop body - compute address for next value to be written | |
| // Compute the source and destination addresses | |
| body_emitter.emit_all( | |
| [ | |
| // [i, src, dst, count] | |
| masm::Instruction::Dup2, // [dst, i, src, dst, count] | |
| masm::Instruction::Dup1, // [i, dst, i, src, dst, count] | |
| ], | |
| span, | |
| ); | |
| body_emitter.emit_push(value_size, span); // [offset, i, dst, i, src, dst, count] | |
| body_emitter.emit_all( | |
| [ | |
| masm::Instruction::U32OverflowingMadd, | |
| masm::Instruction::Assertz, // [new_dst := i * offset + dst, i, src, dst, count] | |
| masm::Instruction::Dup2, // [src, new_dst, i, src, dst, count] | |
| masm::Instruction::Dup2, // [i, src, new_dst, i, src, dst, count] | |
| ], | |
| span, | |
| ); | |
| body_emitter.emit_push(value_size, span); // [offset, i, src, new_dst, i, src, dst, count] | |
| body_emitter.emit_all( | |
| [ | |
| masm::Instruction::U32OverflowingMadd, | |
| masm::Instruction::Assertz, // [new_src := i * offset + src, new_dst, i, src, dst, count] | |
| ], | |
| span, | |
| ); | |
| // Load the source value | |
| body_emitter.push(count.clone()); | |
| body_emitter.push(dst.clone()); | |
| body_emitter.push(src.clone()); | |
| body_emitter.push(Type::U32); | |
| body_emitter.push(dst.clone()); | |
| body_emitter.push(src.clone()); | |
| body_emitter.load(value_ty.clone(), span); // [value, new_dst, i, src, dst, count] | |
| // Write to the destination | |
| body_emitter.swap(1, span); // [new_dst, value, i, src, dst, count] | |
| body_emitter.store(span); // [i, src, dst, count] | |
| // Increment iteration count, determine whether to continue loop | |
| body_emitter.emit_all( | |
| [ | |
| masm::Instruction::U32WrappingAddImm(1.into()), | |
| masm::Instruction::Dup0, // [i++, i++, src, dst, count] | |
| masm::Instruction::Dup4, // [count, i++, i++, src, dst, count] | |
| masm::Instruction::U32Gte, // [i++ >= count, i++, src, dst, count] | |
| ], | |
| span, | |
| ); | |
| // Switch back to original block and emit loop header and 'while.true' instruction | |
| // | |
| // Loop header - prepare to loop until `count` iterations have been performed | |
| // [src, dst, count] | |
| self.emit_push(0u32, span); // [i, src, dst, count] | |
| self.emit(masm::Instruction::Dup3, span); // [count, i, src, dst, count] | |
| self.emit_push(Felt::ZERO, span); | |
| self.emit( | |
| masm::Instruction::Gte, // [count > 0, i, src, dst, count] | |
| span, | |
| ); | |
| self.current_block.push(masm::Op::While { | |
| span, | |
| body: masm::Block::new(span, body), | |
| }); | |
| // Cleanup - at end of 'while' loop, drop the 4 operands remaining on the stack | |
| self.dropn(4, span); | |
| } |
Metadata
Metadata
Assignees
Labels
No labels