diff --git a/src/alloc/alloc_bytes.rs b/src/alloc/alloc_bytes.rs
index 5d00d3eafc..30fcc01be0 100644
--- a/src/alloc/alloc_bytes.rs
+++ b/src/alloc/alloc_bytes.rs
@@ -12,8 +12,12 @@ use crate::helpers::ToU64 as _;
 #[derive(Clone, Debug)]
 pub enum MiriAllocParams {
+    /// Allocation was created by calling `alloc::alloc()`.
     Global,
+    /// Allocation came from the isolated allocator.
     Isolated(Rc<RefCell<IsolatedAlloc>>),
+    /// Page mapped elsewhere that we don't try to deallocate.
+    Forged(usize),
 }
 
 /// Allocation bytes that explicitly handle the layout of the data they're storing.
@@ -27,8 +31,7 @@ pub struct MiriAllocBytes {
     /// * If `self.layout.size() == 0`, then `self.ptr` was allocated with the equivalent layout with size 1.
     /// * Otherwise, `self.ptr` points to memory allocated with `self.layout`.
     ptr: *mut u8,
-    /// Whether this instance of `MiriAllocBytes` had its allocation created by calling `alloc::alloc()`
-    /// (`Global`) or the discrete allocator (`Isolated`)
+    /// Metadata on where this allocation came from and therefore how to deallocate it.
     params: MiriAllocParams,
 }
 
@@ -56,6 +59,13 @@ impl Drop for MiriAllocBytes {
                 MiriAllocParams::Global => alloc::dealloc(self.ptr, alloc_layout),
                 MiriAllocParams::Isolated(alloc) =>
                     alloc.borrow_mut().dealloc(self.ptr, alloc_layout),
+                // We can't nicely support mapping a page on one side of the FFI
+                // boundary and freeing it on the other, so just do nothing on an
+                // attempt to free.
+                //
+                // FIXME: Should emit an unsupported diagnostic when `libc::munmap()`
+                // is manually called on memory backed by forged bytes.
+                MiriAllocParams::Forged(_) => (),
             }
         }
     }
@@ -121,6 +131,7 @@ impl AllocBytes for MiriAllocBytes {
             match params {
                 MiriAllocParams::Global => alloc::alloc(layout),
                 MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc(layout),
+                MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
             }
         };
         let alloc_bytes = MiriAllocBytes::alloc_with(size.to_u64(), align, params, alloc_fn)
@@ -141,6 +152,7 @@ impl AllocBytes for MiriAllocBytes {
             match params {
                 MiriAllocParams::Global => alloc::alloc_zeroed(layout),
                 MiriAllocParams::Isolated(alloc) => alloc.borrow_mut().alloc_zeroed(layout),
+                MiriAllocParams::Forged(addr) => std::ptr::with_exposed_provenance_mut(*addr),
             }
         };
         MiriAllocBytes::alloc_with(size, align, params, alloc_fn).ok()
diff --git a/src/alloc/isolated_alloc.rs b/src/alloc/isolated_alloc.rs
index 1745727b16..ce33f8e9de 100644
--- a/src/alloc/isolated_alloc.rs
+++ b/src/alloc/isolated_alloc.rs
@@ -26,6 +26,9 @@ pub struct IsolatedAlloc {
     /// Pointers to multiple-page-sized allocations. These must also be page-aligned,
     /// with their size stored as the second element of the vector.
     huge_ptrs: Vec<(NonNull<u8>, usize)>,
+    /// Addresses of pages that we don't actually manage, but which were allocated
+    /// by foreign code and where we need to track accesses.
+    forged_pages: Vec<NonNull<u8>>,
     /// The host (not emulated) page size.
     page_size: usize,
 }
@@ -37,6 +40,7 @@ impl IsolatedAlloc {
             page_ptrs: Vec::new(),
             huge_ptrs: Vec::new(),
             page_infos: Vec::new(),
+            forged_pages: Vec::new(),
             // SAFETY: `sysconf(_SC_PAGESIZE)` is always safe to call at runtime
             // See https://www.man7.org/linux/man-pages/man3/sysconf.3.html
             page_size: unsafe { libc::sysconf(libc::_SC_PAGESIZE).try_into().unwrap() },
@@ -308,8 +312,31 @@ impl IsolatedAlloc {
     /// Returns a list of page ranges managed by the allocator, given in terms of pointers
     /// and size (in bytes).
     pub fn pages(&self) -> impl Iterator<Item = (NonNull<u8>, usize)> {
-        let pages = self.page_ptrs.iter().map(|&p| (p, self.page_size));
-        pages.chain(self.huge_ptrs.iter().copied())
+        let with_pg_sz = |&p| (p, self.page_size);
+        let pages = self.page_ptrs.iter().map(with_pg_sz);
+        pages.chain(self.huge_ptrs.iter().copied()).chain(self.forged_pages.iter().map(with_pg_sz))
+    }
+
+    /// Makes the allocator also return this page address when `pages` is called.
+    pub fn forge_page(&mut self, addr: usize) {
+        assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
+        assert!(
+            !self.forged_pages.iter().any(|a| a.addr().get() == addr),
+            "Page already contained"
+        );
+        self.forged_pages.push(NonNull::new(std::ptr::with_exposed_provenance_mut(addr)).unwrap());
+    }
+
+    /// Deletes an entry from the list of forged pages.
+    pub fn remove_forged(&mut self, addr: usize) {
+        assert!(addr.is_multiple_of(self.page_size), "Address is not page-aligned");
+        let (index, _) = self
+            .forged_pages
+            .iter()
+            .enumerate()
+            .find(|(_, p_addr)| addr == p_addr.addr().get())
+            .expect("Page not contained");
+        self.forged_pages.remove(index);
     }
 }
diff --git a/src/shims/native_lib/mod.rs b/src/shims/native_lib/mod.rs
index 914c666adb..e9988711e9 100644
--- a/src/shims/native_lib/mod.rs
+++ b/src/shims/native_lib/mod.rs
@@ -26,11 +26,27 @@ use self::ffi::OwnedArg;
 use crate::*;
 
 /// The final results of an FFI trace, containing every relevant event detected
-/// by the tracer.
+/// by the tracer. Events are ordered sequentially by the real time they occurred.
+pub type MemEvents = Vec<SingleEvent>;
+
+/// Singular event occurring in an FFI call.
 #[derive(Serialize, Deserialize, Debug)]
-pub struct MemEvents {
-    /// An list of memory accesses that occurred, in the order they occurred in.
-    pub acc_events: Vec<AccessEvent>,
+pub enum SingleEvent {
+    Acc(AccessEvent),
+    Map(MapEvent),
+}
+
+/// A single page in the address space being modified. Addresses must always be a
+/// multiple of the system page size, and the event is assumed to span from the
+/// address to `addr + page_size`.
+///
+/// TODO: Support pages that are not (just) RW.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub enum MapEvent {
+    /// A page was mapped with this base address.
+    Mmap(usize),
+    /// The page at this address was unmapped.
+    Munmap(usize),
 }
 
 /// A single memory access.
@@ -209,58 +225,68 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
     }
 
     /// Applies the `events` to Miri's internal state. The event vector must be
-    /// ordered sequentially by when the accesses happened, and the sizes are
-    /// assumed to be exact.
-    fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
+    /// ordered sequentially by when they occurred.
+    fn tracing_apply(&mut self, events: MemEvents) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        for evt in events {
+            match evt {
+                SingleEvent::Acc(acc) => this.tracing_apply_access(acc)?,
+                SingleEvent::Map(map) => this.tracing_apply_mapping(map)?,
+            }
+        }
+        interp_ok(())
+    }
+
+    /// Applies the possible effects of a single memory access. Sizes are assumed
+    /// to be exact.
+    fn tracing_apply_access(&mut self, acc: AccessEvent) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();
-        for evt in events.acc_events {
-            let evt_rg = evt.get_range();
-            // LLVM at least permits vectorising accesses to adjacent allocations,
-            // so we cannot assume 1 access = 1 allocation. :(
-            let mut rg = evt_rg.addr..evt_rg.end();
-            while let Some(curr) = rg.next() {
-                let Some(alloc_id) =
-                    this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
-                else {
-                    throw_ub_format!("Foreign code did an out-of-bounds access!")
-                };
-                let alloc = this.get_alloc_raw(alloc_id)?;
-                // The logical and physical address of the allocation coincide, so we can use
-                // this instead of `addr_from_alloc_id`.
-                let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
-
-                // Determine the range inside the allocation that this access covers. This range is
-                // in terms of offsets from the start of `alloc`. The start of the overlap range
-                // will be `curr`; the end will be the minimum of the end of the allocation and the
-                // end of the access' range.
-                let overlap = curr.strict_sub(alloc_addr)
-                    ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
-                // Skip forward however many bytes of the access are contained in the current
-                // allocation, subtracting 1 since the overlap range includes the current addr
-                // that was already popped off of the range.
-                rg.advance_by(overlap.len().strict_sub(1)).unwrap();
-
-                match evt {
-                    AccessEvent::Read(_) => {
-                        // If a provenance was read by the foreign code, expose it.
-                        for prov in alloc.provenance().get_range(this, overlap.into()) {
-                            this.expose_provenance(prov)?;
-                        }
+        let acc_rg = acc.get_range();
+        // LLVM at least permits vectorising accesses to adjacent allocations,
+        // so we cannot assume 1 access = 1 allocation. :(
+        let mut rg = acc_rg.addr..acc_rg.end();
+        while let Some(curr) = rg.next() {
+            let Some(alloc_id) =
+                this.alloc_id_from_addr(curr.to_u64(), rg.len().try_into().unwrap())
+            else {
+                throw_ub_format!("Foreign code did an out-of-bounds access!")
+            };
+            let alloc = this.get_alloc_raw(alloc_id)?;
+            // The logical and physical address of the allocation coincide, so we can use
+            // this instead of `addr_from_alloc_id`.
+            let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
+
+            // Determine the range inside the allocation that this access covers. This range is
+            // in terms of offsets from the start of `alloc`. The start of the overlap range
+            // will be `curr`; the end will be the minimum of the end of the allocation and the
+            // end of the access' range.
+            let overlap = curr.strict_sub(alloc_addr)
+                ..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
+            // Skip forward however many bytes of the access are contained in the current
+            // allocation, subtracting 1 since the overlap range includes the current addr
+            // that was already popped off of the range.
+            rg.advance_by(overlap.len().strict_sub(1)).unwrap();
+
+            match acc {
+                AccessEvent::Read(_) => {
+                    // If a provenance was read by the foreign code, expose it.
+                    for prov in alloc.provenance().get_range(this, overlap.into()) {
+                        this.expose_provenance(prov)?;
                     }
-                    AccessEvent::Write(_, certain) => {
-                        // Sometimes we aren't certain if a write happened, in which case we
-                        // only initialise that data if the allocation is mutable.
-                        if certain || alloc.mutability.is_mut() {
-                            let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
-                            alloc.process_native_write(
-                                &cx.tcx,
-                                Some(AllocRange {
-                                    start: Size::from_bytes(overlap.start),
-                                    size: Size::from_bytes(overlap.len()),
-                                }),
-                            )
-                        }
+                }
+                AccessEvent::Write(_, certain) => {
+                    // Sometimes we aren't certain if a write happened, in which case we
+                    // only initialise that data if the allocation is mutable.
+                    if certain || alloc.mutability.is_mut() {
+                        let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
+                        alloc.process_native_write(
+                            &cx.tcx,
+                            Some(AllocRange {
+                                start: Size::from_bytes(overlap.start),
+                                size: Size::from_bytes(overlap.len()),
+                            }),
+                        )
+                    }
                 }
             }
         }
@@ -269,6 +295,40 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
         interp_ok(())
     }
 
+    /// Forges an allocation corresponding to a page mapping.
+    fn tracing_apply_mapping(&mut self, map: MapEvent) -> InterpResult<'tcx> {
+        let this = self.eval_context_mut();
+        let kind = MemoryKind::Machine(MiriMemoryKind::Mmap);
+
+        match map {
+            MapEvent::Mmap(addr) => {
+                let page_size = this.machine.page_size;
+                // Pretend an allocation was created at this address, and register
+                // it with the machine's allocator so it can track it.
+                let forged = Allocation::new(
+                    Size::from_bytes(page_size),
+                    rustc_abi::Align::from_bytes(page_size).unwrap(),
+                    AllocInit::Zero,
+                    crate::alloc::MiriAllocParams::Forged(addr),
+                );
+                let ptr = this.insert_allocation(forged, kind)?;
+                this.expose_provenance(ptr.provenance)?;
+                // Also make sure accesses on this page are intercepted.
+                this.machine.allocator.as_mut().unwrap().borrow_mut().forge_page(addr);
+            }
+            MapEvent::Munmap(addr) => {
+                let ptr = this.ptr_from_addr_cast(addr.to_u64())?;
+                // This will call `munmap` on already-unmapped memory; that's fine,
+                // since we intentionally ignore the returned error from `munmap`
+                // to allow this without more invasive changes.
+                this.deallocate_ptr(ptr, None, kind)?;
+                this.machine.allocator.as_mut().unwrap().borrow_mut().remove_forged(addr);
+            }
+        }
+
+        interp_ok(())
+    }
+
     /// Extract the value from the result of reading an operand from the machine
     /// and convert it to a `OwnedArg`.
     fn op_to_ffi_arg(&self, v: &OpTy<'tcx>, tracing: bool) -> InterpResult<'tcx, OwnedArg> {
@@ -492,7 +552,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         this.call_native_with_args(link_name, dest, code_ptr, &mut libffi_args)?;
 
         if tracing {
-            this.tracing_apply_accesses(maybe_memevents.unwrap())?;
+            this.tracing_apply(maybe_memevents.unwrap())?;
         }
 
         this.write_immediate(*ret, dest)?;
diff --git a/src/shims/native_lib/trace/child.rs b/src/shims/native_lib/trace/child.rs
index 95b0617a02..55f59d07e5 100644
--- a/src/shims/native_lib/trace/child.rs
+++ b/src/shims/native_lib/trace/child.rs
@@ -55,6 +55,17 @@ impl Supervisor {
         Ok(())
     }
 
+    unsafe fn protect_pages_ignore_errs(
+        pages: impl Iterator<Item = (NonNull<u8>, usize)>,
+        prot: mman::ProtFlags,
+    ) {
+        for (pg, sz) in pages {
+            unsafe {
+                let _ = mman::mprotect(pg.cast(), sz, prot);
+            };
+        }
+    }
+
     /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
     /// As this locks the supervisor via a mutex, no other threads may enter FFI
     /// until this function returns.
@@ -113,11 +124,12 @@ impl Supervisor {
 
         // SAFETY: We set memory back to normal, so this is safe.
         unsafe {
-            Self::protect_pages(
+            // Use the error-ignoring variant here, since it's possible that
+            // foreign code may have unmapped a page of ours.
+            Self::protect_pages_ignore_errs(
                 alloc.pages(),
                 mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE,
-            )
-            .unwrap();
+            );
        }
 
        // Signal the supervisor that we are done. Will block until the supervisor continues us.
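The `Forged` machinery above boils down to a simple ownership split: Miri records only the exposed address of a page it did not create, rebuilds a pointer from that address on demand with `with_exposed_provenance_mut`, and deliberately does nothing when the wrapper is dropped. Below is a minimal, self-contained sketch of that pattern outside Miri — it assumes the `libc` crate on a Linux host, and `PageBytes` and its methods are made-up illustrative names, not Miri APIs:

```rust
use std::ptr;

/// Page-sized bytes that are either owned (we mapped them) or forged
/// (someone else mapped them and we only know the exposed address).
enum PageBytes {
    Owned(*mut u8, usize),
    Forged(usize),
}

impl PageBytes {
    fn as_ptr(&self) -> *mut u8 {
        match *self {
            PageBytes::Owned(p, _) => p,
            // Rebuild a pointer from the address; this is only sound because
            // whoever created the mapping exposed its provenance.
            PageBytes::Forged(addr) => ptr::with_exposed_provenance_mut(addr),
        }
    }
}

impl Drop for PageBytes {
    fn drop(&mut self) {
        match *self {
            PageBytes::Owned(p, len) => unsafe {
                libc::munmap(p.cast(), len);
            },
            // Like `MiriAllocParams::Forged`: we did not create this mapping,
            // so dropping the wrapper must not tear it down.
            PageBytes::Forged(_) => {}
        }
    }
}

fn main() {
    let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as usize;
    // Stand-in for "foreign code mapped a page": map one and keep only its
    // exposed address, just as the tracer does when it sees the mmap syscall.
    let raw = unsafe {
        libc::mmap(
            ptr::null_mut(),
            page_size,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
            -1,
            0,
        )
    };
    assert_ne!(raw, libc::MAP_FAILED);
    let addr = raw.expose_provenance();

    let forged = PageBytes::Forged(addr);
    unsafe { forged.as_ptr().write(42) };
    drop(forged); // no munmap happens here; the real owner stays responsible
    unsafe { libc::munmap(raw, page_size) };
}
```

The `Owned`/`Forged` split mirrors `MiriAllocParams`: freeing is only attempted for memory the wrapper itself created, which is exactly why the `Drop` impl in `alloc_bytes.rs` turns deallocation of `Forged` bytes into a no-op.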
diff --git a/src/shims/native_lib/trace/parent.rs b/src/shims/native_lib/trace/parent.rs
index 3ae98259ab..c10fac3f37 100644
--- a/src/shims/native_lib/trace/parent.rs
+++ b/src/shims/native_lib/trace/parent.rs
@@ -6,7 +6,7 @@ use nix::unistd;
 
 use super::CALLBACK_STACK_SIZE;
 use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
-use crate::shims::native_lib::{AccessEvent, AccessRange, MemEvents};
+use crate::shims::native_lib::{AccessEvent, AccessRange, MapEvent, MemEvents, SingleEvent};
 
 /// The flags to use when calling `waitid()`.
 const WAIT_FLAGS: wait::WaitPidFlag =
@@ -39,6 +39,15 @@ static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1);
 trait ArchIndependentRegs {
     /// Gets the address of the instruction pointer.
     fn ip(&self) -> usize;
+    /// Gets the syscall number from the registers (if entering one).
+    /// Since we compare these to the `libc` values, keep it as `c_long`.
+    fn syscall_nr(&self) -> libc::c_long;
+    /// Gets the first register-sized argument passed to a function.
+    fn arg1(&self) -> usize;
+    /// Gets the second register-sized argument passed to a function.
+    fn arg2(&self) -> usize;
+    /// Gets the register-sized return value of a function call.
+    fn retval(&self) -> usize;
     /// Set the instruction pointer; remember to also set the stack pointer, or
     /// else the stack might get messed up!
     fn set_ip(&mut self, ip: usize);
@@ -54,6 +63,14 @@ impl ArchIndependentRegs for libc::user_regs_struct {
     #[inline]
     fn ip(&self) -> usize { self.rip.try_into().unwrap() }
     #[inline]
+    fn syscall_nr(&self) -> libc::c_long { self.orig_rax.cast_signed() }
+    #[inline]
+    fn arg1(&self) -> usize { self.rdi.try_into().unwrap() }
+    #[inline]
+    fn arg2(&self) -> usize { self.rsi.try_into().unwrap() }
+    #[inline]
+    fn retval(&self) -> usize { self.rax.try_into().unwrap() }
+    #[inline]
     fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() }
     #[inline]
     fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() }
@@ -65,6 +82,14 @@ impl ArchIndependentRegs for libc::user_regs_struct {
     #[inline]
     fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() }
     #[inline]
+    fn syscall_nr(&self) -> libc::c_long { self.orig_eax }
+    #[inline]
+    fn arg1(&self) -> usize { self.ebx.cast_unsigned().try_into().unwrap() }
+    #[inline]
+    fn arg2(&self) -> usize { self.ecx.cast_unsigned().try_into().unwrap() }
+    #[inline]
+    fn retval(&self) -> usize { self.eax.cast_unsigned().try_into().unwrap() }
+    #[inline]
     fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() }
     #[inline]
     fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() }
@@ -73,6 +98,7 @@ impl ArchIndependentRegs for libc::user_regs_struct {
 /// A unified event representing something happening on the child process. Wraps
 /// `nix`'s `WaitStatus` and our custom signals so it can all be done with one
 /// `match` statement.
+#[derive(Debug)]
 pub enum ExecEvent {
     /// Child process requests that we begin monitoring it.
     Start(StartFfiInfo),
@@ -188,7 +214,13 @@ impl Iterator for ChildListener {
 #[derive(Debug)]
 pub struct ExecEnd(pub Option<i32>);
 
-/// Whether to call `ptrace::cont()` immediately. Used exclusively by `wait_for_signal`.
+/// What to wait for. Used exclusively by `wait_for`.
+enum WaitFor {
+    Signal(signal::Signal),
+    Syscall,
+}
+
+/// Whether to call `ptrace::syscall()` immediately. Used exclusively by `wait_for`.
 enum InitialCont {
     Yes,
     No,
@@ -208,7 +240,7 @@ pub fn sv_loop(
     assert_ne!(page_size, 0);
 
     // Things that we return to the child process.
-    let mut acc_events = Vec::new();
+    let mut events = Vec::new();
     // Memory allocated for the MiriMachine.
     let mut ch_pages = Vec::new();
@@ -222,7 +254,7 @@ pub fn sv_loop(
     let mut curr_pid = init_pid;
 
     // There's an initial sigstop we need to deal with.
-    wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?;
+    wait_for(Some(curr_pid), WaitFor::Signal(signal::SIGSTOP), InitialCont::No)?;
     ptrace::cont(curr_pid, None).unwrap();
 
     for evt in listener {
@@ -241,16 +273,17 @@ pub fn sv_loop(
                 confirm_tx.send(Confirmation).unwrap();
                 // We can't trust simply calling `Pid::this()` in the child process to give the right
                 // PID for us, so we get it this way.
-                curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap();
+                curr_pid =
+                    wait_for(None, WaitFor::Signal(signal::SIGSTOP), InitialCont::No).unwrap();
                 // Continue until next syscall.
                 ptrace::syscall(curr_pid, None).unwrap();
             }
             // Child wants to end tracing.
             ExecEvent::End => {
                 // Hand over the access info we traced.
-                event_tx.send(MemEvents { acc_events }).unwrap();
+                event_tx.send(events).unwrap();
                 // And reset our values.
-                acc_events = Vec::new();
+                events = Vec::new();
                 ch_stack = None;
 
                 // No need to monitor syscalls anymore, they'd just be ignored.
@@ -262,14 +295,11 @@ pub fn sv_loop(
                         // If it was a segfault, check if it was an artificial one
                         // caused by it trying to access the MiriMachine memory.
                         signal::SIGSEGV =>
-                            handle_segfault(
-                                pid,
-                                &ch_pages,
-                                ch_stack.unwrap(),
-                                page_size,
-                                &cs,
-                                &mut acc_events,
-                            )?,
+                            events.extend(
+                                handle_segfault(pid, &ch_pages, ch_stack.unwrap(), page_size, &cs)?
+                                    .into_iter()
+                                    .map(SingleEvent::Acc),
+                            ),
                         // Something weird happened.
                         _ => {
                             eprintln!("Process unexpectedly got {signal}; continuing...");
@@ -281,8 +311,20 @@ pub fn sv_loop(
                     }
                 }
             },
-            // Child entered or exited a syscall. For now we ignore this and just continue.
+            // Child entered or exited a syscall.
             ExecEvent::Syscall(pid) => {
+                let regs = ptrace::getregs(pid).unwrap();
+                let evts = match regs.syscall_nr() {
+                    #[cfg(not(target_arch = "x86"))]
+                    libc::SYS_mmap => handle_mmap(pid, page_size)?,
+                    // x86 also has mmap2 which is for our purposes identical.
+                    #[cfg(target_arch = "x86")]
+                    libc::SYS_mmap | libc::SYS_mmap2 => handle_mmap(pid, page_size)?,
+                    libc::SYS_munmap => handle_munmap(pid, page_size)?,
+                    // TODO: Handle sbrk (or not, if you use that you probably deserve UB).
+                    _ => vec![],
+                };
+                events.extend(evts.into_iter().map(SingleEvent::Map));
                 ptrace::syscall(pid, None).unwrap();
             }
             ExecEvent::Died(code) => {
@@ -310,17 +352,17 @@ fn get_disasm() -> capstone::Capstone {
         .unwrap()
 }
 
-/// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`.
+/// Waits for `wait`. If `init_cont`, it will first do a `ptrace::syscall`.
 /// We want to avoid that in some cases, like at the beginning of FFI.
 ///
 /// If `pid` is `None`, only one wait will be done and `init_cont` should be false.
-fn wait_for_signal(
+fn wait_for(
     pid: Option<unistd::Pid>,
-    wait_signal: signal::Signal,
+    wait: WaitFor,
     init_cont: InitialCont,
 ) -> Result<unistd::Pid, ExecEnd> {
     if matches!(init_cont, InitialCont::Yes) {
-        ptrace::cont(pid.unwrap(), None).unwrap();
+        ptrace::syscall(pid.unwrap(), None).unwrap();
     }
     // Repeatedly call `waitid` until we get the signal we want, or the process dies.
     loop {
@@ -329,26 +371,31 @@ fn wait_for_signal(
             None => wait::Id::All,
         };
         let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
-        let (signal, pid) = match stat {
+        match stat {
             // Report the cause of death, if we know it.
             wait::WaitStatus::Exited(_, code) => {
                 return Err(ExecEnd(Some(code)));
             }
             wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)),
             wait::WaitStatus::Stopped(pid, signal)
-            | wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid),
-            // This covers PtraceSyscall and variants that are impossible with
-            // the flags set (e.g. WaitStatus::StillAlive).
-            _ => {
-                ptrace::cont(pid.unwrap(), None).unwrap();
-                continue;
-            }
+            | wait::WaitStatus::PtraceEvent(pid, signal, _) =>
+                if let WaitFor::Signal(wait_signal) = wait
+                    && signal == wait_signal
+                {
+                    return Ok(pid);
+                } else {
+                    ptrace::syscall(pid, signal).map_err(|_| ExecEnd(None))?;
+                },
+            wait::WaitStatus::PtraceSyscall(pid) =>
+                if matches!(wait, WaitFor::Syscall) {
+                    return Ok(pid);
+                } else {
+                    ptrace::syscall(pid, None).map_err(|_| ExecEnd(None))?;
+                },
+            // This covers variants that are impossible with the flags set
+            // (e.g. WaitStatus::StillAlive).
+            _ => unreachable!(),
         };
-        if signal == wait_signal {
-            return Ok(pid);
-        } else {
-            ptrace::cont(pid, signal).map_err(|_| ExecEnd(None))?;
-        }
     }
 }
 
@@ -407,8 +454,7 @@ fn capstone_disassemble(
     instr: &[u8],
     addr: usize,
     cs: &capstone::Capstone,
-    acc_events: &mut Vec<AccessEvent>,
-) -> capstone::CsResult<()> {
+) -> capstone::CsResult<Vec<AccessEvent>> {
     // The arch_detail is what we care about, but it relies on these temporaries
     // that we can't drop. 0x1000 is the default base address for Capstone, and
     // we're expecting 1 instruction.
@@ -417,9 +463,9 @@ fn capstone_disassemble(
     let arch_detail = ins_detail.arch_detail();
 
     let mut found_mem_op = false;
-
+    let mut acc_events = Vec::new();
     for op in arch_detail.operands() {
-        if capstone_find_events(addr, &op, acc_events) {
+        if capstone_find_events(addr, &op, &mut acc_events) {
             if found_mem_op {
                 panic!("more than one memory operand found; we don't know which one accessed what");
             }
@@ -427,7 +473,50 @@ fn capstone_disassemble(
         }
     }
 
-    Ok(())
+    Ok(acc_events)
 }
+
+/// Intercepts a call to mmap, making sure we log down which page(s) were allocated.
+fn handle_mmap(pid: unistd::Pid, page_size: usize) -> Result<Vec<MapEvent>, ExecEnd> {
+    // We only care how large the mapping is for now.
+    // TODO: Also track the flags passed and make sure when we intercept accesses
+    // on this page we correctly restore them.
+    let len = ptrace::getregs(pid).unwrap().arg2().next_multiple_of(page_size);
+    wait_for(Some(pid), WaitFor::Syscall, InitialCont::Yes)?;
+    let addr = ptrace::getregs(pid).unwrap().retval();
+    assert!(
+        addr.is_multiple_of(page_size),
+        "got bad mmap address: {addr:#0x} is not a multiple of the page size"
+    );
+    // Save this mapping if the call succeeded.
+    if addr != 0 && addr != (-1isize).cast_unsigned() {
+        // Log each page individually.
+        Ok((addr.strict_div(page_size)..addr.strict_add(len).strict_div(page_size))
+            .map(|a| MapEvent::Mmap(a.strict_mul(page_size)))
+            .collect::<Vec<_>>())
+    } else {
+        Ok(vec![])
+    }
+}
+
+/// Same as `handle_mmap`, but for unmappings.
+fn handle_munmap(pid: unistd::Pid, page_size: usize) -> Result<Vec<MapEvent>, ExecEnd> {
+    let regs = ptrace::getregs(pid).unwrap();
+    let addr = regs.arg1();
+    let len = regs.arg2();
+    wait_for(Some(pid), WaitFor::Syscall, InitialCont::Yes)?;
+    let status = ptrace::getregs(pid).unwrap().retval();
+    // Apply the result of this unmapping if the call succeeded.
+    if status == 0 {
+        // Get the base page address for this unmapping. We don't do this in
+        // handle_mmap since the addresses there are guaranteed to be page-aligned.
+        let addr_base = addr.strict_div(page_size).strict_mul(page_size);
+        Ok((addr_base.strict_div(page_size)..addr.strict_add(len).strict_div(page_size))
+            .map(|a| MapEvent::Munmap(a.strict_mul(page_size)))
+            .collect::<Vec<_>>())
+    } else {
+        Ok(vec![])
+    }
+}
 
 /// Grabs the access that caused a segfault and logs it down if it's to our memory,
@@ -438,8 +527,7 @@ fn handle_segfault(
     pid: unistd::Pid,
     ch_pages: &[usize],
     ch_stack: usize,
     page_size: usize,
     cs: &capstone::Capstone,
-    acc_events: &mut Vec<AccessEvent>,
-) -> Result<(), ExecEnd> {
+) -> Result<Vec<AccessEvent>, ExecEnd> {
     // Get information on what caused the segfault. This contains the address
     // that triggered it.
     let siginfo = ptrace::getsiginfo(pid).unwrap();
@@ -497,7 +585,8 @@ fn handle_segfault(
         .collect::<Vec<u8>>();
 
     // Now figure out the size + type of access and log it down.
-    capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction");
+    let acc_events =
+        capstone_disassemble(&instr, addr, cs).expect("Failed to disassemble instruction");
 
     // Move the instr ptr into the deprotection code.
     #[expect(clippy::as_conversions)]
@@ -522,13 +611,13 @@ fn handle_segfault(
     ptrace::setregs(pid, new_regs).unwrap();
 
     // Our mempr_* functions end with a raise(SIGSTOP).
-    wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
+    wait_for(Some(pid), WaitFor::Signal(signal::SIGSTOP), InitialCont::Yes)?;
 
     // Step 1 instruction.
     ptrace::setregs(pid, regs_bak).unwrap();
     ptrace::step(pid, None).unwrap();
-    // Don't use wait_for_signal here since 1 instruction doesn't give room
-    // for any uncertainty + we don't want it `cont()`ing randomly by accident
+    // Don't use wait_for here since 1 instruction doesn't give room for any
+    // uncertainty + we don't want it `cont()`ing randomly by accident.
     // Also, don't let it continue with unprotected memory if something errors!
     let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
@@ -545,11 +634,11 @@ fn handle_segfault(
     new_regs.set_ip(mempr_on as usize);
     new_regs.set_sp(stack_ptr);
     ptrace::setregs(pid, new_regs).unwrap();
-    wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
+    wait_for(Some(pid), WaitFor::Signal(signal::SIGSTOP), InitialCont::Yes)?;
     ptrace::setregs(pid, regs_bak).unwrap();
     ptrace::syscall(pid, None).unwrap();
 
-    Ok(())
+    Ok(acc_events)
 }
 
 // We only get dropped into these functions via offsetting the instr pointer
diff --git a/tests/native-lib/mapped_mem.c b/tests/native-lib/mapped_mem.c
new file mode 100644
index 0000000000..96955a65de
--- /dev/null
+++ b/tests/native-lib/mapped_mem.c
@@ -0,0 +1,18 @@
+#include <stddef.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+// See comments in build_native_lib()
+#define EXPORT __attribute__((visibility("default")))
+
+/* Test: test_write_to_mapped */
+
+EXPORT void* map_page(void) {
+  size_t pg_size = (size_t)sysconf(_SC_PAGESIZE);
+  return mmap(NULL, pg_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+}
+
+EXPORT void unmap_page(void* pg) {
+  size_t pg_size = (size_t)sysconf(_SC_PAGESIZE);
+  munmap(pg, pg_size);
+}
diff --git a/tests/native-lib/pass/mapped_mem.rs b/tests/native-lib/pass/mapped_mem.rs
new file mode 100644
index 0000000000..2e18f8ba24
--- /dev/null
+++ b/tests/native-lib/pass/mapped_mem.rs
@@ -0,0 +1,20 @@
+//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
+//@compile-flags: -Zmiri-native-lib-enable-tracing -Zmiri-permissive-provenance
+
+fn main() {
+    test_write_to_mapped();
+}
+
+fn test_write_to_mapped() {
+    extern "C" {
+        fn map_page() -> *mut std::ffi::c_void;
+        fn unmap_page(pg: *mut std::ffi::c_void);
+    }
+
+    unsafe {
+        let pg = map_page().cast::<u16>();
+        *pg = 64;
+        *pg.offset(10) = 1312;
+        unmap_page(pg.cast());
+    }
+}
diff --git a/tests/native-lib/pass/mapped_mem.stderr b/tests/native-lib/pass/mapped_mem.stderr
new file mode 100644
index 0000000000..d46aa55ba9
--- /dev/null
+++ b/tests/native-lib/pass/mapped_mem.stderr
@@ -0,0 +1,19 @@
+warning: sharing memory with a native function called via FFI
+  --> tests/native-lib/pass/mapped_mem.rs:LL:CC
+   |
+LL |         unmap_page(pg.cast());
+   |         ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
+   |
+   = help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
+   = help: in particular, Miri assumes that the native call initializes all memory it has written to
+   = help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
+   = help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
+   = help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
+   = note: BACKTRACE:
+   = note: inside `test_write_to_mapped` at tests/native-lib/pass/mapped_mem.rs:LL:CC
+note: inside `main`
+  --> tests/native-lib/pass/mapped_mem.rs:LL:CC
+   |
+LL |     test_write_to_mapped();
+   |     ^^^^^^^^^^^^^^^^^^^^^^
+
diff --git a/tests/ui.rs b/tests/ui.rs
index efaaf9fc84..7ecdc08a59 100644
--- a/tests/ui.rs
+++ b/tests/ui.rs
@@ -63,6 +63,7 @@ fn build_native_lib(target: &str) -> PathBuf {
         "tests/native-lib/aggregate_arguments.c",
         "tests/native-lib/ptr_read_access.c",
         "tests/native-lib/ptr_write_access.c",
+        "tests/native-lib/mapped_mem.c",
         // Ensure we notice serious problems in the C code.
         "-Wall",
         "-Wextra",