diff --git a/CMakeLists.txt b/CMakeLists.txt index a2339a7425a..4497b735583 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ set(supports32bit true) set(x86ish false) if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") set(supports32bit false) - set(FLAGS_COMMON "${FLAGS_COMMON} -march=armv8.3-a") + set(FLAGS_COMMON "${FLAGS_COMMON} -march=armv8.3-a -Wl,-dynamic-linker=/lib/aarch64-linux-gnu/atomics/ld-linux-aarch64.so.1") else() set(x86ish true) set(FLAGS_COMMON "${FLAGS_COMMON} -msse2 -D__MMX__ -D__SSE__ -D__SSE2__") @@ -342,7 +342,7 @@ foreach(file ${RR_PAGE_FILES}) set_source_files_properties("${CMAKE_SOURCE_DIR}/src/preload/${file}" PROPERTIES COMPILE_FLAGS ${PRELOAD_COMPILE_FLAGS}) endforeach(file) -set_target_properties(rrpage PROPERTIES LINK_FLAGS "-Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -nostartfiles -nostdlib -Wl,-z,max-page-size=0x1000 ${LINKER_FLAGS}") +set_target_properties(rrpage PROPERTIES LINK_FLAGS "-Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib -Wl,-z,max-page-size=0x1000 ${LINKER_FLAGS}") set_target_properties(rrpage PROPERTIES LINK_DEPENDS ${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld) # CMake seems to have trouble generating the link line without this set_target_properties(rrpage PROPERTIES LINKER_LANGUAGE C) @@ -364,6 +364,7 @@ set(PRELOAD_SOURCE_FILES rrcalls.h syscallbuf.h ) +if (x86ish) add_library(rrpreload) foreach(file ${PRELOAD_FILES}) target_sources(rrpreload PUBLIC "${CMAKE_SOURCE_DIR}/src/preload/${file}") @@ -372,6 +373,7 @@ foreach(file ${PRELOAD_FILES}) endforeach(file) set_target_properties(rrpreload PROPERTIES LINK_FLAGS "-nostartfiles ${LINKER_FLAGS}") set_target_properties(rrpreload PROPERTIES INSTALL_RPATH "\$ORIGIN") +endif() set(AUDIT_FILES rtld-audit.c @@ -544,29 +546,10 @@ else() set(CMAKE_INSTALL_INCLUDEDIR "include") endif() -option(RR_BUILD_SHARED "Build the rr shared library as well as the binary (experimental).") - -if(RR_BUILD_SHARED) - 
add_library(rr ${RR_SOURCES}) - set_target_properties(rr PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) - add_executable(rrbin src/main.cc) - set(RR_BIN rrbin) - post_build_executable(rrbin) - set_target_properties(rrbin PROPERTIES ENABLE_EXPORTS true OUTPUT_NAME rr) - set_target_properties(rrbin PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - set_target_properties(rrbin PROPERTIES INSTALL_RPATH_USE_LINK_PATH true) - set_target_properties(brotli PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(rrbin rr) - install(TARGETS rr - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -else() - add_executable(rr ${RR_SOURCES} src/main.cc) - set_target_properties(rr PROPERTIES ENABLE_EXPORTS true) - post_build_executable(rr) - set(RR_BIN rr) -endif() +add_executable(rr ${RR_SOURCES} src/main.cc) +set_target_properties(rr PROPERTIES ENABLE_EXPORTS true) +post_build_executable(rr) +set(RR_BIN rr) add_dependencies(rr Generated) option(strip "Strip debug info from rr binary") @@ -593,13 +576,12 @@ else() endif() set_target_properties(rr PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") -if(RR_BUILD_SHARED) - set_target_properties(rrbin PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") -endif() +if (x86ish) target_link_libraries(rrpreload ${CMAKE_DL_LIBS} ) +endif() add_executable(rr_exec_stub src/exec_stub.c) post_build_executable(rr_exec_stub) @@ -661,7 +643,11 @@ install(PROGRAMS scripts/signal-rr-recording.sh install(PROGRAMS scripts/rr_completion DESTINATION ${CMAKE_INSTALL_DATADIR}/bash-completion/completions RENAME rr) -install(TARGETS ${RR_BIN} rrpreload rrpage rraudit rr_exec_stub +set(RR_INSTALL_LIBS rrpage rraudit rr_exec_stub) +if (x86ish) + set(RR_INSTALL_LIBS rrpreload ${RR_INSTALL_LIBS}) +endif() +install(TARGETS ${RR_BIN} ${RR_INSTALL_LIBS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr ARCHIVE 
DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) @@ -686,7 +672,7 @@ if(rr_32BIT AND rr_64BIT) PROPERTIES COMPILE_FLAGS "-m32 ${PRELOAD_COMPILE_FLAGS}") endforeach(file) - set_target_properties(rrpage_32 PROPERTIES LINK_FLAGS "-m32 -Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -nostartfiles -nostdlib ${LINKER_FLAGS}") + set_target_properties(rrpage_32 PROPERTIES LINK_FLAGS "-m32 -Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib ${LINKER_FLAGS}") set_target_properties(rrpage_32 PROPERTIES LINK_DEPENDS ${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld) set_target_properties(rrpage_32 PROPERTIES LINKER_LANGUAGE C) @@ -785,6 +771,7 @@ set(BASIC_TESTS big_buffers block block_open + bpf brk brk2 capget @@ -835,10 +822,12 @@ set(BASIC_TESTS exit_with_syscallbuf_signal fadvise fanotify + fatal_init_signal fatal_sigsegv_thread - fcntl_dupfd x86/fault_in_code_page + fcntl_dupfd fcntl_misc + fcntl_notify fcntl_owner_ex fcntl_rw_hints fcntl_seals @@ -884,17 +873,23 @@ set(BASIC_TESTS invalid_fcntl invalid_ioctl io + io_uring ioctl + ioctl_fb ioctl_fs ioctl_pty ioctl_sg ioctl_tty + ioctl_vt + x86/ioperm + x86/iopl join_threads joystick kcmp keyctl kill_newborn kill_ptracee + large_hole large_write_deadlock legacy_ugid x86/lsl @@ -965,6 +960,7 @@ set(BASIC_TESTS pid_ns_reap pid_ns_segv pidfd + x86/pkeys poll_sig_race ppoll prctl @@ -1002,7 +998,7 @@ set(BASIC_TESTS ptrace_singlestep ptrace_syscall ptrace_syscall_clone_untraced - ptrace_sysemu + x86/ptrace_sysemu ptrace_sysemu_syscall ptrace_trace_clone ptrace_trace_exit @@ -1026,6 +1022,7 @@ set(BASIC_TESTS rename rlimit robust_futex + rseq rusage samask save_data_fd @@ -1037,6 +1034,7 @@ set(BASIC_TESTS scm_rights scratch_read seccomp + seccomp_cloning seccomp_clone_fail seccomp_desched seccomp_kill_exit @@ -1058,6 +1056,8 @@ set(BASIC_TESTS setsid setuid shared_exec + shared_monitor + shared_offset shared_write shm shm_unmap @@ -1092,6 +1092,7 @@ set(BASIC_TESTS sigtrap 
simple_threads_stress sioc + small_holes sock_names_opts spinlock_priorities splice @@ -1140,12 +1141,15 @@ set(BASIC_TESTS unexpected_exit_pid_ns unjoined_thread unshare + userfaultfd utimes + vdso_parts vfork_flush vfork_shared video_capture vm_readv_writev vsyscall + vsyscall_timeslice x86/x87env wait wait_sigstop @@ -1224,7 +1228,8 @@ set(TESTS_WITH_PROGRAM exit_group exit_race exit_status - explicit_checkpoints + x86/explicit_checkpoints + fd_limit fork_stress fork_syscalls function_calls @@ -1318,8 +1323,8 @@ set(TESTS_WITH_PROGRAM ttyname unexpected_stack_growth user_ignore_sig - vdso_gettimeofday_stack vdso_clock_gettime_stack + vdso_gettimeofday_stack vdso_time_stack vfork vfork_read_clone_stress @@ -1432,6 +1437,7 @@ set(TESTS_WITHOUT_PROGRAM syscallbuf_timeslice_250 trace_version term_trace_cpu + tty unmap_vdso unwind_on_signal vfork_exec @@ -1666,20 +1672,20 @@ if(BUILD_TESTS) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${OTHER_TESTS}) get_filename_component(testname ${test} NAME) add_test(${test}-32 - bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir) + bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf - bash source_dir/src/test/basic_test.run ${testname}_32 -n bin_dir) + bash source_dir/src/test/basic_test.run ${testname}_32 -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) get_filename_component(testname ${test} NAME) add_test(${test}-32 - bash source_dir/src/test/${test}.run ${testname}_32 "" bin_dir) + bash source_dir/src/test/${test}.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf - bash source_dir/src/test/${test}.run ${testname}_32 -n bin_dir) + bash source_dir/src/test/${test}.run ${testname}_32 -n bin_dir 
${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) endif() diff --git a/README.md b/README.md index 10f3caa4d3f..c938f250e0b 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ Please contribute! Make sure to review the [pull request checklist](/CONTRIBUTI If you find rr useful, please [add a testimonial](https://github.com/rr-debugger/rr/wiki/Testimonials). +rr development is sponsored by [Pernosco](https://pernos.co) and was originated by [Mozilla](https://www.mozilla.org). + # System requirements * Linux kernel ≥ 3.11 is required (for `PTRACE_SETSIGMASK`). diff --git a/src/AutoRemoteSyscalls.cc b/src/AutoRemoteSyscalls.cc index 865c082e072..4a62e09ad93 100644 --- a/src/AutoRemoteSyscalls.cc +++ b/src/AutoRemoteSyscalls.cc @@ -113,6 +113,11 @@ AutoRemoteSyscalls::AutoRemoteSyscalls(Task* t, } void AutoRemoteSyscalls::setup_path(bool enable_singlestep_path) { +#if defined(__aarch64__) + // XXXkhuey this fast path doesn't work on AArch64 yet, go slow instead + enable_singlestep_path = false; +#endif + if (!replaced_bytes.empty()) { // XXX what to do here to clean up if the task died unexpectedly? t->write_mem(remote_ptr(initial_regs.ip().to_data_ptr()), diff --git a/src/AutoRemoteSyscalls.h b/src/AutoRemoteSyscalls.h index 6150ed1ba01..b68f0670e6f 100644 --- a/src/AutoRemoteSyscalls.h +++ b/src/AutoRemoteSyscalls.h @@ -289,8 +289,6 @@ class AutoRemoteSyscalls { AutoRemoteSyscalls& operator=(const AutoRemoteSyscalls&) = delete; AutoRemoteSyscalls(const AutoRemoteSyscalls&) = delete; - void* operator new(size_t) = delete; - void operator delete(void*) = delete; }; } // namespace rr diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc index c8a372e0bdb..f639835dc48 100644 --- a/src/DiversionSession.cc +++ b/src/DiversionSession.cc @@ -140,8 +140,15 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( // An exit might have occurred while processing a previous syscall. 
if (t->ptrace_event() == PTRACE_EVENT_EXIT) { + // We're about to destroy the task, so capture the context while + // we can. + TaskContext context(t); handle_ptrace_exit_event(t); + // This is now a dangling pointer, so clear it. + context.task = nullptr; result.status = DIVERSION_EXITED; + result.break_status.task_context = context; + result.break_status.task_exit = true; return result; } diff --git a/src/DumpCommand.cc b/src/DumpCommand.cc index d7f44cf0738..b78e07380b8 100644 --- a/src/DumpCommand.cc +++ b/src/DumpCommand.cc @@ -36,7 +36,8 @@ DumpCommand DumpCommand::singleton( "dump", " rr dump [OPTIONS] [] [...]\n" " Event specs can be either an event number like `127', or a range\n" - " like `1000-5000'. By default, all events are dumped.\n" + " like `1000-5000', or `end' for the last record in the trace.\n" + " By default, all events are dumped.\n" " -b, --syscallbuf dump syscallbuf contents\n" " -e, --task-events dump task events\n" " -m, --recorded-metadata dump recorded data metadata\n" @@ -165,30 +166,21 @@ static void dump_task_event(FILE* out, const TraceTaskEvent& event) { * event sets. No attempt is made to enforce this or normalize specs. */ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, - FILE* out, const string* spec) { + FILE* out, const string* spec, + const unordered_map& task_events) { uint32_t start = 0, end = numeric_limits::max(); + bool only_end = false; - // Try to parse the "range" syntax '[start]-[end]'. - if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { - // Fall back on assuming the spec is a single event - // number, however it parses out with atoi(). 
- start = end = atoi(spec->c_str()); - } - - unordered_map task_events; - FrameTime last_time = 0; - while (true) { - FrameTime time; - TraceTaskEvent r = trace.read_task_event(&time); - if (time < last_time) { - FATAL() << "TraceTaskEvent times non-monotonic"; - } - if (r.type() == TraceTaskEvent::NONE) { - break; + if (spec && *spec == "end") { + only_end = true; + } else { + // Try to parse the "range" syntax '[start]-[end]'. + if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { + // Fall back on assuming the spec is a single event + // number, however it parses out with atoi(). + start = end = atoi(spec->c_str()); } - task_events.insert(make_pair(time, r)); - last_time = time; } bool process_raw_data = @@ -198,8 +190,9 @@ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, if (end < frame.time()) { return; } - if (start <= frame.time() && frame.time() <= end && - (!flags.only_tid || flags.only_tid == frame.tid())) { + if (only_end ? trace.at_end() : + (start <= frame.time() && frame.time() <= end && + (!flags.only_tid || flags.only_tid == frame.tid()))) { if (flags.raw_dump) { frame.dump_raw(out); } else { @@ -258,8 +251,20 @@ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, TraceReader::RawDataMetadata data; while (process_raw_data && trace.read_raw_data_metadata_for_frame(data)) { if (flags.dump_recorded_data_metadata) { - fprintf(out, " { tid:%d, addr:%p, length:%p }\n", data.rec_tid, + fprintf(out, " { tid:%d, addr:%p, length:%p", data.rec_tid, (void*)data.addr.as_int(), (void*)data.size); + if (!data.holes.empty()) { + fputs(", holes:[", out); + const char* separator = ""; + for (auto& h : data.holes) { + // Emit ", " between holes, but not before the first one. + fputs(separator, out); + separator = ", "; + fprintf(out, "%p-%p", (void*)h.offset, (void*)(h.offset + h.size)); + } + fputs("]", out); + } + fputs(" }\n", out); } } if (!flags.raw_dump) { @@ -299,13 +304,28 @@ void dump(const string& trace_dir, const DumpFlags& flags, "eax ebx ecx edx esi edi ebp 
orig_eax esp eip eflags\n"); } + unordered_map task_events; + FrameTime last_time = 0; + while (true) { + FrameTime time; + TraceTaskEvent r = trace.read_task_event(&time); + if (time < last_time) { + FATAL() << "TraceTaskEvent times non-monotonic"; + } + if (r.type() == TraceTaskEvent::NONE) { + break; + } + task_events.insert(make_pair(time, r)); + last_time = time; + } + if (specs.size() > 0) { for (size_t i = 0; i < specs.size(); ++i) { - dump_events_matching(trace, flags, out, &specs[i]); + dump_events_matching(trace, flags, out, &specs[i], task_events); } } else { // No specs => dump all events. - dump_events_matching(trace, flags, out, nullptr /*all events*/); + dump_events_matching(trace, flags, out, nullptr /*all events*/, task_events); } if (flags.dump_statistics) { diff --git a/src/Dwarf.cc b/src/Dwarf.cc index dd859e19ecd..551e6277a64 100644 --- a/src/Dwarf.cc +++ b/src/Dwarf.cc @@ -45,14 +45,23 @@ template struct __attribute__((packed)) Dwarf5CompilationUnitHeade uint8_t unit_type; uint8_t address_size; typename D::Offset debug_abbrev_offset; + + void install_dwo_id(DwarfCompilationUnit* unit) const { + unit->set_dwo_id(0); + } +}; + +template struct __attribute__((packed)) Dwarf5SkeletonSplitCompilationUnitHeader { + typedef D Size; + typename D::CompilationUnitPreamble preamble; + uint16_t version; + uint8_t unit_type; + uint8_t address_size; + typename D::Offset debug_abbrev_offset; uint64_t dwo_id; void install_dwo_id(DwarfCompilationUnit* unit) const { - if (version == 5 && (unit_type == DW_UT_skeleton || unit_type == DW_UT_split_compile)) { - unit->set_dwo_id(dwo_id); - } else { - unit->set_dwo_id(0); - } + unit->set_dwo_id(dwo_id); } }; @@ -263,6 +272,15 @@ static size_t form_size(DWForm form, size_t address_size, size_t dwarf_size, Dwa case DW_FORM_strx2: return 2; case DW_FORM_strx3: return 3; case DW_FORM_strx4: return 4; + case DW_FORM_string: { + auto before = span->size(); + DwarfSpan a_span(*span); + 
a_span.read_null_terminated_string(ok); + if (!*ok) { + return 0; + } + return before - a_span.size(); + } case DW_FORM_sec_offset: return dwarf_size; case DW_FORM_flag_present: return 0; case DW_FORM_implicit_const: return 0; @@ -385,6 +403,8 @@ static const char* decode_string(const DwarfCompilationUnit& cu, DwarfSpan span, } return debug_strs.debug_str.subspan(offset).read_null_terminated_string(ok); } + case DW_FORM_string: + return span.read_null_terminated_string(ok); default: LOG(warn) << "Unknown string form " << form; *ok = false; @@ -443,7 +463,15 @@ template void DwarfCompilationUnit::init_size(DwarfSpan* debug_info if (2 <= h->version && h->version <= 4) { init>(debug_info, abbrevs, ok); } else if (h->version == 5) { - init>(debug_info, abbrevs, ok); + auto hh = DwarfSpan(*debug_info).read>(ok); + if (!*ok) { + return; + } + if (hh->unit_type == DW_UT_skeleton || hh->unit_type == DW_UT_split_compile) { + init>(debug_info, abbrevs, ok); + } else { + init>(debug_info, abbrevs, ok); + } } else { LOG(warn) << "Unknown compilation unit version " << h->version; *ok = false; @@ -453,7 +481,7 @@ template void DwarfCompilationUnit::init(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok) { DwarfSpan span(*debug_info); auto h = span.read(ok); - if (!ok) { + if (!*ok) { return; } uint64_t length = h->preamble.unit_length; @@ -465,6 +493,9 @@ template void DwarfCompilationUnit::init(DwarfSpan* debug_info, Dwa debug_info->consume(length + sizeof(h->preamble)); DwarfAbbrevSet& abbrev_set = abbrevs.lookup(h->debug_abbrev_offset); die_ = make_unique(span, abbrev_set, sizeof(typename H::Size::Offset), h->address_size, ok); + if (!*ok) { + return; + } if (die_->tag() != DW_TAG_compile_unit && die_->tag() != DW_TAG_partial_unit && die_->tag() != DW_TAG_skeleton_unit) { diff --git a/src/Dwarf.h b/src/Dwarf.h index a7796e0550c..eb2d3dba844 100644 --- a/src/Dwarf.h +++ b/src/Dwarf.h @@ -40,6 +40,7 @@ enum 
DWForm { DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06, DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, DW_FORM_data1 = 0x0b, DW_FORM_flag = 0x0c, DW_FORM_strp = 0x0e, diff --git a/src/EmuFs.cc b/src/EmuFs.cc index 0de6e44e82f..0cb07be75a4 100644 --- a/src/EmuFs.cc +++ b/src/EmuFs.cc @@ -28,25 +28,62 @@ EmuFile::~EmuFile() { EmuFile::shr_ptr EmuFile::clone(EmuFs& owner) { auto f = EmuFile::create(owner, orig_path.c_str(), device(), inode(), size_); - uint64_t data[65536 / sizeof(uint64_t)]; + // We could try using FICLONE but tmpfs doesn't support that yet so let's just + // not bother for now. + + // Avoid copying holes. + vector buf; uint64_t offset = 0; while (offset < size_) { - ssize_t amount = min(size_ - offset, sizeof(data)); - ssize_t ret = pread64(fd(), data, amount, offset); - if (ret <= 0) { - FATAL() << "Couldn't read all the data"; + ssize_t ret = lseek(fd(), offset, SEEK_HOLE); + if (ret < 0) { + ret = size_; + } else { + if (uint64_t(ret) < offset) { + FATAL() << "lseek returned hole before requested offset"; + } } - // There could have been a short read - amount = ret; - uint8_t* data_ptr = reinterpret_cast(data); - while (amount > 0) { - ret = pwrite64(f->fd(), data_ptr, amount, offset); + uint64_t hole = ret; + // Copy data + while (offset < hole) { + loff_t off_in = offset; + loff_t off_out = offset; + ssize_t ncopied = syscall(NativeArch::copy_file_range, file.get(), &off_in, + f->fd().get(), &off_out, hole - offset, 0); + if (ncopied >= 0) { + if (ncopied == 0) { + FATAL() << "Didn't copy anything"; + } + offset += ncopied; + continue; + } + + ssize_t amount = min(hole - offset, 4*1024*1024); + buf.resize(amount); + ret = pread64(fd(), buf.data(), amount, offset); if (ret <= 0) { + FATAL() << "Couldn't read all the data"; + } + ssize_t written = pwrite_all_fallible(f->fd(), buf.data(), ret, offset); + if (written < ret) { FATAL() << "Couldn't write all the data"; } - amount -= ret; - data_ptr += ret; - offset += ret; + offset += written; + } + if 
(offset < size_) { + // Look for the end of the hole, if any + ret = lseek(fd(), offset, SEEK_DATA); + if (ret < 0) { + if (errno != ENXIO) { + FATAL() << "Couldn't find data"; + } + break; + } + if (uint64_t(ret) <= offset) { + FATAL() << "Zero sized hole?"; + } + // Skip the hole + offset = ret; } } diff --git a/src/GdbServer.cc b/src/GdbServer.cc index bd5827358af..eca333cb1a3 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -124,8 +124,8 @@ static const string& gdb_rr_macros() { << "python\n" << "import re\n" << "m = re.compile(" - << "'.* ([0-9]+)\\.([0-9]+)(\\.([0-9]+))?.*'" - << ").match(gdb.execute('show version', False, True))\n" + << "'[^0-9]*([0-9]+)\\.([0-9]+)(\\.([0-9]+))?'" + << ").match(gdb.VERSION)\n" << "ver = int(m.group(1))*10000 + int(m.group(2))*100\n" << "if m.group(4):\n" << " ver = ver + int(m.group(4))\n" @@ -338,6 +338,7 @@ static bool is_in_patch_stubs(Task* t, remote_code_ptr ip) { void GdbServer::maybe_intercept_mem_request(Task* target, const GdbRequest& req, vector* result) { + DEBUG_ASSERT(req.mem_.len >= result->size()); /* Crazy hack! * When gdb tries to read the word at the top of the stack, and we're in our * dynamically-generated stub code, tell it the value is zero, so that gdb's @@ -348,7 +349,7 @@ void GdbServer::maybe_intercept_mem_request(Task* target, const GdbRequest& req, */ size_t size = word_size(target->arch()); if (target->regs().sp().as_int() >= req.mem_.addr && - target->regs().sp().as_int() + size <= req.mem_.addr + req.mem_.len && + target->regs().sp().as_int() + size <= req.mem_.addr + result->size() && is_in_patch_stubs(target, target->ip())) { memset(result->data() + target->regs().sp().as_int() - req.mem_.addr, 0, size); @@ -897,8 +898,9 @@ bool GdbServer::diverter_process_debugger_requests( } static bool is_last_thread_exit(const BreakStatus& break_status) { + // The task set may be empty if the task has already exited. 
return break_status.task_exit && - break_status.task->thread_group()->task_set().size() == 1; + break_status.task_context.thread_group->task_set().size() <= 1; } static Task* is_in_exec(ReplayTimeline& timeline) { @@ -954,22 +956,36 @@ void GdbServer::maybe_notify_stop(const GdbRequest& req, stop_siginfo = *break_status.signal; LOG(debug) << "Stopping for signal " << stop_siginfo; } - if (is_last_thread_exit(break_status) && dbg->features().reverse_execution) { - do_stop = true; - memset(&stop_siginfo, 0, sizeof(stop_siginfo)); - if (req.cont().run_direction == RUN_FORWARD) { - // The exit of the last task in a thread group generates a fake SIGKILL, - // when reverse-execution is enabled, because users often want to run - // backwards from the end of the task. - stop_siginfo.si_signo = SIGKILL; - LOG(debug) << "Stopping for synthetic SIGKILL"; - } else { - // The start of the debuggee task-group should trigger a silent stop. - stop_siginfo.si_signo = 0; - LOG(debug) << "Stopping at start of execution while running backwards"; + if (is_last_thread_exit(break_status)) { + if (break_status.task_context.session->is_diversion()) { + // If the last task of a diversion session has exited, we need + // to make sure GDB knows it's unrecoverable. There's no good + // way to do this: a stop is insufficient, but an inferior exit + // typically signals the end of a debugging session. Using the + // latter approach appears to work, but stepping through GDB's + // processing of the event seems to indicate it isn't really + // supposed to. FIXME. + LOG(debug) << "Last task of diversion exiting. 
" + << "Notifying exit with synthetic SIGKILL"; + dbg->notify_exit_signal(SIGKILL); + return; + } else if (dbg->features().reverse_execution) { + do_stop = true; + memset(&stop_siginfo, 0, sizeof(stop_siginfo)); + if (req.cont().run_direction == RUN_FORWARD) { + // The exit of the last task in a thread group generates a fake SIGKILL, + // when reverse-execution is enabled, because users often want to run + // backwards from the end of the task. + stop_siginfo.si_signo = SIGKILL; + LOG(debug) << "Stopping for synthetic SIGKILL"; + } else { + // The start of the debuggee task-group should trigger a silent stop. + stop_siginfo.si_signo = 0; + LOG(debug) << "Stopping at start of execution while running backwards"; + } } } - Task* t = break_status.task; + Task* t = break_status.task(); Task* in_exec_task = is_in_exec(timeline); if (in_exec_task) { do_stop = true; @@ -1075,11 +1091,7 @@ GdbRequest GdbServer::divert(ReplaySession& replay) { } Task* t = diversion_session->find_task(last_continue_tuid); - if (!t) { - diversion_refcount = 0; - req = GdbRequest(DREQ_NONE); - break; - } + DEBUG_ASSERT(t != nullptr); int signal_to_deliver; RunCommand command = @@ -1089,6 +1101,7 @@ GdbRequest GdbServer::divert(ReplaySession& replay) { if (result.status == DiversionSession::DIVERSION_EXITED) { diversion_refcount = 0; + maybe_notify_stop(req, result.break_status); req = GdbRequest(DREQ_NONE); break; } @@ -1195,7 +1208,7 @@ void GdbServer::try_lazy_reverse_singlesteps(GdbRequest& req) { now = previous; need_seek = true; BreakStatus break_status; - break_status.task = t; + break_status.task_context = TaskContext(t); break_status.singlestep_complete = true; LOG(debug) << " using lazy reverse-singlestep"; maybe_notify_stop(req, break_status); @@ -1361,7 +1374,7 @@ GdbServer::ContinueOrStop GdbServer::debug_one_step( } if (req.cont().run_direction == RUN_FORWARD && is_last_thread_exit(result.break_status) && - result.break_status.task->thread_group()->tguid() == debuggee_tguid) { + 
result.break_status.task_context.thread_group->tguid() == debuggee_tguid) { in_debuggee_end_state = true; } return CONTINUE_DEBUGGING; @@ -1560,7 +1573,7 @@ void GdbServer::restart_session(const GdbRequest& req) { // condition below. DEBUG_ASSERT(result.status != REPLAY_EXITED); if (is_last_thread_exit(result.break_status) && - result.break_status.task->thread_group()->tgid == target.pid) { + result.break_status.task_context.thread_group->tgid == target.pid) { // Debuggee task is about to exit. Stop here. in_debuggee_end_state = true; break; diff --git a/src/LsCommand.cc b/src/LsCommand.cc index ed2be01c8ca..721c73dcf46 100644 --- a/src/LsCommand.cc +++ b/src/LsCommand.cc @@ -105,6 +105,7 @@ static bool get_folder_size(string dir_name, string& size_str) { bytes += st.st_size; } + closedir(dir); static const char suffixes[] = " KMGT"; double size = bytes; @@ -172,6 +173,7 @@ static int ls(const string& traces_dir, const LsFlags& flags, FILE* out) { traces.back().ctime = st.st_ctim; } } + closedir(dir); if (flags.sort_by_time) { auto compare_by_time = [&](const TraceInfo& at, diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc index 0847eacdf60..7d49f5730be 100644 --- a/src/Monkeypatcher.cc +++ b/src/Monkeypatcher.cc @@ -361,24 +361,30 @@ static bool patch_syscall_with_hook(Monkeypatcher& patcher, RecordTask* t, } template -static void match_extended_jump_patch(uint8_t patch[], +static bool match_extended_jump_patch(uint8_t patch[], uint64_t *return_addr); template <> -void match_extended_jump_patch( +bool match_extended_jump_patch( uint8_t patch[], uint64_t *return_addr) { uint32_t return_addr_lo, return_addr_hi; uint64_t jmp_target; - X64SyscallStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &jmp_target); + if (!X64SyscallStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &jmp_target)) { + return false; + } *return_addr = return_addr_lo | (((uint64_t)return_addr_hi) << 32); + return true; } template <> -void 
match_extended_jump_patch( +bool match_extended_jump_patch( uint8_t patch[], uint64_t *return_addr) { uint32_t return_addr_32, jmp_target_relative; - X86SyscallStubExtendedJump::match(patch, &return_addr_32, &jmp_target_relative); + if (!X86SyscallStubExtendedJump::match(patch, &return_addr_32, &jmp_target_relative)) { + return false; + } *return_addr = return_addr_32; + return true; } template @@ -412,7 +418,9 @@ static void unpatch_extended_jumps(Monkeypatcher& patcher, uint8_t bytes[ExtendedJumpPatch::size]; t->read_bytes_helper(patch.first, sizeof(bytes), bytes); uint64_t return_addr; - match_extended_jump_patch(bytes, &return_addr); + if (!match_extended_jump_patch(bytes, &return_addr)) { + ASSERT(t, false) << "Failed to match extended jump patch at " << patch.first; + } std::vector syscall = rr::syscall_instruction(t->arch()); @@ -606,6 +614,7 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { uint8_t* following_bytes = &bytes[MAXIMUM_LOOKBACK]; intptr_t syscallno = r.original_syscallno(); + bool success = false; for (auto& hook : syscall_hooks) { bool matches_hook = false; if ((!(hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) && @@ -687,7 +696,12 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { return false; } - LOG(debug) << "Patched syscall at " << ip << " syscall " + // Get out of executing the current syscall before we patch it. + if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { + return false; + } + + LOG(debug) << "Patching syscall at " << ip << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid << " bytes " << bytes_to_string( @@ -695,26 +709,24 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { min(bytes_count, sizeof(syscall_patch_hook::patch_region_bytes))); - // Get out of executing the current syscall before we patch it. 
- if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { - return false; - } - - patch_syscall_with_hook(*this, t, hook); - - // Return to caller, which resume normal execution. - return true; + success = patch_syscall_with_hook(*this, t, hook); + break; } } - LOG(debug) << "Failed to patch syscall at " << ip << " syscall " - << syscall_name(syscallno, t->arch()) << " tid " << t->tid - << " bytes " - << bytes_to_string( - following_bytes, - min(bytes_count, - sizeof(syscall_patch_hook::patch_region_bytes))); - tried_to_patch_syscall_addresses.insert(ip); - return false; + + if (!success) { + LOG(debug) << "Failed to patch syscall at " << ip << " syscall " + << syscall_name(syscallno, t->arch()) << " tid " << t->tid + << " bytes " + << bytes_to_string( + following_bytes, + min(bytes_count, + sizeof(syscall_patch_hook::patch_region_bytes))); + tried_to_patch_syscall_addresses.insert(ip); + return false; + } + + return true; } // VDSOs are filled with overhead critical functions related to getting the diff --git a/src/PerfCounters_x86.h b/src/PerfCounters_x86.h index 1f8a7740c57..2b0d35d197b 100644 --- a/src/PerfCounters_x86.h +++ b/src/PerfCounters_x86.h @@ -86,9 +86,12 @@ static CpuMicroarch compute_cpu_microarch() { case 0x70f10: // Matisse (Zen 2) (UNTESTED) if (ext_family == 8) { return AMDZen; + } else if (ext_family == 3) { + return AMDF15R30; } break; case 0x20f10: // Vermeer (Zen 3) + case 0x50f00: // Cezanne (Zen 3) if (ext_family == 0xa) { return AMDZen; } diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 8ce4fcf0451..166d1afdd52 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include "core.h" #include "ftrace.h" #include "kernel_metadata.h" +#include "kernel_supplement.h" #include "log.h" #include "record_signal.h" #include "record_syscall.h" @@ -319,7 +321,22 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t, // 
SIGSYS. Instead, we set a breakpoint at the return instruction. t->set_regs(regs); t->vm()->add_breakpoint(ret_addr, BKPT_INTERNAL); - t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); + while (true) { + t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); + if (t->ptrace_event() == PTRACE_EVENT_EXIT) { + return; + } + ASSERT(t, !t->ptrace_event()); + if (t->stop_sig() == syscallbuf_desched_sig()) { + continue; + } + if (t->stop_sig() == SIGTRAP && + is_kernel_trap(t->get_siginfo().si_code)) { + // Hit the breakpoint + break; + } + t->stash_sig(); + } t->vm()->remove_breakpoint(ret_addr, BKPT_INTERNAL); ASSERT(t, t->regs().ip().undo_executed_bkpt(t->arch()) == ret_addr); @@ -781,6 +798,13 @@ void RecordSession::task_continue(const StepState& step_state) { } } + // Override requested by the tracee for testing purposes + if (t->tick_request_override != (TicksRequest)0) { + ASSERT(t, !t->next_pmc_interrupt_is_for_user); + ticks_request = t->tick_request_override; + t->tick_request_override = (TicksRequest)0; + } + bool singlestep = is_ptrace_any_singlestep(t->arch(), t->emulated_ptrace_cont_command); if (singlestep && is_at_syscall_instruction(t, t->ip())) { @@ -1557,8 +1581,11 @@ bool RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { // A SIGSTOP requires us to allow switching to another task. // So does a fatal, core-dumping signal, since we need to allow other // tasks to proceed to their exit events. - bool is_fatal = t->ev().Signal().disposition == DISPOSITION_FATAL; bool is_deterministic = t->ev().Signal().deterministic == DETERMINISTIC_SIG; + // Signals that would normally be fatal are just ignored for init processes, + // unless they're deterministic. + bool is_fatal = t->ev().Signal().disposition == DISPOSITION_FATAL && + (!t->is_container_init() || is_deterministic); Switchable can_switch = ((is_fatal && is_coredumping_signal(sig)) || sig == SIGSTOP) ? 
ALLOW_SWITCH : PREVENT_SWITCH; @@ -1600,15 +1627,16 @@ bool RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { RecordTask::ALLOW_RESET_SYSCALLBUF, &r); // Don't actually set_regs(r), the kernel does these modifications. - // If the task is a container init, the kernel will ignore injection - // of fatal signals. Usually, the kernel removes the killable-protection - // when a determinisic fatal signal gets executed, but (due to what is - // arguably a bug) when a ptracer is attached, this does not happen. - // If we try to inject it here, the kernel will just ignore it, - // and we'll go around again. As a hack, we detach here, in the - // expectation that the deterministic instruction will run again and - // actually kill the task now that it isn't under ptrace control anymore. - if (t->is_container_init() && is_fatal && is_deterministic) { + if (t->is_container_init() && is_fatal) { + // Nondeterministic signals were already filtered out. + ASSERT(t, is_deterministic); + // Usually, the kernel removes the killable-protection from an init process + // when a determinisic fatal signal gets executed, but (due to what is + // arguably a bug) when a ptracer is attached, this does not happen. + // If we try to inject it here, the kernel will just ignore it, + // and we'll go around again. As a hack, we detach here, in the + // expectation that the deterministic instruction will run again and + // actually kill the task now that it isn't under ptrace control anymore. 
t->destroy_buffers(nullptr, nullptr); WaitStatus exit_status = WaitStatus::for_fatal_sig(sig); record_exit_trace_event(t, exit_status); @@ -2109,21 +2137,25 @@ static string lookup_by_path(const string& name) { } else { setenv(SYSCALLBUF_ENABLED_ENV_VAR, "1", 1); - ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); - if (fd.is_open()) { - char buf[100]; - ssize_t size = read(fd, buf, sizeof(buf) - 1); - if (size >= 0) { - buf[size] = 0; - int val = atoi(buf); - if (val > 1) { - fprintf(stderr, - "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" - "Change it to 1, or use 'rr record -n' (slow).\n" - "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.conf.\n" - "See 'man 8 sysctl', 'man 5 sysctl.d' and 'man 5 sysctl.conf' for more details.\n", - val); - exit(1); + if (!has_effective_caps(uint64_t(1) << CAP_SYS_ADMIN) && + !has_effective_caps(uint64_t(1) << CAP_PERFMON)) { + ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); + if (fd.is_open()) { + char buf[100]; + ssize_t size = read(fd, buf, sizeof(buf) - 1); + if (size >= 0) { + buf[size] = 0; + int val = atoi(buf); + if (val > 1) { + fprintf(stderr, + "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" + "Change it to 1, or use 'rr record -n' (slow).\n" + "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.d/10-rr.conf.\n" + "See 'man 8 sysctl', 'man 5 sysctl.d' (systemd systems)\n" + "and 'man 5 sysctl.conf' (non-systemd systems) for more details.\n", + val); + exit(1); + } } } } diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 6eb92b7dfc8..3337b18ccfc 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -196,7 +196,8 @@ RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial, waiting_for_reap(false), waiting_for_zombie(false), waiting_for_ptrace_exit(false), - retry_syscall_patching(false) { + retry_syscall_patching(false), + tick_request_override((TicksRequest)0) { 
push_event(Event::sentinel()); if (session.tasks().empty()) { // Initial tracee. It inherited its state from this process, so set it up. @@ -1651,22 +1652,42 @@ void RecordTask::record_remote_writable(remote_ptr addr, } ssize_t RecordTask::record_remote_fallible(remote_ptr addr, - ssize_t num_bytes) { - ASSERT(this, num_bytes >= 0); + uintptr_t num_bytes, + const std::vector& holes) { + auto hole_iter = holes.begin(); + uintptr_t offset = 0; + vector buf; + while (offset < num_bytes) { + if (hole_iter != holes.end() && hole_iter->offset == offset) { + offset += hole_iter->size; + ++hole_iter; + continue; + } - if (record_remote_by_local_map(addr, num_bytes)) { - return num_bytes; - } + uintptr_t bytes = min(uintptr_t(4*1024*1024), num_bytes - offset); + if (hole_iter != holes.end()) { + ASSERT(this, hole_iter->offset > offset); + bytes = min(bytes, uintptr_t(hole_iter->offset) - offset); + } + if (record_remote_by_local_map(addr + offset, bytes)) { + offset += bytes; + continue; + } - vector buf; - ssize_t nread = 0; - if (!addr.is_null()) { - buf.resize(num_bytes); - nread = read_bytes_fallible(addr, num_bytes, buf.data()); - buf.resize(max(0, nread)); - } - trace_writer().write_raw(rec_tid, buf.data(), buf.size(), addr); - return nread; + if (addr) { + buf.resize(bytes); + ssize_t nread = read_bytes_fallible(addr + offset, bytes, buf.data()); + if (nread <= 0) { + break; + } + trace_writer().write_raw_data(buf.data(), nread); + offset += nread; + } else { + offset += bytes; + } + } + trace_writer().write_raw_header(rec_tid, offset, addr, holes); + return offset; } void RecordTask::record_remote_even_if_null(remote_ptr addr, diff --git a/src/RecordTask.h b/src/RecordTask.h index 73afc88325d..ea072fbe5a8 100644 --- a/src/RecordTask.h +++ b/src/RecordTask.h @@ -378,7 +378,8 @@ class RecordTask : public Task { } // Record as much as we can of the bytes in this range. Will record only // contiguous mapped data starting at `addr`. 
- ssize_t record_remote_fallible(remote_ptr addr, ssize_t num_bytes); + ssize_t record_remote_fallible(remote_ptr addr, uintptr_t num_bytes, + const std::vector& holes = std::vector()); // Record as much as we can of the bytes in this range. Will record only // contiguous mapped-writable data starting at `addr`. void record_remote_writable(remote_ptr addr, ssize_t num_bytes); @@ -751,6 +752,10 @@ class RecordTask : public Task { // When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again. bool retry_syscall_patching; + + // Set if the tracee requested an override of the ticks request. + // Used for testing. + TicksRequest tick_request_override; }; } // namespace rr diff --git a/src/Registers.cc b/src/Registers.cc index e6e610924ed..62258c7e5cf 100644 --- a/src/Registers.cc +++ b/src/Registers.cc @@ -659,7 +659,7 @@ void Registers::set_from_trace(SupportedArch a, const void* data, memcpy(&u.arm64regs, data, sizeof(u.arm64regs)); } -bool Registers::aarch64_singlestep_flag() { +bool Registers::aarch64_singlestep_flag() const { switch (arch()) { case aarch64: return pstate() & AARCH64_DBG_SPSR_SS; @@ -679,7 +679,7 @@ void Registers::set_aarch64_singlestep_flag() { } } -bool Registers::x86_singlestep_flag() { +bool Registers::x86_singlestep_flag() const { switch (arch()) { case x86: case x86_64: diff --git a/src/Registers.h b/src/Registers.h index 21c71713c55..bbc5233c294 100644 --- a/src/Registers.h +++ b/src/Registers.h @@ -424,7 +424,7 @@ class Registers { * Modify the processor's single step flag. On x86 this is the TF flag in the * eflags register. */ - bool x86_singlestep_flag(); + bool x86_singlestep_flag() const; void clear_x86_singlestep_flag(); /** @@ -441,7 +441,7 @@ class Registers { * likely already be clear, and we'd take a single step exception without * ever having executed any userspace instructions whatsoever. 
*/ - bool aarch64_singlestep_flag(); + bool aarch64_singlestep_flag() const; void set_aarch64_singlestep_flag(); void print_register_file(FILE* f) const; diff --git a/src/ReplayCommand.cc b/src/ReplayCommand.cc index 9f40e033913..c11328f3dd4 100644 --- a/src/ReplayCommand.cc +++ b/src/ReplayCommand.cc @@ -69,7 +69,8 @@ ReplayCommand ReplayCommand::singleton( " --stats= display brief stats every N steps (eg 10000).\n" " --serve-files Serve all files from the trace rather than\n" " assuming they exist on disk. Debugging will\n" - " be slower, but be able to tolerate missing files\n"); + " be slower, but be able to tolerate missing files\n" + " --tty Redirect tracee replay output to \n"); struct ReplayFlags { // Start a debug server for the task scheduled at the first @@ -128,6 +129,8 @@ struct ReplayFlags { // to get them from the filesystem bool serve_files; + string tty; + ReplayFlags() : goto_event(0), singlestep_to_event(0), @@ -167,6 +170,7 @@ static bool parse_replay_arg(vector& args, ReplayFlags& flags) { { 1, "fullname", NO_PARAMETER }, { 2, "stats", HAS_PARAMETER }, { 3, "serve-files", NO_PARAMETER }, + { 4, "tty", HAS_PARAMETER }, { 'u', "cpu-unbound", NO_PARAMETER }, { 'i', "interpreter", HAS_PARAMETER } }; @@ -252,6 +256,9 @@ static bool parse_replay_arg(vector& args, ReplayFlags& flags) { case 3: flags.serve_files = true; break; + case 4: + flags.tty = opt.value; + break; case 'u': flags.cpu_unbound = true; break; @@ -326,6 +333,7 @@ static pid_t waiting_for_child; static ReplaySession::Flags session_flags(const ReplayFlags& flags) { ReplaySession::Flags result; result.redirect_stdio = flags.redirect; + result.redirect_stdio_file = flags.tty; result.share_private_mappings = flags.share_private_mappings; result.cpu_unbound = flags.cpu_unbound; return result; diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index e279d00d168..794a4acc6d7 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -162,9 +162,22 @@ 
ReplaySession::ReplaySession(const std::string& dir, const Flags& flags) ticks_at_start_of_event(0), flags_(flags), trace_start_time(0) { + if (trace_in.required_forward_compatibility_version() > FORWARD_COMPATIBILITY_VERSION) { + CLEAN_FATAL() + << "This rr build is too old to replay the trace (we support forward compability version " + << FORWARD_COMPATIBILITY_VERSION << " but the trace needs " << trace_in.required_forward_compatibility_version(); + } + ticks_semantics_ = trace_in.ticks_semantics(); rrcall_base_ = trace_in.rrcall_base(); + if (!flags.redirect_stdio_file.empty()) { + tracee_output_fd_ = make_shared(flags.redirect_stdio_file.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0600); + if (!tracee_output_fd_->is_open()) { + FATAL() << "Can't open/create tracee output file " << flags.redirect_stdio_file; + } + } + memset(&last_siginfo_, 0, sizeof(last_siginfo_)); advance_to_next_trace_frame(); @@ -195,6 +208,7 @@ ReplaySession::ReplaySession(const std::string& dir, const Flags& flags) ReplaySession::ReplaySession(const ReplaySession& other) : Session(other), emu_fs(EmuFs::create()), + tracee_output_fd_(other.tracee_output_fd_), trace_in(other.trace_in), trace_frame(other.trace_frame), current_step(other.current_step), @@ -1093,8 +1107,15 @@ void ReplaySession::check_ticks_consistency(ReplayTask* t, const Event& ev) { } static bool treat_signal_event_as_deterministic(const SignalEvent& ev) { - return ev.deterministic == DETERMINISTIC_SIG && - ev.siginfo.si_signo != SIGBUS; + if (ev.siginfo.si_signo == SIGBUS) { + return false; + } + if (ev.siginfo.si_signo == SIGSEGV && ev.siginfo.si_code == SEGV_PKUERR) { + // We don't set up memory protection key state, so pkey-triggered signals + // won't happen. 
+ return false; + } + return ev.deterministic == DETERMINISTIC_SIG; } /** @@ -1313,7 +1334,8 @@ Completion ReplaySession::flush_syscallbuf(ReplayTask* t, Registers r = t->regs(); ASSERT(t, t->stop_sig() == SIGSEGV && r.ip() == t->vm()->do_breakpoint_fault_addr()) - << "Replay got unexpected signal (or none) " << t->stop_sig(); + << "Replay got unexpected signal (or none) " << t->stop_sig() + << " ip " << r.ip() << " breakpoint_fault_addr " << t->vm()->do_breakpoint_fault_addr(); r.set_ip(r.ip().increment_by_movrm_insn_length(t->arch())); t->set_regs(r); @@ -1509,8 +1531,14 @@ static void end_task(ReplayTask* t) { t->set_regs(r); // Enter the syscall. t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS); - ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT); - t->did_handle_ptrace_exit_event(); + if (t->session().done_initial_exec()) { + ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT); + t->did_handle_ptrace_exit_event(); + } else { + // If we never execed, the trace is totally hosed, + // just clean up. + t->did_kill(); + } t->detach(); delete t; } @@ -1726,7 +1754,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { advance_to_next_trace_frame(); } if (current_step.action == TSTEP_EXIT_TASK) { - result.break_status.task = t; + result.break_status.task_context = TaskContext(t); result.break_status.task_exit = true; } return result; @@ -1735,7 +1763,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { fast_forward_status = FastForwardStatus(); // Now we know |t| hasn't died, so save it in break_status. - result.break_status.task = t; + result.break_status.task_context = TaskContext(t); /* Advance towards fulfilling |current_step|. 
*/ if (try_one_trace_step(t, constraints) == INCOMPLETE) { @@ -1769,7 +1797,11 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { // If try_one_trace_step set extra-registers already, the values it used from the frame // will already have FIP/FDP cleared if necessary. Clearing them again here is fine. - if (trace_reader().clear_fip_fdp()) { + if (trace_reader().clear_fip_fdp() && + current_step.action != TSTEP_EXIT_TASK) + /* TSTEP_EXIT_TASK means the task object got already + deleted above in try_one_trace_step/exit_task/end_task. */ + { const ExtraRegisters* maybe_extra = t->extra_regs_fallible(); if (maybe_extra) { ExtraRegisters extra_registers = *maybe_extra; @@ -1802,7 +1834,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { } break; case TSTEP_EXIT_TASK: - result.break_status.task = nullptr; + result.break_status.task_context = TaskContext(); t = nullptr; DEBUG_ASSERT(!result.break_status.any_break()); break; diff --git a/src/ReplaySession.h b/src/ReplaySession.h index ea89245d682..f025fdacea6 100644 --- a/src/ReplaySession.h +++ b/src/ReplaySession.h @@ -239,6 +239,7 @@ class ReplaySession : public Session { , cpu_unbound(false) {} Flags(const Flags& other) = default; bool redirect_stdio; + std::string redirect_stdio_file; bool share_private_mappings; bool cpu_unbound; }; @@ -317,6 +318,10 @@ class ReplaySession : public Session { bool has_trace_quirk(TraceReader::TraceQuirks quirk) { return trace_in.quirks() & quirk; } + virtual int tracee_output_fd(int dflt) override { + return tracee_output_fd_.get() ? 
tracee_output_fd_->get() : dflt; + } + private: ReplaySession(const std::string& dir, const Flags& flags); ReplaySession(const ReplaySession& other); @@ -360,6 +365,7 @@ class ReplaySession : public Session { void clear_syscall_bp(); std::shared_ptr emu_fs; + std::shared_ptr tracee_output_fd_; TraceReader trace_in; TraceFrame trace_frame; ReplayTraceStep current_step; diff --git a/src/ReplayTimeline.cc b/src/ReplayTimeline.cc index b1b758c54cf..9f59583d2be 100644 --- a/src/ReplayTimeline.cc +++ b/src/ReplayTimeline.cc @@ -243,7 +243,7 @@ void ReplayTimeline::mark_after_singlestep(const Mark& from, fprintf(stderr, "Probable duplicated state at %d:", (int)m_prev + 1); m.ptr->full_print(stderr); } - ASSERT(result.break_status.task, false) + ASSERT(result.break_status.task(), false) << " Probable duplicated states leading to " << m << " at index " << i + 1; } break; @@ -612,15 +612,15 @@ bool ReplayTimeline::fix_watchpoint_coalescing_quirk(ReplayResult& result, // no watchpoint hit. Nothing to fix. 
return false; } - if (!maybe_at_or_after_x86_string_instruction(result.break_status.task)) { + if (!maybe_at_or_after_x86_string_instruction(result.break_status.task())) { return false; } - TaskUid after_tuid = result.break_status.task->tuid(); - Ticks after_ticks = result.break_status.task->tick_count(); + TaskUid after_tuid = result.break_status.task()->tuid(); + Ticks after_ticks = result.break_status.task()->tick_count(); LOG(debug) << "Fixing x86-string coalescing quirk from " << before << " to " << proto_mark() << " (final cx " - << result.break_status.task->regs().cx() << ")"; + << result.break_status.task()->regs().cx() << ")"; seek_to_proto_mark(before); @@ -640,7 +640,7 @@ bool ReplayTimeline::fix_watchpoint_coalescing_quirk(ReplayResult& result, if (!result.break_status.data_watchpoints_hit().empty()) { LOG(debug) << "Fixed x86-string coalescing quirk; now at " << current_mark_key() << " (new cx " - << result.break_status.task->regs().cx() << ")"; + << result.break_status.task()->regs().cx() << ")"; break; } } else { @@ -929,7 +929,7 @@ bool ReplayTimeline::run_forward_to_intermediate_point(const Mark& end, static const int stop_count_limit = 20; static ReplayTask* to_replay_task(const BreakStatus& status) { - return static_cast(status.task); + return static_cast(status.task()); } static bool arch_watch_fires_before_instr(SupportedArch arch) { @@ -1021,10 +1021,10 @@ ReplayResult ReplayTimeline::reverse_continue( !result.break_status.watchpoints_hit.empty(); if (avoidable_stop) { made_progress_between_stops = - avoidable_stop_ip != result.break_status.task->ip() || - avoidable_stop_ticks != result.break_status.task->tick_count(); - avoidable_stop_ip = result.break_status.task->ip(); - avoidable_stop_ticks = result.break_status.task->tick_count(); + avoidable_stop_ip != result.break_status.task()->ip() || + avoidable_stop_ticks != result.break_status.task()->tick_count(); + avoidable_stop_ip = result.break_status.task()->ip(); + avoidable_stop_ticks = 
result.break_status.task()->tick_count(); } evaluate_conditions(result); @@ -1053,10 +1053,11 @@ ReplayResult ReplayTimeline::reverse_continue( } } final_result = result; - final_tuid = result.break_status.task ? result.break_status.task->tuid() - : TaskUid(); - final_ticks = result.break_status.task - ? result.break_status.task->tick_count() + final_tuid = result.break_status.task() + ? result.break_status.task()->tuid() + : TaskUid(); + final_ticks = result.break_status.task() + ? result.break_status.task()->tick_count() : 0; last_stop_is_watch_or_signal = true; } @@ -1065,10 +1066,11 @@ ReplayResult ReplayTimeline::reverse_continue( if (is_start_of_reverse_execution_barrier_event()) { dest = mark(); final_result = result; - final_result.break_status.task = current->current_task(); + final_result.break_status.task_context = + TaskContext(current->current_task()); final_result.break_status.task_exit = true; - final_tuid = final_result.break_status.task->tuid(); - final_ticks = result.break_status.task->tick_count(); + final_tuid = final_result.break_status.task()->tuid(); + final_ticks = result.break_status.task()->tick_count(); last_stop_is_watch_or_signal = false; } @@ -1084,10 +1086,11 @@ ReplayResult ReplayTimeline::reverse_continue( dest = mark(); LOG(debug) << "Found breakpoint break at " << dest; final_result = result; - final_tuid = result.break_status.task ? result.break_status.task->tuid() - : TaskUid(); - final_ticks = result.break_status.task - ? result.break_status.task->tick_count() + final_tuid = result.break_status.task() + ? result.break_status.task()->tuid() + : TaskUid(); + final_ticks = result.break_status.task() + ? 
result.break_status.task()->tick_count() : 0; last_stop_is_watch_or_signal = false; } @@ -1096,7 +1099,8 @@ ReplayResult ReplayTimeline::reverse_continue( LOG(debug) << "Interrupted at " << end; seek_to_mark(end); final_result = ReplayResult(); - final_result.break_status.task = current->current_task(); + final_result.break_status.task_context = + TaskContext(current->current_task()); return final_result; } @@ -1138,7 +1142,8 @@ ReplayResult ReplayTimeline::reverse_continue( } // fix break_status.task since the actual ReplayTask* may have changed // since we saved final_result - final_result.break_status.task = current->find_task(final_tuid); + final_result.break_status.task_context = + TaskContext(current->find_task(final_tuid)); // Hide any singlestepping we did, since a continue operation should // never return a singlestep status final_result.break_status.singlestep_complete = false; @@ -1299,8 +1304,8 @@ ReplayResult ReplayTimeline::reverse_singlestep( } else if (now == end && result.break_status.signal && result.break_status.signal->si_signo == SIGTRAP && - is_advanced_pc_and_signaled_instruction(result.break_status.task, - result.break_status.task->ip())) { + is_advanced_pc_and_signaled_instruction(result.break_status.task(), + result.break_status.task()->ip())) { LOG(debug) << " singlestepped exactly to instruction that advances pc and signals (e.g. 
int3)," << " pretending we stopped earlier."; break; @@ -1309,7 +1314,7 @@ ReplayResult ReplayTimeline::reverse_singlestep( LOG(debug) << "Setting candidate after step: " << destination_candidate; destination_candidate_result = result; - destination_candidate_tuid = result.break_status.task->tuid(); + destination_candidate_tuid = result.break_status.task()->tuid(); destination_candidate_saw_other_task_break = seen_other_task_break; seen_other_task_break = false; step_start = now; @@ -1365,9 +1370,9 @@ ReplayResult ReplayTimeline::reverse_singlestep( if (destination_candidate) { LOG(debug) << "Found destination " << destination_candidate; seek_to_mark(destination_candidate); - destination_candidate_result.break_status.task = - current->find_task(destination_candidate_tuid); - DEBUG_ASSERT(destination_candidate_result.break_status.task); + destination_candidate_result.break_status.task_context = + TaskContext(current->find_task(destination_candidate_tuid)); + DEBUG_ASSERT(destination_candidate_result.break_status.task()); evaluate_conditions(destination_candidate_result); return destination_candidate_result; } diff --git a/src/Session.cc b/src/Session.cc index dd119e08fe4..c9a84efdf82 100644 --- a/src/Session.cc +++ b/src/Session.cc @@ -266,7 +266,7 @@ string Session::read_spawned_task_error() const { BreakStatus Session::diagnose_debugger_trap(Task* t, RunCommand run_command) { assert_fully_initialized(); BreakStatus break_status; - break_status.task = t; + break_status.task_context = TaskContext(t); int stop_sig = t->stop_sig(); if (!stop_sig) { diff --git a/src/Session.h b/src/Session.h index ce5cdeb5f05..6446d4f656b 100644 --- a/src/Session.h +++ b/src/Session.h @@ -12,6 +12,7 @@ #include "AddressSpace.h" #include "MonitoredSharedMemory.h" +#include "Task.h" #include "TaskishUid.h" #include "TraceStream.h" #include "preload/preload_interface.h" @@ -30,18 +31,47 @@ class AutoRemoteSyscalls; // The following types are used by step() APIs in Session subclasses. 
+/** + * Stores a Task and information about it separately so decisions can + * still be made from a Task's context even if it dies. + */ +struct TaskContext { + TaskContext() + : task(nullptr), + session(nullptr), + thread_group(nullptr) {} + explicit TaskContext(Task* task) + : task(task), + session(task ? &task->session() : nullptr), + thread_group(task ? task->thread_group() : nullptr) {} + TaskContext(Session* session, std::shared_ptr thread_group) + : task(nullptr), + session(session), + thread_group(thread_group) {} + + // A pointer to a task. This may be |nullptr|. When non-NULL, this + // is not necessarily the same as session->current_task() (for + // example, when replay switches to a new task after + // ReplaySession::replay_step()). + Task* task; + // The session to which |task| belongs/belonged. + Session* session; + // The thread group to which |task| belongs/belonged. + std::shared_ptr thread_group; +}; + /** * In general, multiple break reasons can apply simultaneously. */ struct BreakStatus { BreakStatus() - : task(nullptr), + : task_context(TaskContext()), breakpoint_hit(false), singlestep_complete(false), approaching_ticks_target(false), task_exit(false) {} BreakStatus(const BreakStatus& other) - : task(other.task), + : task_context(other.task_context), watchpoints_hit(other.watchpoints_hit), signal(other.signal ? std::unique_ptr(new siginfo_t(*other.signal)) @@ -51,7 +81,7 @@ struct BreakStatus { approaching_ticks_target(other.approaching_ticks_target), task_exit(other.task_exit) {} const BreakStatus& operator=(const BreakStatus& other) { - task = other.task; + task_context = other.task_context; watchpoints_hit = other.watchpoints_hit; signal = other.signal ? std::unique_ptr(new siginfo_t(*other.signal)) @@ -63,9 +93,8 @@ struct BreakStatus { return *this; } - // The triggering Task. This may be different from session->current_task() - // when replay switches to a new task when ReplaySession::replay_step() ends. 
- Task* task; + // The triggering TaskContext. + TaskContext task_context; // List of watchpoints hit; any watchpoint hit causes a stop after the // instruction that triggered the watchpoint has completed. std::vector watchpoints_hit; @@ -109,6 +138,8 @@ struct BreakStatus { return !watchpoints_hit.empty() || signal || breakpoint_hit || singlestep_complete || approaching_ticks_target; } + + Task* task() const { return task_context.task; } }; enum RunCommand { // Continue until we hit a breakpoint or a new replay event @@ -364,6 +395,10 @@ class Session { const ThreadGroupMap& thread_group_map() const { return thread_group_map_; } + virtual int tracee_output_fd(int dflt) { + return dflt; + } + protected: Session(); virtual ~Session(); diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 71718af3231..93c4a589544 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -88,12 +88,14 @@ ExplicitSourcesCommand ExplicitSourcesCommand::singleton( " LIBRARY is the basename of the original file name,\n" " e.g. 
libc-2.32.so\n"); -static void parent_dir(string& s) { +static void dir_name(string& s) { size_t p = s.rfind('/'); - if (p == string::npos) { + if (p == string::npos || (p == 0 && s.size() == 1)) { s.clear(); } else if (p > 0) { s.resize(p); + } else { + s.resize(1); } } @@ -104,28 +106,79 @@ static void base_name(string& s) { } } +static bool is_absolute(string& s) { + return s[0] == '/'; +} + +static void prepend_path(const char* prefix, string& s) { + size_t len = strlen(prefix); + if (!len) { + return; + } + if (prefix[len - 1] == '/') { + s = string(prefix) + s; + } else { + s = string(prefix) + '/' + s; + } +} + +struct DirExistsCache { + unordered_map cache; + bool dir_exists(const string& dir) { + auto it = cache.find(dir); + if (it != cache.end()) { + return it->second; + } + bool exists = access(dir.c_str(), F_OK) == 0; + cache.insert(make_pair(dir, exists)); + return exists; + } +}; + +// Resolve a file name relative to a compilation directory and relative directory. // file_name cannot be null, but the others can be. -static void resolve_file_name(const char* original_file_dir, - const char* comp_dir, const char* rel_dir, - const char* file_name, set* file_names) { - const char* names[] = { original_file_dir, comp_dir, rel_dir, file_name }; - ssize_t first_absolute = -1; - for (ssize_t i = 0; i < 4; ++i) { - if (names[i] && names[i][0] == '/') { - first_absolute = i; - } - } - string s = first_absolute >= 0 ? "" : "/"; - for (size_t i = (first_absolute >= 0 ? first_absolute : 0); i < 4; ++i) { - if (!names[i]) { - continue; +// Takes into accout the original file name as follows: +// -- if comp_dir, rel_dir or file_name are absolute, or original_file_name is NULL, +// then ignore original_file_name. +// The result is just the result of combining comp_dir/rel_dir/file_name. +// -- otherwise they're all relative to some build directory. We hypothesize +// the build directory is some ancestor directory of original_file_name. 
+// We try making comp_dir/rel_dir/file_name relative to each ancestor directory +// of original_file_name, and if we find a file there, we return that name. +static string resolve_file_name(const char* original_file_name, + const char* comp_dir, const char* rel_dir, + const char* file_name, + DirExistsCache& dir_exists_cache) { + string path = file_name; + if (is_absolute(path)) { + return path; + } + if (rel_dir) { + prepend_path(rel_dir, path); + if (is_absolute(path)) { + return path; + } + } + if (comp_dir) { + prepend_path(comp_dir, path); + if (is_absolute(path)) { + return path; + } + } + if (!original_file_name) { + return path; + } + string original(original_file_name); + while (true) { + dir_name(original); + if (original.empty()) { + return path; } - if (!s.empty() && s.back() != '/') { - s.push_back('/'); + string candidate = original + "/" + path; + if (dir_exists_cache.dir_exists(candidate)) { + return candidate; } - s += names[i]; } - file_names->insert(move(s)); } struct DwoInfo { @@ -133,6 +186,7 @@ struct DwoInfo { string trace_file; // Could be an empty string string comp_dir; + string full_path; uint64_t id; }; @@ -141,7 +195,8 @@ static bool process_compilation_units(ElfFileReader& reader, const string& trace_relative_name, const string& original_file_name, const string& comp_dir_substitution, - set* file_names, vector* dwos) { + set* file_names, vector* dwos, + DirExistsCache& dir_exists_cache) { DwarfSpan debug_info = reader.dwarf_section(".debug_info"); DwarfSpan debug_abbrev = reader.dwarf_section(".debug_abbrev"); DwarfSpan debug_str = reader.dwarf_section(".debug_str"); @@ -162,9 +217,6 @@ static bool process_compilation_units(ElfFileReader& reader, debug_line_str, }; - string original_file_dir = original_file_name; - parent_dir(original_file_dir); - DwarfAbbrevs abbrevs(debug_abbrev); do { bool ok = true; @@ -210,11 +262,12 @@ static bool process_compilation_units(ElfFileReader& reader, } } if (has_dwo_id) { + string full_name = 
resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, dwo_name, dir_exists_cache); string c; if (comp_dir) { c = comp_dir; } - dwos->push_back({ dwo_name, trace_relative_name, move(c), dwo_id }); + dwos->push_back({ dwo_name, trace_relative_name, move(c), full_name, dwo_id }); } else { LOG(warn) << "DW_AT_GNU_dwo_name but not DW_AT_GNU_dwo_id"; } @@ -224,7 +277,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } if (source_file_name) { - resolve_file_name(original_file_dir.c_str(), comp_dir, nullptr, source_file_name, file_names); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, source_file_name, dir_exists_cache)); } intptr_t stmt_list = cu.die().section_ptr_attr(DW_AT_stmt_list, &ok); if (stmt_list < 0 || !ok) { @@ -240,7 +293,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } const char* dir = lines.directories()[f.directory_index]; - resolve_file_name(original_file_dir.c_str(), comp_dir, dir, f.file_name, file_names); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, dir, f.file_name, dir_exists_cache)); } } while (!debug_info.empty()); @@ -255,13 +308,13 @@ struct ExternalDebugInfo { if (path < other.path) { return true; } - if (path == other.path) { + if (path > other.path) { return false; } if (build_id < other.build_id) { return true; } - if (build_id == other.build_id) { + if (build_id > other.build_id) { return false; } return type < other.type; @@ -288,14 +341,20 @@ find_auxiliary_file(const string& original_file_name, // Try in the same directory as the original file. 
string original_file_dir = original_file_name; - parent_dir(original_file_dir); + dir_name(original_file_dir); full_file_name = original_file_dir + "/" + aux_file_name; normalize_file_name(full_file_name); fd = ScopedFd(full_file_name.c_str(), O_RDONLY); if (fd.is_open()) { - goto found; + // Debian/Ubuntu built /lib/x86_64-linux-gnu/ld-2.31.so with a + // .gnu_debuglink of "ld-2.31.so", expecting it to be found at + // /usr/lib/debug/lib/x86_64-linux-gnu/ld-2.31.so. So we need to make + // sure we aren't using the binary file as its own debuginfo. + if (real_path(original_file_name) != real_path(full_file_name)) { + goto found; + } } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Next try in a subdirectory called .debug full_file_name = original_file_dir + "/.debug/" + aux_file_name; @@ -304,7 +363,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Then try in /usr/lib/debug full_file_name = "/usr/lib/debug/" + aux_file_name; @@ -313,7 +372,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Try in an appropriate subdirectory of /usr/lib/debug full_file_name = "/usr/lib/debug" + original_file_dir + "/" + aux_file_name; @@ -322,7 +381,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // If none of those worked, give up. 
LOG(warn) << "Exhausted auxilliary debuginfo search locations for " << aux_file_name; @@ -339,6 +398,36 @@ find_auxiliary_file(const string& original_file_name, return reader; } +static unique_ptr +find_auxiliary_file_by_buildid(ElfFileReader& trace_file_reader, string& full_file_name) { + string build_id = trace_file_reader.read_buildid(); + if (build_id.empty()) { + LOG(warn) << "Main ELF binary has no build ID!"; + return nullptr; + } + if (build_id.size() < 3) { + LOG(warn) << "Build ID is too short!"; + return nullptr; + } + + string path = "/usr/lib/debug/.build-id/" + build_id.substr(0, 2) + "/" + build_id.substr(2) + ".debug"; + ScopedFd fd(path.c_str(), O_RDONLY); + if (!fd.is_open()) { + LOG(info) << "Can't find external debuginfo file " << path; + return nullptr; + } + + LOG(info) << "Examining external by buildid " << path; + auto reader = make_unique(fd); + if (!reader->ok()) { + LOG(warn) << "Not an ELF file!"; + return nullptr; + } + full_file_name = path; + return reader; +} + +// Traverse the compilation units of an auxiliary file to collect their source files static bool process_auxiliary_file(ElfFileReader& trace_file_reader, ElfFileReader& aux_file_reader, ElfFileReader* alt_file_reader, @@ -350,7 +439,8 @@ static bool process_auxiliary_file(ElfFileReader& trace_file_reader, const map& comp_dir_substitutions, vector* dwos, set* external_debug_info, - bool already_used_file) { + bool already_used_file, + DirExistsCache& dir_exists_cache) { string build_id = trace_file_reader.read_buildid(); if (build_id.empty()) { LOG(warn) << "Main ELF binary has no build ID!"; @@ -365,12 +455,12 @@ static bool process_auxiliary_file(ElfFileReader& trace_file_reader, LOG(debug) << "\tFound comp_dir substitution " << it->second; did_work = process_compilation_units(aux_file_reader, alt_file_reader, trace_relative_name, original_file_name, - it->second, file_names, dwos); + it->second, file_names, dwos, dir_exists_cache); } else { - LOG(debug) << "\tNone found"; + 
LOG(debug) << "\tNo comp_dir substitution found"; did_work = process_compilation_units(aux_file_reader, alt_file_reader, trace_relative_name, original_file_name, - {}, file_names, dwos); + {}, file_names, dwos, dir_exists_cache); } if (!did_work) { @@ -392,12 +482,16 @@ static bool try_debuglink_file(ElfFileReader& trace_file_reader, set* file_names, const string& aux_file_name, const map& comp_dir_substitutions, vector* dwos, - set* external_debug_info) { + set* external_debug_info, + DirExistsCache& dir_exists_cache) { string full_file_name; auto reader = find_auxiliary_file(original_file_name, aux_file_name, full_file_name); if (!reader) { - return false; + reader = find_auxiliary_file_by_buildid(trace_file_reader, full_file_name); + if (!reader) { + return false; + } } /* A debuglink file can have its own debugaltlink */ @@ -410,14 +504,14 @@ static bool try_debuglink_file(ElfFileReader& trace_file_reader, trace_relative_name, original_file_name, file_names, full_file_name, DEBUGLINK, comp_dir_substitutions, - dwos, external_debug_info, false); + dwos, external_debug_info, false, dir_exists_cache); if (altlink_reader) { has_source_files |= process_auxiliary_file(trace_file_reader, *altlink_reader, nullptr, trace_relative_name, original_file_name, file_names, full_altfile_name, DEBUGALTLINK, comp_dir_substitutions, - dwos, external_debug_info, has_source_files); + dwos, external_debug_info, has_source_files, dir_exists_cache); } return has_source_files; } @@ -522,7 +616,7 @@ static string resolve_symlinks(const string& path, string target; if (buf[0] != '/') { target = base; - parent_dir(target); + dir_name(target); if (target.size() > 1) { target.push_back('/'); } @@ -602,6 +696,7 @@ static int sources(const map& binary_file_names, const map external_debug_info; vector dwos; vector output_comp_dir_substitutions; + DirExistsCache dir_exists_cache; for (auto& pair : binary_file_names) { string trace_relative_name = pair.first; string original_name = pair.second; 
@@ -634,12 +729,12 @@ static int sources(const map& binary_file_names, const mapsecond }); has_source_files = process_compilation_units(reader, altlink_reader.get(), trace_relative_name, pair.second, - it->second, &file_names, &dwos); + it->second, &file_names, &dwos, dir_exists_cache); } else { - LOG(debug) << "\tNone found"; + LOG(debug) << "\tNo comp_dir substitution found"; has_source_files = process_compilation_units(reader, altlink_reader.get(), trace_relative_name, pair.second, - {}, &file_names, &dwos); + {}, &file_names, &dwos, dir_exists_cache); } /* If the original binary had source files, force the inclusion of any debugaltlink * file, even if it does not itself have compilation units (it may have relevant strings) @@ -650,7 +745,7 @@ static int sources(const map& binary_file_names, const map& binary_file_names, const map& binary_file_names, const map& binary_file_names, const map& args) { if (!files) { FATAL() << "Can't open trace dir"; } + closedir(files); map binary_file_names; while (true) { diff --git a/src/Task.cc b/src/Task.cc index 13106ed1ed7..47a7f396cc1 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -163,22 +163,28 @@ void Task::wait_exit() { * for this we add `| WNOWAIT` to prevent dequeing the event and simply take * it as an indication that the task has execed. */ - int ret = waitid(P_PID, tid, &info, WSTOPPED | WNOWAIT); - if (ret == 0) { - ASSERT(this, info.si_pid == tid) << "Expected " << tid << " got " << info.si_pid; - if (WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXIT) { - // It's possible that the earlier exit event was synthetic, in which - // case we're only now catching up to the real process exit. In that - // case, just ask the process to actually exit. (TODO: We may want to - // catch this earlier). 
- return proceed_to_exit(true); - } - ASSERT(this, WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXEC) - << "Expected PTRACE_EVENT_EXEC, got " << WaitStatus(info); - // The kernel will do the reaping for us in this case - was_reaped = true; - } else { - ASSERT(this, ret == -1 && errno == ECHILD) << "Got ret=" << ret << " errno=" << errno; + while (true) { + int ret = waitid(P_PID, tid, &info, WSTOPPED | WNOWAIT); + if (ret == 0) { + ASSERT(this, info.si_pid == tid) << "Expected " << tid << " got " << info.si_pid; + if (WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXIT) { + // It's possible that the earlier exit event was synthetic, in which + // case we're only now catching up to the real process exit. In that + // case, just ask the process to actually exit. (TODO: We may want to + // catch this earlier). + return proceed_to_exit(true); + } + ASSERT(this, WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXEC) + << "Expected PTRACE_EVENT_EXEC, got " << WaitStatus(info); + // The kernel will do the reaping for us in this case + was_reaped = true; + break; + } else if (ret == -1 && errno == EINTR) { + continue; + } else { + ASSERT(this, ret == -1 && errno == ECHILD) << "Got ret=" << ret << " errno=" << errno; + break; + } } } @@ -212,15 +218,17 @@ WaitStatus Task::kill() { */ LOG(debug) << "Sending SIGKILL to " << tid; int ret = syscall(SYS_tgkill, real_tgid(), tid, SIGKILL); - DEBUG_ASSERT(ret == 0); + ASSERT(this, ret == 0); int raw_status = -1; int wait_ret = ::waitpid(tid, &raw_status, __WALL | WUNTRACED); WaitStatus status = WaitStatus(raw_status); LOG(debug) << " -> " << status; bool is_exit_event = status.ptrace_event() == PTRACE_EVENT_EXIT; - DEBUG_ASSERT(wait_ret == tid && - (is_exit_event || status.type() == WaitStatus::FATAL_SIGNAL || - status.type() == WaitStatus::EXIT)); + ASSERT(this, wait_ret == tid) << "Expected " << tid << " got " << wait_ret; + ASSERT(this, + is_exit_event || status.type() == WaitStatus::FATAL_SIGNAL || + status.type() == 
WaitStatus::EXIT) + << "Expected exit or fatal signal for " << tid << " got " << status; did_kill(); if (is_exit_event) { /* If this is the exit event, we can detach here and the task will @@ -228,8 +236,9 @@ WaitStatus Task::kill() { * the exit event, we already reaped it from the ptrace perspective, * which implicitly detached. */ - if (ptrace_if_alive(PTRACE_GETEVENTMSG, nullptr, &raw_status)) { - status = WaitStatus(raw_status); + unsigned long long_status; + if (ptrace_if_alive(PTRACE_GETEVENTMSG, nullptr, &long_status)) { + status = WaitStatus(long_status); } else { status = WaitStatus::for_fatal_sig(SIGKILL); } @@ -562,7 +571,8 @@ void Task::on_syscall_exit_arch(int syscallno, const Registers& regs) { // failing. // SYS_rrcall_mprotect_record always fails with ENOSYS, though we want to // note its usage here. - if (regs.syscall_failed() && !is_mprotect_syscall(syscallno, regs.arch())) { + if (regs.syscall_failed() && !is_mprotect_syscall(syscallno, regs.arch()) + && !is_pkey_mprotect_syscall(syscallno, regs.arch())) { return; } @@ -576,6 +586,7 @@ void Task::on_syscall_exit_arch(int syscallno, const Registers& regs) { return; } + case Arch::pkey_mprotect: case Arch::mprotect: { remote_ptr addr = regs.orig_arg1(); size_t num_bytes = regs.arg2(); @@ -1484,13 +1495,11 @@ void Task::flush_regs() { orig_syscallno_dirty = false; } #elif defined(__aarch64__) - struct iovec vec = { &ptrace_regs, - sizeof(ptrace_regs) }; - if (ptrace_if_alive(PTRACE_SETREGSET, NT_PRSTATUS, &vec)) { + if (ptrace_if_alive(PTRACE_SETREGSET, NT_PRSTATUS, &ptrace_regs)) { registers_dirty = false; } #else - #error "Unknown archietcture" + #error "Unknown architecture" #endif } #if defined(__i386__) || defined(__x86_64__) @@ -1502,7 +1511,7 @@ void Task::flush_regs() { uintptr_t syscall = registers.original_syscallno(); struct iovec vec = { &syscall, sizeof(syscall) }; - LOG(debug) << "Chaning syscall to " << syscall; + LOG(debug) << "Changing syscall to " << syscall; if 
(ptrace_if_alive(PTRACE_SETREGSET, NT_ARM_SYSTEM_CALL, &vec)) { orig_syscallno_dirty = false; } @@ -2906,21 +2915,31 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, return; } + ssize_t nwritten = write_bytes_helper_no_notifications(addr, buf_size, buf, ok, flags); + if (nwritten > 0) { + vm()->notify_written(addr, nwritten, flags); + } +} + +ssize_t Task::write_bytes_helper_no_notifications(remote_ptr addr, ssize_t buf_size, + const void* buf, bool* ok, uint32_t flags) { + ASSERT(this, buf_size >= 0) << "Invalid buf_size " << buf_size; + if (0 == buf_size) { + return 0; + } + if (uint8_t* local_addr = as->local_mapping(addr, buf_size)) { memcpy(local_addr, buf, buf_size); - return; + return buf_size; } if (!as->mem_fd().is_open()) { ssize_t nwritten = write_bytes_ptrace(addr, buf_size, static_cast(buf)); - if (nwritten > 0) { - vm()->notify_written(addr, nwritten, flags); - } if (ok && nwritten < buf_size) { *ok = false; } - return; + return nwritten; } errno = 0; @@ -2928,7 +2947,7 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, // See comment in read_bytes_helper(). 
if (0 == nwritten && 0 == errno) { open_mem_fd(); - return write_bytes_helper(addr, buf_size, buf, ok, flags); + return write_bytes_helper_no_notifications(addr, buf_size, buf, ok, flags); } if (errno == EPERM) { FATAL() << "Can't write to /proc/" << tid << "/mem\n" @@ -2944,9 +2963,7 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, << "Should have written " << buf_size << " bytes to " << addr << ", but only wrote " << nwritten; } - if (nwritten > 0) { - vm()->notify_written(addr, nwritten, flags); - } + return nwritten; } uint64_t Task::write_ranges(const vector& ranges, @@ -2966,6 +2983,49 @@ uint64_t Task::write_ranges(const vector& ranges, return result; } +void Task::write_zeroes(unique_ptr* remote, remote_ptr addr, size_t size) { + if (!size) { + return; + } + + bool remove_ok = true; + remote_ptr initial_addr = addr; + size_t initial_size = size; + vector zeroes; + while (size > 0) { + size_t bytes; + remote_ptr first_page = ceil_page_size(addr); + if (addr < first_page) { + bytes = min(first_page - addr, size); + } else { + if (remove_ok) { + remote_ptr last_page = floor_page_size(addr + size); + if (first_page < last_page) { + if (!*remote) { + *remote = make_unique(this); + } + int ret = (*remote)->syscall(syscall_number_for_madvise(arch()), first_page, last_page - first_page, MADV_REMOVE); + if (ret == 0) { + addr = last_page; + size -= last_page - first_page; + continue; + } + // Don't try MADV_REMOVE again + remove_ok = false; + } + } + bytes = min(4*1024*1024, size); + } + zeroes.resize(bytes); + memset(zeroes.data(), 0, bytes); + ssize_t written = write_bytes_helper_no_notifications(addr, bytes, zeroes.data(), nullptr, 0); + ASSERT(this, written == (ssize_t)bytes); + addr += bytes; + size -= bytes; + } + vm()->notify_written(initial_addr, initial_size, 0); +} + const TraceStream* Task::trace_stream() const { if (session().as_record()) { return &session().as_record()->trace_writer(); @@ -3097,8 +3157,8 @@ static long 
perform_remote_clone(AutoRemoteSyscalls& remote, } static void setup_fd_table(Task* t, FdTable& fds, int tracee_socket_fd_number) { - fds.add_monitor(t, STDOUT_FILENO, new StdioMonitor(STDOUT_FILENO)); - fds.add_monitor(t, STDERR_FILENO, new StdioMonitor(STDERR_FILENO)); + fds.add_monitor(t, STDOUT_FILENO, new StdioMonitor(t->session().tracee_output_fd(STDOUT_FILENO))); + fds.add_monitor(t, STDERR_FILENO, new StdioMonitor(t->session().tracee_output_fd(STDERR_FILENO))); fds.add_monitor(t, RR_MAGIC_SAVE_DATA_FD, new MagicSaveDataMonitor()); fds.add_monitor(t, tracee_socket_fd_number, new PreserveFileMonitor()); } @@ -3148,8 +3208,6 @@ static void set_up_process(Session& session, const ScopedFd& err_fd, /* TODO tracees can probably undo some of the setup below * ... */ - restore_initial_resource_limits(); - /* CLOEXEC so that the original fd here will be closed by the exec that's * about to happen. */ @@ -3185,6 +3243,10 @@ static void set_up_process(Session& session, const ScopedFd& err_fd, // signals being sent to these processes by the terminal --- in particular // SIGTSTP/SIGINT/SIGWINCH. setsid(); + // Preserve increased resource limits, in case the tracee + // increased its limits and we need high limits to apply during replay. + } else { + restore_initial_resource_limits(); } /* Do any architecture specific setup, such as disabling non-deterministic diff --git a/src/Task.h b/src/Task.h index 9b86f98aeb5..a214f87988f 100644 --- a/src/Task.h +++ b/src/Task.h @@ -728,6 +728,13 @@ class Task { uint64_t write_ranges(const std::vector& ranges, void* data, size_t size); + /** + * Writes zeroes to the given memory range. + * For efficiency tries using MADV_REMOVE via `remote`. Caches + * an AutoRemoteSyscalls in `*remote`. + */ + void write_zeroes(std::unique_ptr* remote, remote_ptr addr, size_t size); + /** * Don't use these helpers directly; use the safer and more * convenient variants above. 
@@ -749,6 +756,13 @@ class Task { void write_bytes_helper(remote_ptr addr, ssize_t buf_size, const void* buf, bool* ok = nullptr, uint32_t flags = 0); + /** + * |flags| is bits from WriteFlags. + * Returns number of bytes written. + */ + ssize_t write_bytes_helper_no_notifications(remote_ptr addr, ssize_t buf_size, + const void* buf, bool* ok = nullptr, + uint32_t flags = 0); SupportedArch detect_syscall_arch(); diff --git a/src/TraceInfoCommand.cc b/src/TraceInfoCommand.cc index e85794fcd56..2128d381470 100644 --- a/src/TraceInfoCommand.cc +++ b/src/TraceInfoCommand.cc @@ -41,6 +41,7 @@ TraceInfoCommand TraceInfoCommand::singleton( " Dump trace header in JSON format.\n"); static int dump_trace_info(const string& trace_dir, FILE* out) { + int ret = 0; TraceReader trace(trace_dir); fputs("{\n", out); @@ -61,6 +62,8 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { fprintf(out, " \"cpuidFaulting\":%s,\n", trace.uses_cpuid_faulting() ? "true" : "false"); + fprintf(out, " \"requiredForwardCompatibilityVersion\":%d,\n", trace.required_forward_compatibility_version()); + const char* semantics; switch (trace.ticks_semantics()) { case TICKS_RETIRED_CONDITIONAL_BRANCHES: semantics = "rcb"; break; @@ -99,10 +102,10 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { flags.cpu_unbound = true; ReplaySession::shr_ptr replay_session = ReplaySession::create(trace_dir, flags); - fputs(" \"environ\":[", out); while (true) { auto result = replay_session->replay_step(RUN_CONTINUE); if (replay_session->done_initial_exec()) { + fputs(" \"environ\":[", out); auto environ = read_env(replay_session->current_task()); for (size_t i = 0; i < environ.size(); ++i) { if (i > 0) { @@ -110,17 +113,18 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { } fprintf(out, "\n \"%s\"", json_escape(environ[i]).c_str()); } + fputs("\n ]\n", out); break; } if (result.status == REPLAY_EXITED) { fputs("Replay finished before initial exec!\n", stderr); - 
return 1; + ret = 1; + break; } } - fputs("\n ]\n", out); fputs("}\n", out); - return 0; + return ret; } int TraceInfoCommand::run(vector& args) { diff --git a/src/TraceStream.cc b/src/TraceStream.cc index 42109913c6d..a481cf3a343 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -42,9 +42,8 @@ namespace rr { // version number doesn't track the rr version number, because changes // to the trace format will be rare. // -// NB: if you *do* change the trace format for whatever reason, you -// MUST increment this version number. Otherwise users' old traces -// will become unreplayable and they won't know why. +// We don't plan to ever change this again. Instead, we use Cap'n Proto +// to define the trace format in an extensible way (see rr_trace.capnp). // #define TRACE_VERSION 85 @@ -417,6 +416,11 @@ void TraceWriter::write_frame(RecordTask* t, const Event& ev, w.setTid(r.rec_tid); w.setAddr(r.addr.as_int()); w.setSize(r.size); + auto holes = w.initHoles(r.holes.size()); + for (size_t j = 0; j < r.holes.size(); ++j) { + holes[j].setOffset(r.holes[j].offset); + holes[j].setSize(r.holes[j].size); + } } raw_recs.clear(); frame.setArch(to_trace_arch(t->arch())); @@ -533,7 +537,14 @@ TraceFrame TraceReader::read_frame() { for (size_t i = 0; i < raw_recs.size(); ++i) { // Build list in reverse order so we can efficiently pull records from it auto w = mem_writes[raw_recs.size() - 1 - i]; - raw_recs[i] = { w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()) }; + auto holes = w.getHoles(); + vector h; + h.resize(holes.size()); + for (size_t j = 0; j < h.size(); ++j) { + const auto& hole = holes[j]; + h[j] = { hole.getOffset(), hole.getSize() }; + } + raw_recs[i] = { w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()), h }; } TraceFrame ret; @@ -1152,10 +1163,14 @@ KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found, map.getFileOffsetBytes()); } -void TraceWriter::write_raw(pid_t rec_tid, const void* d, size_t len, - remote_ptr addr) { 
+void TraceWriter::write_raw_header(pid_t rec_tid, size_t total_len, + remote_ptr addr, + const std::vector& holes = std::vector()) { + raw_recs.push_back({ addr, total_len, rec_tid, holes }); +} + +void TraceWriter::write_raw_data(const void* d, size_t len) { auto& data = writer(RAW_DATA); - raw_recs.push_back({ addr, len, rec_tid }); data.write(d, len); } @@ -1174,8 +1189,44 @@ bool TraceReader::read_raw_data_for_frame(RawData& d) { auto& rec = raw_recs[raw_recs.size() - 1]; d.rec_tid = rec.rec_tid; d.addr = rec.addr; + d.data.resize(rec.size); - reader(RAW_DATA).read((char*)d.data.data(), rec.size); + auto hole_iter = rec.holes.begin(); + uintptr_t offset = 0; + while (offset < d.data.size()) { + uintptr_t end = rec.size; + if (hole_iter != rec.holes.end()) { + if (offset == hole_iter->offset) { + memset(d.data.data() + offset, 0, hole_iter->size); + offset += hole_iter->size; + ++hole_iter; + continue; + } + end = hole_iter->offset; + } + reader(RAW_DATA).read((char*)d.data.data() + offset, end - offset); + offset = end; + } + + raw_recs.pop_back(); + return true; +} + +bool TraceReader::read_raw_data_for_frame_with_holes(RawDataWithHoles& d) { + if (raw_recs.empty()) { + return false; + } + auto& rec = raw_recs[raw_recs.size() - 1]; + d.rec_tid = rec.rec_tid; + d.addr = rec.addr; + d.holes = move(rec.holes); + size_t data_size = rec.size; + for (auto& h : d.holes) { + data_size -= h.size; + } + d.data.resize(data_size); + reader(RAW_DATA).read((char*)d.data.data(), data_size); + raw_recs.pop_back(); return true; } @@ -1185,7 +1236,11 @@ bool TraceReader::read_raw_data_metadata_for_frame(RawDataMetadata& d) { return false; } d = raw_recs[raw_recs.size() - 1]; - reader(RAW_DATA).skip(d.size); + size_t data_size = d.size; + for (auto& h : d.holes) { + data_size -= h.size; + } + reader(RAW_DATA).skip(data_size); raw_recs.pop_back(); return true; } @@ -1318,6 +1373,7 @@ void TraceWriter::close(CloseStatus status, const TraceUuid* uuid) { header.setTicksSemantics( 
to_trace_ticks_semantics(PerfCounters::default_ticks_semantics())); header.setSyscallbufProtocolVersion(SYSCALLBUF_PROTOCOL_VERSION); + header.setRequiredForwardCompatibilityVersion(FORWARD_COMPATIBILITY_VERSION); header.setPreloadThreadLocalsRecorded(true); header.setRrcallBase(syscall_number_for_rrcall_init_preload(x86_64)); @@ -1486,6 +1542,7 @@ TraceReader::TraceReader(const string& dir) preload_thread_locals_recorded_ = header.getPreloadThreadLocalsRecorded(); ticks_semantics_ = from_trace_ticks_semantics(header.getTicksSemantics()); rrcall_base_ = header.getRrcallBase(); + required_forward_compatibility_version_ = header.getRequiredForwardCompatibilityVersion(); quirks_ = 0; { auto quirks = header.getQuirks(); @@ -1566,6 +1623,7 @@ TraceReader::TraceReader(const TraceReader& other) exclusion_range_ = other.exclusion_range_; quirks_ = other.quirks_; clear_fip_fdp_ = other.clear_fip_fdp_; + required_forward_compatibility_version_ = other.required_forward_compatibility_version_; } TraceReader::~TraceReader() {} diff --git a/src/TraceStream.h b/src/TraceStream.h index d7d084aa9b2..3e66e00f389 100644 --- a/src/TraceStream.h +++ b/src/TraceStream.h @@ -22,12 +22,22 @@ namespace rr { +/** + * Bump this when rr changes mean that traces produced by new rr can't be replayed by old rr. + */ +const int FORWARD_COMPATIBILITY_VERSION = 1; + struct CPUIDRecord; struct DisableCPUIDFeatures; class KernelMapping; class RecordTask; struct TraceUuid; +struct WriteHole { + uint64_t offset; + uint64_t size; +}; + /** * TraceStream stores all the data common to both recording and * replay. TraceWriter deals with recording-specific logic, and @@ -45,6 +55,7 @@ class TraceStream { remote_ptr addr; size_t size; pid_t rec_tid; + std::vector holes; }; /** @@ -213,8 +224,13 @@ class TraceWriter : public TraceStream { * 'addr' is the address in the tracee where the data came from/will be * restored to. 
*/ - void write_raw(pid_t tid, const void* data, size_t len, - remote_ptr addr); + void write_raw(pid_t tid, const void* data, size_t len, remote_ptr addr) { + write_raw_data(data, len); + write_raw_header(tid, len, addr, std::vector()); + } + void write_raw_data(const void* data, size_t len); + void write_raw_header(pid_t tid, size_t total_len, remote_ptr addr, + const std::vector& holes); /** * Write a task event (clone or exec record) to the trace. @@ -313,7 +329,7 @@ class TraceReader : public TraceStream { public: /** * A parcel of recorded tracee data. |data| contains the data read - * from |addr| in the tracee. + * from |addr| in the tracee. `data` contains zeroes for holes. */ struct RawData { std::vector data; @@ -321,6 +337,16 @@ class TraceReader : public TraceStream { pid_t rec_tid; }; + /** + * Like RawData, but returns positions of holes. `data` excludes holes. + */ + struct RawDataWithHoles { + std::vector data; + remote_ptr addr; + pid_t rec_tid; + std::vector holes; + }; + /** * Read relevant data from the trace. * @@ -361,9 +387,18 @@ class TraceReader : public TraceStream { /** * Reads the next raw data record for last-read frame. If there are no more * raw data records for this frame, return false. + * Holes are filled with zeroes in the output buffer. */ bool read_raw_data_for_frame(RawData& d); + /** + * Reads the next raw data record for last-read frame. If there are no more + * raw data records for this frame, return false. + * Returns hole metadata so you can do something smarter with it than + * explicitly filling with zeroes. + */ + bool read_raw_data_for_frame_with_holes(RawDataWithHoles& d); + /** * Like read_raw_data_for_frame, but doesn't actually read the data bytes. * The array is resized but the data is not filled in. 
@@ -450,6 +485,8 @@ class TraceReader : public TraceStream { int quirks() const { return quirks_; } + int required_forward_compatibility_version() const { return required_forward_compatibility_version_; } + private: CompressedReader& reader(Substream s) { return *readers[s]; } const CompressedReader& reader(Substream s) const { return *readers[s]; } @@ -468,6 +505,7 @@ class TraceReader : public TraceStream { bool chaos_mode_known_; bool chaos_mode_; int rrcall_base_; + int required_forward_compatibility_version_; SupportedArch arch_; int quirks_; }; diff --git a/src/chaos-test/futex_wakeup.c b/src/chaos-test/futex_wakeup.c index cd5e7a85d2e..2ca25c07328 100644 --- a/src/chaos-test/futex_wakeup.c +++ b/src/chaos-test/futex_wakeup.c @@ -12,7 +12,7 @@ static void* run_thread(__attribute__((unused)) void* p) { return NULL; } -int main(__attribute__((unused)) int argc) { +int main(void) { int i; pthread_t thread; struct timespec ts = { 0, 10000000 }; diff --git a/src/kernel_abi.cc b/src/kernel_abi.cc index 0f4c092e5e5..9ad4271554a 100644 --- a/src/kernel_abi.cc +++ b/src/kernel_abi.cc @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/src/kernel_abi.h b/src/kernel_abi.h index 65a6de63d2e..fb05fee38db 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -108,6 +108,7 @@ struct FcntlConstants { OFD_SETLK = 37, OFD_SETLKW = 38, // Other Linux-specific operations + NOTIFY = 0x400 + 2, DUPFD_CLOEXEC = 0x400 + 6, SETPIPE_SZ = 0x400 + 7, GETPIPE_SZ = 0x400 + 8, @@ -622,6 +623,14 @@ struct BaseArch : public wordsize, }; RR_VERIFY_TYPE(termio); + struct seccomp_notif_sizes { + uint16_t seccomp_notif; + uint16_t seccomp_notif_resp; + uint16_t seccomp_data; + }; + // seccomp_notif_sizes is not present in older kernels + // RR_VERIFY_TYPE(seccomp_notif_sizes); + 
struct serial_struct { signed_int type; signed_int line; @@ -1707,6 +1716,124 @@ struct BaseArch : public wordsize, __u32 exe_fd; }; RR_VERIFY_TYPE(prctl_mm_map); + + struct fiemap_extent { + __u64 fe_logical; + __u64 fe_physical; + __u64 fe_length; + __u64 fe_reserved64[2]; + __u32 fe_flags; + __u32 fe_reserved[3]; + }; + RR_VERIFY_TYPE(fiemap_extent); + struct fiemap { + __u64 fm_start; + __u64 fm_length; + __u32 fm_flags; + __u32 fm_mapped_extents; + __u32 fm_extent_count; + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; + }; + RR_VERIFY_TYPE(fiemap); + + struct vt_stat { + unsigned short v_active; + unsigned short v_signal; + unsigned short v_state; + }; + RR_VERIFY_TYPE(vt_stat); + + struct fb_fix_screeninfo { + char id[16]; + unsigned long smem_start; + __u32 smem_len; + __u32 type; + __u32 type_aux; + __u32 visual; + uint16_t xpanstep; + uint16_t ypanstep; + uint16_t xwrapstep; + __u32 line_length; + unsigned long mmio_start; + __u32 mmio_len; + __u32 accel; + uint16_t capabilities; + uint16_t reserved[2]; + }; + RR_VERIFY_TYPE(fb_fix_screeninfo); + + struct fb_bitfield { + __u32 offset; + __u32 length; + __u32 msb_right; + }; + RR_VERIFY_TYPE(fb_bitfield); + struct fb_var_screeninfo { + __u32 xres; + __u32 yres; + __u32 xres_virtual; + __u32 yres_virtual; + __u32 xoffset; + __u32 yoffset; + __u32 bits_per_pixel; + __u32 grayscale; + struct fb_bitfield red; + struct fb_bitfield green; + struct fb_bitfield blue; + struct fb_bitfield transp; + __u32 nonstd; + __u32 active; + __u32 height; + __u32 width; + __u32 accel_flags; + __u32 pixclock; + __u32 left_margin; + __u32 right_margin; + __u32 upper_margin; + __u32 lower_margin; + __u32 hsync_len; + __u32 vsync_len; + __u32 sync; + __u32 vmode; + __u32 rotate; + __u32 colorspace; + __u32 reserved[4]; + }; + RR_VERIFY_TYPE(fb_var_screeninfo); + + struct ib_uverbs_attr { + uint16_t attr_id; /* command specific type attribute */ + uint16_t len; /* only for pointers and IDRs array */ + uint16_t flags; /* 
combination of UVERBS_ATTR_F_XXXX */ + union { + struct { + uint8_t elem_id; + uint8_t reserved; + } enum_data; + uint16_t reserved; + } attr_data; + union { + /* + * ptr to command, inline data, idr/fd or + * ptr to __u32 array of IDRs + */ + uint64_t __attribute__((aligned(8))) data; + /* Used by FD_IN and FD_OUT */ + int64_t data_s64; + }; + }; + + struct ib_uverbs_ioctl_hdr { + uint16_t length; + uint16_t object_id; + uint16_t method_id; + uint16_t num_attrs; + uint64_t __attribute__((aligned(8))) reserved1; + uint32_t driver_id; + uint32_t reserved2; + struct ib_uverbs_attr attrs[0]; + }; }; struct X64Arch : public BaseArch { diff --git a/src/kernel_supplement.h b/src/kernel_supplement.h index 03a6ff2beba..617ed173657 100644 --- a/src/kernel_supplement.h +++ b/src/kernel_supplement.h @@ -5,6 +5,7 @@ #define _GNU_SOURCE 1 +#include #include #include #include @@ -75,6 +76,12 @@ namespace rr { #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 +#endif #ifndef SYS_SECCOMP #define SYS_SECCOMP 1 @@ -358,6 +365,36 @@ enum { BPF_MAP_DELETE_ELEM, BPF_MAP_GET_NEXT_KEY, BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, + BPF_MAP_LOOKUP_BATCH, + BPF_MAP_LOOKUP_AND_DELETE_BATCH, + BPF_MAP_UPDATE_BATCH, + BPF_MAP_DELETE_BATCH, + BPF_LINK_CREATE, + BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, + BPF_ITER_CREATE, + BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; #ifndef O_PATH @@ -380,6 +417,10 @@ enum { #define 
RLIMIT_RTTIME 15 #endif +#ifndef CAP_PERFMON +#define CAP_PERFMON 38 +#endif + } // namespace rr #endif /* RR_KERNEL_SUPPLEMENT_H_ */ diff --git a/src/log.cc b/src/log.cc index 93dcc996e78..67cc422a4c3 100644 --- a/src/log.cc +++ b/src/log.cc @@ -59,12 +59,12 @@ static char simple_to_lower(char ch) { } static string simple_to_lower(const string& s) { - char* buf = new char[s.size() + 1]; + std::unique_ptr buf(new char[s.size() + 1]); for (size_t i = 0; i < s.size(); ++i) { buf[i] = simple_to_lower(s[i]); } buf[s.size()] = 0; - return string(buf); + return string(buf.get()); } #if __has_attribute(require_constant_initialization) diff --git a/src/preload/raw_syscall.S b/src/preload/raw_syscall.S index 717109198b3..1c42d86c7cd 100644 --- a/src/preload/raw_syscall.S +++ b/src/preload/raw_syscall.S @@ -136,6 +136,24 @@ _raw_syscall: .cfi_endproc .size _raw_syscall, . - _raw_syscall +#elif defined(__aarch64__) + .text + .globl _raw_syscall + .hidden _raw_syscall + .type _raw_syscall, @function +_raw_syscall: + .cfi_startproc + mov x8,x0 + mov x0,x1 + mov x1,x2 + mov x2,x3 + mov x3,x4 + mov x4,x5 + mov x5,x6 + svc #0 + ret + .cfi_endproc + .size _raw_syscall, . 
- _raw_syscall #else #error unknown CPU architecture #endif /* __i386__/__x86_64__ */ diff --git a/src/preload/rr_page.S b/src/preload/rr_page.S index b8136f2f093..aa1e7ae5e62 100644 --- a/src/preload/rr_page.S +++ b/src/preload/rr_page.S @@ -33,6 +33,8 @@ #define TRAP \ int $3; \ ret +#define PAGE_ALIGN \ + .align 0x1000 #elif defined(__x86_64__) #define CALL \ syscall; \ @@ -43,6 +45,8 @@ #define TRAP \ nop; int $3; \ ret +#define PAGE_ALIGN \ + .align 0x1000 #elif defined(__aarch64__) #define CALL \ svc #0; \ @@ -53,19 +57,21 @@ #define TRAP \ brk #0; \ ret +#define PAGE_ALIGN \ + .align 12 #endif .section .sh_placeholder, "a" -.align 0x1000 +PAGE_ALIGN .fill 0x1000, 1, 0xff .section .vdso.text, "a", @progbits -.align 0x1000 +PAGE_ALIGN #include "rr_vdso.S" .section .record.text, "a", @progbits -.align 0x1000 +PAGE_ALIGN .global rr_page_start rr_page_start: @@ -76,7 +82,7 @@ rr_page_start: #include "rr_page_instructions.S" .section .replay.text, "", @progbits -.align 0x1000 +PAGE_ALIGN replay_page: // No CFI instructions or symbols for the replay page - we'll implicitly share // those of the record copy diff --git a/src/preload/rr_page.ld b/src/preload/rr_page.ld index 552ef883c2b..f0e5ca909f5 100644 --- a/src/preload/rr_page.ld +++ b/src/preload/rr_page.ld @@ -37,3 +37,9 @@ SECTIONS .replay.text : { *(.replay.text) } :replay /DISCARD/ : { *(.debug_* ) } } + +VERSION { + VER_1 { + global:__vdso_time; + }; +} \ No newline at end of file diff --git a/src/preload/rr_vdso.S b/src/preload/rr_vdso.S index 82dd43870e5..b673cb2e98a 100644 --- a/src/preload/rr_vdso.S +++ b/src/preload/rr_vdso.S @@ -38,6 +38,9 @@ WEAK_ALIAS(time, __vdso_time) WEAK_ALIAS(clock_gettime, __vdso_clock_gettime) WEAK_ALIAS(gettimeofday,__vdso_gettimeofday) +// Dummy versioned symbol to trigger presence of DT_VERDEF/DT_VERSYM/DT_VERDEFNUM: +.symver __vdso_time,__vdso_time@VER_1 + #elif defined(__i386__) // __vdso functions use the C calling convention, so @@ -89,6 +92,11 @@ 
WEAK_ALIAS(clock_gettime, __vdso_clock_gettime) WEAK_ALIAS(clock_gettime64, __vdso_clock_gettime64) WEAK_ALIAS(gettimeofday,__vdso_gettimeofday) +// Dummy versioned symbol to trigger presence of DT_VERDEF/DT_VERSYM/DT_VERDEFNUM: +.symver __vdso_time,__vdso_time@VER_1 + +#elif defined(__aarch64__) +// XXXkhuey there should probably be something here #else #error "VDSO Hooks not defined for this platform" diff --git a/src/preload/rrcalls.h b/src/preload/rrcalls.h index be11210d454..296b75d15ae 100644 --- a/src/preload/rrcalls.h +++ b/src/preload/rrcalls.h @@ -78,3 +78,9 @@ * process tree, such that it may run without seccomp. */ #define SYS_rrcall_detach_teleport (RR_CALL_BASE + 9) +/** + * Requests that rr reset the time slice signal to the + * requested period. Used for testing interaction corner + * cases between the time slice signal and other rr behavior. + */ +#define SYS_rrcall_arm_time_slice (RR_CALL_BASE + 10) diff --git a/src/preload/syscallbuf.c b/src/preload/syscallbuf.c index 0ab8e0fd6b8..58919b35a2e 100644 --- a/src/preload/syscallbuf.c +++ b/src/preload/syscallbuf.c @@ -280,6 +280,17 @@ static long traced_raw_syscall(const struct syscall_info* call) { RR_PAGE_SYSCALL_TRACED, 0, 0); } +/** + * Make a raw traced syscall using the params in |call|, privileged. + */ +static long privileged_traced_raw_syscall(const struct syscall_info* call) { + /* FIXME: pass |call| to avoid pushing these on the stack + * again. 
*/ + return _raw_syscall(call->no, call->args[0], call->args[1], call->args[2], + call->args[3], call->args[4], call->args[5], + RR_PAGE_SYSCALL_PRIVILEGED_TRACED, 0, 0); +} + #if defined(SYS_fcntl64) #define RR_FCNTL_SYSCALL SYS_fcntl64 #else @@ -443,9 +454,9 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, untraced_replay_assist_syscall1(no, 0) #define privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \ - _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \ - (uintptr_t)a4, (uintptr_t)a5, \ - RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY, 0, 0) + untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ + (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ + RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY) #define privileged_untraced_syscall5(no, a0, a1, a2, a3, a4) \ privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, 0) #define privileged_untraced_syscall4(no, a0, a1, a2, a3) \ @@ -458,6 +469,22 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, privileged_untraced_syscall2(no, a0, 0) #define privileged_untraced_syscall0(no) privileged_untraced_syscall1(no, 0) +#define privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, a5) \ + _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ + (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ + RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY, 0, 0) +#define privileged_unrecorded_syscall5(no, a0, a1, a2, a3, a4) \ + privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, 0) +#define privileged_unrecorded_syscall4(no, a0, a1, a2, a3) \ + privileged_unrecorded_syscall5(no, a0, a1, a2, a3, 0) +#define privileged_unrecorded_syscall3(no, a0, a1, a2) \ + privileged_unrecorded_syscall4(no, a0, a1, a2, 0) +#define privileged_unrecorded_syscall2(no, a0, a1) \ + privileged_unrecorded_syscall3(no, a0, a1, 0) +#define privileged_unrecorded_syscall1(no, a0) \ + privileged_unrecorded_syscall2(no, a0, 0) +#define 
privileged_unrecorded_syscall0(no) privileged_unrecorded_syscall1(no, 0) + #define replay_only_syscall6(no, a0, a1, a2, a3, a4, a5) \ _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \ (uintptr_t)a4, (uintptr_t)a5, \ @@ -473,7 +500,7 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, #define replay_only_syscall0(no) replay_only_syscall1(no, 0) static int privileged_untraced_close(int fd) { - return privileged_untraced_syscall1(SYS_close, fd); + return privileged_unrecorded_syscall1(SYS_close, fd); } static int privileged_untraced_fcntl(int fd, int cmd, ...) { @@ -484,7 +511,7 @@ static int privileged_untraced_fcntl(int fd, int cmd, ...) { arg = va_arg(ap, void*); va_end(ap); - return privileged_untraced_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); + return privileged_unrecorded_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); } /** @@ -923,18 +950,18 @@ static void arm_desched_event(void) { * avoid! :) Although we don't allocate extra space for these * ioctl's, we do record that we called them; the replayer * knows how to skip over them. */ - if ((int)privileged_untraced_syscall3(SYS_ioctl, - thread_locals->desched_counter_fd, - PERF_EVENT_IOC_ENABLE, 0)) { + if ((int)privileged_unrecorded_syscall3(SYS_ioctl, + thread_locals->desched_counter_fd, + PERF_EVENT_IOC_ENABLE, 0)) { fatal("Failed to ENABLE counter"); } } static void disarm_desched_event(void) { /* See above. 
*/ - if ((int)privileged_untraced_syscall3(SYS_ioctl, - thread_locals->desched_counter_fd, - PERF_EVENT_IOC_DISABLE, 0)) { + if ((int)privileged_unrecorded_syscall3(SYS_ioctl, + thread_locals->desched_counter_fd, + PERF_EVENT_IOC_DISABLE, 0)) { fatal("Failed to DISABLE counter"); } } @@ -1007,9 +1034,9 @@ static int start_commit_buffered_syscall(int syscallno, void* record_end, pid_t tid = 0; uid_t uid = 0; if (impose_spurious_desched) { - pid = privileged_untraced_syscall0(SYS_getpid); - tid = privileged_untraced_syscall0(SYS_gettid); - uid = privileged_untraced_syscall0(SYS_getuid); + pid = privileged_unrecorded_syscall0(SYS_getpid); + tid = privileged_unrecorded_syscall0(SYS_gettid); + uid = privileged_unrecorded_syscall0(SYS_getuid); } /* NB: the ordering of the next two statements is @@ -1042,9 +1069,9 @@ static int start_commit_buffered_syscall(int syscallno, void* record_end, si.si_fd = thread_locals->desched_counter_fd; si.si_pid = pid; si.si_uid = uid; - privileged_untraced_syscall4(SYS_rt_tgsigqueueinfo, pid, tid, - globals.desched_sig, - &si); + privileged_unrecorded_syscall4(SYS_rt_tgsigqueueinfo, pid, tid, + globals.desched_sig, + &si); } } return 1; @@ -1287,6 +1314,23 @@ static long sys_generic_nonblocking_fd(const struct syscall_info* call) { return commit_raw_syscall(call->no, ptr, ret); } +/** + * Call this for syscalls that have no memory effects, don't block, and + * have an fd as their first parameter, and should run privileged. 
+ */ +static long privileged_sys_generic_nonblocking_fd(const struct syscall_info* call) { + int fd = call->args[0]; + void* ptr = prep_syscall_for_fd(fd); + long ret; + + if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) { + return privileged_traced_raw_syscall(call); + } + ret = privileged_untraced_syscall6(call->no, fd, call->args[1], call->args[2], + call->args[3], call->args[4], call->args[5]); + return commit_raw_syscall(call->no, ptr, ret); +} + static long sys_clock_gettime(const struct syscall_info* call) { const int syscallno = SYS_clock_gettime; __kernel_clockid_t clk_id = (__kernel_clockid_t)call->args[0]; @@ -1470,8 +1514,10 @@ static int sys_fcntl64_setlkw64(const struct syscall_info* call) { } #if defined(SYS_fcntl64) +/* 32-bit system */ static long sys_fcntl64(const struct syscall_info* call) #else +/* 64-bit system */ static long sys_fcntl(const struct syscall_info* call) #endif { @@ -1490,16 +1536,16 @@ static long sys_fcntl(const struct syscall_info* call) case F_SETOWN_EX: return sys_fcntl64_own_ex(call); -#if F_SETLK != F_SETLK64 case F_SETLK64: -#else +#if !defined(SYS_fcntl64) + /* Also uses 64-bit flock format */ case F_SETLK: #endif return sys_fcntl64_setlk64(call); -#if F_SETLKW != F_SETLKW64 case F_SETLKW64: -#else +#if !defined(SYS_fcntl64) + /* Also uses 64-bit flock format */ case F_SETLKW: #endif return sys_fcntl64_setlkw64(call); @@ -2003,7 +2049,7 @@ static int supported_open(const char* file_name, int flags) { (flags & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); } -static long sys_readlinkat(const struct syscall_info* call); +static long sys_readlinkat(const struct syscall_info* call, int privileged); static int check_file_open_ok(const struct syscall_info* call, int ret, int did_abort) { if (did_abort || ret < 0) { @@ -2014,7 +2060,7 @@ static int check_file_open_ok(const struct syscall_info* call, int ret, int did_ char link[PATH_MAX]; struct syscall_info readlink_call = { SYS_readlinkat, { -1, (long)buf, (long)link, 
sizeof(link), 0, 0 } }; - long link_ret = sys_readlinkat(&readlink_call); + long link_ret = sys_readlinkat(&readlink_call, 1); if (link_ret >= 0 && link_ret < (ssize_t)sizeof(link)) { link[link_ret] = 0; if (allow_buffered_open(link)) { @@ -2270,7 +2316,7 @@ static long sys_read(const struct syscall_info* call) { sizeof(void*) == 8 && !(count & 4095)) { struct syscall_info lseek_call = { SYS_lseek, { fd, 0, SEEK_CUR, 0, 0, 0 } }; - off_t lseek_ret = sys_generic_nonblocking_fd(&lseek_call); + off_t lseek_ret = privileged_sys_generic_nonblocking_fd(&lseek_call); if (lseek_ret >= 0 && !(lseek_ret & 4095)) { struct btrfs_ioctl_clone_range_args ioctl_args; int ioctl_ret; @@ -2289,11 +2335,11 @@ static long sys_read(const struct syscall_info* call) { { thread_locals->cloned_file_data_fd, BTRFS_IOC_CLONE_RANGE, (long)&ioctl_args, 0, 0, 0 } }; - ioctl_ret = traced_raw_syscall(&ioctl_call); + ioctl_ret = privileged_traced_raw_syscall(&ioctl_call); } else { ioctl_ret = - untraced_syscall3(SYS_ioctl, thread_locals->cloned_file_data_fd, - BTRFS_IOC_CLONE_RANGE, &ioctl_args); + privileged_untraced_syscall3(SYS_ioctl, thread_locals->cloned_file_data_fd, + BTRFS_IOC_CLONE_RANGE, &ioctl_args); ioctl_ret = commit_raw_syscall(SYS_ioctl, ioctl_ptr, ioctl_ret); } @@ -2406,7 +2452,7 @@ static long sys_readlink(const struct syscall_info* call) { } #endif -static long sys_readlinkat(const struct syscall_info* call) { +static long sys_readlinkat(const struct syscall_info* call, int privileged) { const int syscallno = SYS_readlinkat; int dirfd = call->args[0]; const char* path = (const char*)call->args[1]; @@ -2424,10 +2470,17 @@ static long sys_readlinkat(const struct syscall_info* call) { ptr += bufsiz; } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { + if (privileged) { + return privileged_traced_raw_syscall(call); + } return traced_raw_syscall(call); } - ret = untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + if (privileged) { + ret = 
privileged_untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + } else { + ret = untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + } ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } @@ -3289,7 +3342,8 @@ case SYS_epoll_pwait: #if defined(SYS_readlink) CASE(readlink); #endif - CASE(readlinkat); + case SYS_readlinkat: + return sys_readlinkat(call, 0); #if defined(SYS_recvfrom) CASE(recvfrom); #endif diff --git a/src/preload/tweak_librrpage.py b/src/preload/tweak_librrpage.py index 41139fda79d..ea62a9735fe 100755 --- a/src/preload/tweak_librrpage.py +++ b/src/preload/tweak_librrpage.py @@ -97,7 +97,7 @@ def write_uptr(is64, f, v): write_uptr(is64, f, new_table_offset) alloc_offset = new_table_offset + size - for n in range(12, 15): + for n in range(e_shnum-3, e_shnum): seek_nth_section_sh_offset(f, new_table_offset, e_shentsize, n, sh_offset_offset) sh_offs = read_uptr(is64, f) sh_size = read_uptr(is64, f) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 19c42523de0..6c00b820bb7 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include #include @@ -18,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -30,8 +33,11 @@ #include #include #include +#include #include #include +#include +#include #include #include #include @@ -1597,6 +1603,73 @@ template void prepare_ethtool_ioctl(RecordTask* t, TaskSyscallSt syscall_state.after_syscall_action(record_page_below_stack_ptr); } +template +static void prepare_rdma_verbs_ioctl(RecordTask* t, + TaskSyscallState& syscall_state) +{ + remote_ptr arg_p = syscall_state.syscall_entry_registers.arg(3); + auto ib_uverbs_hdr_p = arg_p.cast(); + if (ib_uverbs_hdr_p.is_null()) { + // TODO(sodar): Logging. 
+ LOG(warn) << "ib_uverbs_hdr_p is NULL"; + syscall_state.expect_errno = EINVAL; + return; + } + + bool ok = true; + auto ib_uverbs_hdr = t->read_mem(ib_uverbs_hdr_p, &ok); + if (!ok) { + // TODO(sodar): Logging. + LOG(fatal) << "failed to read ib_uverbs_ioctl_hdr contents"; + syscall_state.expect_errno = EFAULT; + return; + } + + { + auto size = ib_uverbs_hdr.length; + auto p = syscall_state.reg_parameter(3, size, IN_OUT); + if (p.is_null()) { + // TODO(sodar): Logging. + syscall_state.expect_errno = EINVAL; + return; + } + } + + auto attr_p = REMOTE_PTR_FIELD(ib_uverbs_hdr_p, attrs[0]); + for (unsigned int i = 0; i < ib_uverbs_hdr.num_attrs; ++i, ++attr_p) { + auto attr = t->read_mem(attr_p, &ok); + ASSERT(t, ok) << "failed to read attrs[" << i << "]"; + + if (ib_uverbs_hdr.object_id == UVERBS_OBJECT_DEVICE + && ib_uverbs_hdr.method_id == UVERBS_METHOD_INVOKE_WRITE) { + switch (attr.attr_id) { + case UVERBS_ATTR_CORE_IN: + case UVERBS_ATTR_UHW_IN: { + if (attr.len > sizeof(uint64_t)) { + auto data_p = REMOTE_PTR_FIELD(attr_p, data); + syscall_state.mem_ptr_parameter(data_p, attr.len, IN_OUT); + } + break; + } + case UVERBS_ATTR_CORE_OUT: + case UVERBS_ATTR_UHW_OUT: { + auto data_p = REMOTE_PTR_FIELD(attr_p, data); + syscall_state.mem_ptr_parameter(data_p, attr.len, IN_OUT); + break; + } + case UVERBS_ATTR_WRITE_CMD: { + // Should be inside attr struct. 
+ break; + } + default: + ASSERT(t, false) << "unknown attr_id for INVOKE_WRITE verb"; + } + } + } + + return; +} + template static Switchable prepare_ioctl(RecordTask* t, TaskSyscallState& syscall_state) { @@ -1730,9 +1803,11 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.reg_parameter(3); return PREVENT_SWITCH; + case KDGKBMODE: case TIOCINQ: case TIOCOUTQ: case TIOCGETD: + case VT_OPENQRY: syscall_state.reg_parameter(3); return PREVENT_SWITCH; @@ -1771,7 +1846,7 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.reg_parameter(3); return PREVENT_SWITCH; - case SG_IO: + case SG_IO: { auto argsp = syscall_state.reg_parameter(3, IN_OUT); auto args = t->read_mem(argsp); syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(argsp, dxferp), args.dxfer_len); @@ -1779,6 +1854,23 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(argsp, sbp), args.mx_sb_len); //usr_ptr: This value is not acted upon by the sg driver. return PREVENT_SWITCH; + } + + case VT_GETSTATE: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; + + case FBIOGET_FSCREENINFO: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; + + case FBIOGET_VSCREENINFO: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; + + case RDMA_VERBS_IOCTL: + prepare_rdma_verbs_ioctl(t, syscall_state); + return PREVENT_SWITCH; } /* In ioctl language, "_IOC_READ" means "outparam". Both @@ -2004,6 +2096,14 @@ static Switchable prepare_ioctl(RecordTask* t, args.wLength); return PREVENT_SWITCH; } + case IOCTL_MASK_SIZE(FS_IOC_FIEMAP): { + auto argsp = remote_ptr(t->regs().arg3()); + auto args = t->read_mem(argsp); + size = sizeof(typename Arch::fiemap) + + sizeof(typename Arch::fiemap_extent) * args.fm_extent_count; + syscall_state.reg_parameter(3, size, IN_OUT); + return PREVENT_SWITCH; + } } /* These ioctls are mostly regular but require additional recording. 
*/ @@ -2038,6 +2138,8 @@ static Switchable prepare_bpf(RecordTask* t, case BPF_MAP_UPDATE_ELEM: case BPF_MAP_DELETE_ELEM: return PREVENT_SWITCH; + case BPF_OBJ_GET: + return ALLOW_SWITCH; case BPF_PROG_LOAD: { auto argsp = syscall_state.reg_parameter(2, IN); @@ -3549,6 +3651,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, case Arch::SETOWN_EX: case Arch::GETSIG: case Arch::SETSIG: + case Arch::NOTIFY: case Arch::SETPIPE_SZ: case Arch::GETPIPE_SZ: case Arch::ADD_SEALS: @@ -3937,10 +4040,11 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, (size_t)regs.arg4())); return PREVENT_SWITCH; - case Arch::io_setup: { - // Prevent the io_setup from running and fake an ENOSYS return. We want - // to discourage applications from using this API because the async - // reads are writes by the kernel that can race with userspace execution. + case Arch::io_uring_setup: + case Arch::io_setup: + case Arch::rseq: { + // Prevent the io_setup/io_uring_setup/rseq from running and fake an ENOSYS return. We want + // to stop applications from using these APIs because we don't support them currently. Registers r = regs; r.set_arg2(0); t->set_regs(r); @@ -3948,6 +4052,15 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, return PREVENT_SWITCH; } + case Arch::userfaultfd: { + // Pretend the kernel doesn't support this. + Registers r = regs; + r.set_arg1(0xffffffff); + t->set_regs(r); + syscall_state.emulate_result(-ENOSYS); + return PREVENT_SWITCH; + } + case Arch::memfd_create: { string name = t->read_c_str(remote_ptr(regs.arg1())); if (is_blacklisted_memfd(name.c_str())) { @@ -4593,6 +4706,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, case Arch::seccomp: switch ((unsigned int)regs.arg1()) { case SECCOMP_SET_MODE_STRICT: + case SECCOMP_GET_ACTION_AVAIL: break; case SECCOMP_SET_MODE_FILTER: { // Prevent the actual seccomp call. We'll fix this up afterwards. 
@@ -4601,6 +4715,9 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, t->set_regs(r); break; } + case SECCOMP_GET_NOTIF_SIZES: + syscall_state.reg_parameter(3); + break; default: syscall_state.expect_errno = EINVAL; break; @@ -4704,6 +4821,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, prepare_mmap_register_params(t); return PREVENT_SWITCH; + case Arch::pkey_mprotect: case Arch::mprotect: // Since we're stripping MAP_GROWSDOWN from kernel mmap calls, we need // to implement PROT_GROWSDOWN ourselves. @@ -4778,6 +4896,29 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, return ALLOW_SWITCH; } + case SYS_rrcall_arm_time_slice: { + Registers r = t->regs(); + bool arguments_are_zero = true; + for (int i = 2; i <= 6; ++i) { + arguments_are_zero &= r.arg(i) == 0; + } + // Ticks request of zero is invalid for the moment + // for purposes of this syscall. In the future we + // want to have it mean to simulate a timeslice expiry + // at the end of this syscall, but we have no use for + // that at the moment. 
+ if (r.arg(1) == 0 || r.arg(1) > (uintptr_t)MAX_TICKS_REQUEST || + !arguments_are_zero) { + syscall_state.emulate_result((uintptr_t)-EINVAL); + syscall_state.expect_errno = ENOSYS; + return PREVENT_SWITCH; + } + t->tick_request_override = (TicksRequest)r.arg(1); + syscall_state.emulate_result(0); + syscall_state.expect_errno = ENOSYS; + return PREVENT_SWITCH; + } + case Arch::brk: case Arch::munmap: case Arch::process_vm_readv: @@ -5316,6 +5457,42 @@ static bool monitor_fd_for_mapping(RecordTask* mapped_t, int mapped_fd, const st return our_mapping_writable; } +// The returned hole offsets are relative to 'offset' +static vector find_holes(RecordTask* t, int desc, uint64_t offset, uint64_t size) { + vector ret; + ScopedFd fd = t->open_fd(desc, O_RDONLY); + if (!fd.is_open()) { + return ret; + } + uint64_t file_start = offset; + uint64_t file_end = offset + size; + while (offset < file_end) { + off64_t r = lseek(fd, offset, SEEK_HOLE); + if (r < 0) { + // SEEK_HOLE not supported? + return ret; + } + uint64_t hole = (uint64_t)r; + ASSERT(t, hole >= offset); + if (hole >= file_end) { + return ret; + } + r = lseek(fd, hole, SEEK_DATA); + if (r < 0) { + if (errno == ENXIO) { + r = file_end; + } else { + return ret; + } + } + uint64_t data = min((uint64_t)r, file_end); + ASSERT(t, data > hole); + ret.push_back({ hole - file_start, data - hole }); + offset = data; + } + return ret; +} + static void process_mmap(RecordTask* t, size_t length, int prot, int flags, int fd, off_t offset_pages) { if (t->regs().syscall_failed()) { @@ -5388,7 +5565,8 @@ static void process_mmap(RecordTask* t, size_t length, int prot, int flags, TraceWriter::RECORD_IN_TRACE) { off64_t end = (off64_t)st.st_size - km.file_offset_bytes(); off64_t nbytes = min(end, (off64_t)km.size()); - ssize_t nread = t->record_remote_fallible(addr, nbytes); + vector holes = find_holes(t, fd, km.file_offset_bytes(), (uint64_t)nbytes); + ssize_t nread = t->record_remote_fallible(addr, nbytes, holes); if 
(!adjusted_size && nread != nbytes) { // If we adjusted the size, we're not guaranteed that the bytes we're // reading are actually valid (it could actually have been a zero-sized @@ -5416,7 +5594,9 @@ static void process_mmap(RecordTask* t, size_t length, int prot, int flags, if (rt->fd_table()->is_monitoring(f.fd)) { ASSERT(rt, rt->fd_table()->get_monitor(f.fd)->type() == - FileMonitor::Type::Mmapped); + FileMonitor::Type::Mmapped) + << "Expected monitor type Mmapped for fd " << f.fd << ", got monitor type " + << rt->fd_table()->get_monitor(f.fd)->type(); ((MmappedFileMonitor*)rt->fd_table()->get_monitor(f.fd))->revive(); } else { rt->fd_table()->add_monitor(rt, f.fd, new MmappedFileMonitor(rt, f.fd)); @@ -5595,6 +5775,9 @@ static string extra_expected_errno_info(RecordTask* t, case Arch::madvise: ss << "; unknown madvise(" << (int)t->regs().arg3() << ")"; break; + case Arch::bpf: + ss << "; unknown bpf(cmd=" << (int)t->regs().arg1() << ")"; + break; } break; case EIO: @@ -6124,15 +6307,19 @@ static void rec_process_syscall_arch(RecordTask* t, case Arch::futex: case Arch::ioctl: case Arch::io_setup: + case Arch::io_uring_setup: case Arch::madvise: case Arch::memfd_create: + case Arch::mprotect: + case Arch::pkey_mprotect: case Arch::pread64: case Arch::preadv: case Arch::ptrace: case Arch::read: case Arch::readv: + case Arch::rseq: case Arch::sched_setaffinity: - case Arch::mprotect: { + case Arch::userfaultfd: { // Restore the registers that we may have altered. 
Registers r = t->regs(); r.set_orig_arg1(syscall_state.syscall_entry_registers.arg1()); diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc index 2bbb9a5be68..17da2f78857 100644 --- a/src/replay_syscall.cc +++ b/src/replay_syscall.cc @@ -557,14 +557,46 @@ static void finish_anonymous_mmap(ReplayTask* t, AutoRemoteSyscalls& remote, device, inode, nullptr, &recorded_km, emu_file); } +static void write_mapped_data_with_holes(ReplayTask* t, const TraceReader::RawDataWithHoles& buf) { + unique_ptr remote; + size_t data_offset = 0; + size_t addr_offset = 0; + auto holes_iter = buf.holes.begin(); + while (data_offset < buf.data.size() || holes_iter != buf.holes.end()) { + if (holes_iter != buf.holes.end() && holes_iter->offset == addr_offset) { + t->write_zeroes(&remote, buf.addr + addr_offset, holes_iter->size); + addr_offset += holes_iter->size; + ++holes_iter; + continue; + } + size_t data_end = buf.data.size(); + if (holes_iter != buf.holes.end()) { + data_end = data_offset + holes_iter->offset - addr_offset; + } + t->write_bytes_helper(buf.addr + addr_offset, data_end - data_offset, buf.data.data() + data_offset, + nullptr); + addr_offset += data_end - data_offset; + data_offset = data_end; + } +} + static void write_mapped_data(ReplayTask* t, remote_ptr rec_addr, size_t size, TraceReader::MappedData& data) { switch (data.source) { - case TraceReader::SOURCE_TRACE: - t->set_data_from_trace(); + case TraceReader::SOURCE_TRACE: { + TraceReader::RawDataWithHoles buf; + ASSERT(t, t->trace_reader().read_raw_data_for_frame_with_holes(buf)); + ASSERT(t, buf.addr == rec_addr); + // Note that this gets called for remaps and shared maps that refer to the same pages + // as previous maps and so the data we're recording might not be the initial data + // for those pages, but it is the initial data *for this mapping*.
+ write_mapped_data_with_holes(t, buf); + t->vm()->maybe_update_breakpoints(t, rec_addr.cast(), + buf.data.size()); break; + } case TraceReader::SOURCE_FILE: { ScopedFd file(data.file_name.c_str(), O_RDONLY); ASSERT(t, file.is_open()) << "Can't open " << data.file_name; @@ -1030,7 +1062,7 @@ static void handle_opened_files(ReplayTask* t, int flags) { if (emu_file) { file_monitor = new MmappedFileMonitor(t, emu_file); } else if (o.path == "terminal") { - file_monitor = new StdioMonitor(STDERR_FILENO); + file_monitor = new StdioMonitor(t->session().tracee_output_fd(STDERR_FILENO)); } else if (is_proc_mem_file(o.path.c_str())) { file_monitor = new ProcMemMonitor(t, o.path); } else if (is_proc_fd_dir(o.path.c_str())) { @@ -1095,6 +1127,7 @@ static void rep_process_syscall_arch(ReplayTask* t, ReplayTraceStep* step, switch (non_negative_syscall(sys)) { case Arch::madvise: case Arch::mprotect: + case Arch::pkey_mprotect: case Arch::sigreturn: case Arch::rt_sigreturn: break; @@ -1182,19 +1215,21 @@ static void rep_process_syscall_arch(ReplayTask* t, ReplayTraceStep* step, case Arch::munmap: case Arch::mprotect: case Arch::modify_ldt: + case Arch::pkey_mprotect: case Arch::set_thread_area: { // Using AutoRemoteSyscalls here fails for arch_prctl, not sure why. Registers r = t->regs(); - r.set_syscallno(sys); + int modified_sys = sys == Arch::pkey_mprotect ? 
Arch::mprotect : sys; + r.set_syscallno(modified_sys); r.set_ip(r.ip().decrement_by_syscall_insn_length(r.arch())); t->set_regs(r); - if (sys == Arch::mprotect) { + if (modified_sys == Arch::mprotect) { t->vm()->fixup_mprotect_growsdown_parameters(t); } t->enter_syscall(); t->exit_syscall(); ASSERT(t, t->regs().syscall_result() == trace_regs.syscall_result()); - if (sys == Arch::mprotect) { + if (modified_sys == Arch::mprotect) { Registers r2 = t->regs(); r2.set_arg1(r.arg1()); r2.set_arg2(r.arg2()); diff --git a/src/rr_trace.capnp b/src/rr_trace.capnp index 2293b7fb2ac..e5fea788818 100644 --- a/src/rr_trace.capnp +++ b/src/rr_trace.capnp @@ -112,6 +112,8 @@ struct Header { # If in chaos mode, what was the global exclusion range. Useful for debugging. exclusionRangeStart @17 :RemotePtr; exclusionRangeEnd @18 :RemotePtr; + # Replaying this trace requires at least this forward-compatibility-version + requiredForwardCompatibilityVersion @19 :Int32; } # A file descriptor belonging to a task @@ -195,10 +197,18 @@ struct TaskEvent { } } +struct WriteHole { + offset @0 :UInt64; + size @1 :UInt64; +} + struct MemWrite { tid @0 :Tid; addr @1 :RemotePtr; size @2 :UInt64; + # A list of regions where zeroes are written. These are not + # present in the compressed data.
+ holes @3 :List(WriteHole); } enum Arch { diff --git a/src/syscalls.py b/src/syscalls.py index 39804403440..a4397029ea2 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -546,7 +546,7 @@ def __init__(self, **kwargs): # to a statfs structure defined approximately as follows: fstatfs = EmulatedSyscall(x86=100, x64=138, generic=44, arg2="struct Arch::statfs") -ioperm = UnsupportedSyscall(x86=101, x64=173) +ioperm = EmulatedSyscall(x86=101, x64=173) # int socketcall(int call, unsigned long *args) # @@ -570,7 +570,7 @@ def __init__(self, **kwargs): lstat = EmulatedSyscall(x86=107, x64=6, arg2="struct Arch::stat") fstat = EmulatedSyscall(x86=108, x64=5, generic=80, arg2="struct Arch::stat") olduname = UnsupportedSyscall(x86=109) -iopl = UnsupportedSyscall(x86=110, x64=172) +iopl = EmulatedSyscall(x86=110, x64=172) vhangup = UnsupportedSyscall(x86=111, x64=153, generic=58) idle = UnsupportedSyscall(x86=112) vm86old = UnsupportedSyscall(x86=113) @@ -1671,18 +1671,18 @@ def __init__(self, **kwargs): bpf = IrregularEmulatedSyscall(x86=357, x64=321, generic=280) execveat = UnsupportedSyscall(x86=358, x64=322, generic=281) -userfaultfd = UnsupportedSyscall(x86=374, x64=323, generic=282) +userfaultfd = IrregularEmulatedSyscall(x86=374, x64=323, generic=282) membarrier = EmulatedSyscall(x86=375, x64=324, generic=283) mlock2 = UnsupportedSyscall(x86=376, x64=325, generic=284) copy_file_range = IrregularEmulatedSyscall(x86=377, x64=326, generic=285) preadv2 = UnsupportedSyscall(x86=378, x64=327, generic=286) pwritev2 = UnsupportedSyscall(x86=379, x64=328, generic=287) -pkey_mprotect = UnsupportedSyscall(x86=380, x64=329, generic=288) -pkey_alloc = UnsupportedSyscall(x86=381, x64=330, generic=289) -pkey_free = UnsupportedSyscall(x86=382, x64=331, generic=290) +pkey_mprotect = IrregularEmulatedSyscall(x86=380, x64=329, generic=288) +pkey_alloc = EmulatedSyscall(x86=381, x64=330, generic=289) +pkey_free = EmulatedSyscall(x86=382, x64=331, generic=290) statx = 
EmulatedSyscall(x86=383, x64=332, generic=291, arg5="typename Arch::statx_struct") io_pgetevents = UnsupportedSyscall(x86=385, x64=333, generic=292) -rseq = UnsupportedSyscall(x86=386, x64=334, generic=293) +rseq = IrregularEmulatedSyscall(x86=386, x64=334, generic=293) clock_gettime64 = EmulatedSyscall(x86=403, arg2="typename Arch::Arch64::timespec") clock_settime64 = UnsupportedSyscall(x86=404) @@ -1707,7 +1707,7 @@ def __init__(self, **kwargs): # x86-64 decided to skip ahead here to catchup pidfd_send_signal = UnsupportedSyscall(x86=424, x64=424, generic=424) -io_uring_setup = UnsupportedSyscall(x86=425, x64=425, generic=425) +io_uring_setup = IrregularEmulatedSyscall(x86=425, x64=425, generic=425) io_uring_enter = UnsupportedSyscall(x86=426, x64=426, generic=426) io_uring_register = UnsupportedSyscall(x86=427, x64=427, generic=427) open_tree = UnsupportedSyscall(x86=428, x64=428, generic=428) @@ -1723,6 +1723,10 @@ def __init__(self, **kwargs): process_madvise = UnsupportedSyscall(x86=440, x64=440, generic=440) epoll_pwait2 = UnsupportedSyscall(x86=441, x64=441, generic=441) mount_setattr = UnsupportedSyscall(x86=442, x64=442, generic=442) +# 443 reserved for quotactl_path +landlock_create_ruleset = UnsupportedSyscall(x86=444, x64=444, generic=444) +landlock_add_rule = UnsupportedSyscall(x86=445, x64=445, generic=445) +landlock_restrict_self = UnsupportedSyscall(x86=446, x64=446, generic=446) # restart_syscall is a little special. 
restart_syscall = RestartSyscall(x86=0, x64=219, generic=128) diff --git a/src/test-monitor/test-monitor.cc b/src/test-monitor/test-monitor.cc index 6aba5d39375..ff91784acf1 100644 --- a/src/test-monitor/test-monitor.cc +++ b/src/test-monitor/test-monitor.cc @@ -145,10 +145,16 @@ static void dump_gdb_stacktrace(pid_t child, FILE* out) { static void force_trace_closure(pid_t child, FILE* out) { char cmdline[1024 * 10]; - sprintf(cmdline, "gdb -p %d -ex 'set confirm off' -ex 'set height 0' -ex " - "'p rr::force_close_record_session()' -ex q &1", + sprintf(cmdline, "gdb -p %d " + "-ex 'set confirm off' " + "-ex 'set height 0' " + "-ex 'b rr::force_close_record_session' " + "-ex 'p rr::force_close_record_session()' " + "-ex detach " + "-ex q &1", child); dump_popen_cmdline(cmdline, out); + sleep(2); /* give the force_close_record_session time to take effect */ } static void dump_emergency_debugger(char* gdb_cmd, FILE* out) { diff --git a/src/test/bpf.c b/src/test/bpf.c new file mode 100644 index 00000000000..0beb4e39731 --- /dev/null +++ b/src/test/bpf.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ +#include "util.h" + +#include +#include + +int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +int main(void) { + union bpf_attr attr; + + { + const char* filename = "foo"; + memset(&attr, 0, sizeof(attr)); + attr.pathname = (__u64)(uintptr_t)filename; + bpf(BPF_OBJ_GET, &attr, 1); + } + + atomic_puts("EXIT-SUCCESS"); + + return 0; +} diff --git a/src/test/call_exit.py b/src/test/call_exit.py index 541e391122c..1c8b262d8f4 100644 --- a/src/test/call_exit.py +++ b/src/test/call_exit.py @@ -1,14 +1,36 @@ +import sys from util import * +gdb_version = get_gdb_version() +if gdb_version < 10: + # On gdb 9.2 after calling exit(0) + # gdb's internal state is confused + # about which thread we're on, and + # the 'finish' command fails. 
+ send_gdb('c') + expect_gdb('EXIT-SUCCESS') + ok() + sys.exit(0) + send_gdb('b main') expect_gdb('Breakpoint 1') send_gdb('c') expect_gdb('Breakpoint 1, main') +# Step over the breakpoint and into `atomic_puts' to make sure it +# doesn't influence the test. +send_gdb('step') +expect_gdb('atomic_puts \\(str=') + send_gdb('call (int)exit(0)') expect_gdb('while in a function called from GDB') +# Make sure we're still in the frame of `atomic_puts' and can still +# continue the replay. +send_gdb('finish') +expect_gdb('EXIT-SUCCESS') + restart_replay() expect_gdb('Breakpoint 1, main') diff --git a/src/test/chmod.c b/src/test/chmod.c index b0815d63d68..4332c6f2847 100644 --- a/src/test/chmod.c +++ b/src/test/chmod.c @@ -13,9 +13,9 @@ int main(void) { test_assert(0 == access(file_path, W_OK)); test_assert(0 == fchmodat(AT_FDCWD, file_path, 0400, 0)); test_assert(0 == access(file_path, R_OK)); - test_assert(0 == faccessat(AT_FDCWD, file_path, 0400, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); + test_assert(0 == faccessat(AT_FDCWD, file_path, R_OK, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); #ifdef SYS_faccessat2 - test_assert(0 == syscall(SYS_faccessat2, AT_FDCWD, file_path, 0400, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); + test_assert(0 == syscall(SYS_faccessat2, AT_FDCWD, file_path, R_OK, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); #endif atomic_puts("EXIT-SUCCESS"); diff --git a/src/test/conditional_breakpoint_offload.run b/src/test/conditional_breakpoint_offload.run index 3eadb5a637f..d3dc1eae0a0 100644 --- a/src/test/conditional_breakpoint_offload.run +++ b/src/test/conditional_breakpoint_offload.run @@ -1,2 +1,3 @@ source `dirname $0`/util.sh +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi debug_test diff --git a/src/test/condvar_stress.run b/src/test/condvar_stress.run index d3661713dae..68e616cf1f5 100644 --- a/src/test/condvar_stress.run +++ b/src/test/condvar_stress.run @@ -2,5 +2,5 @@ source `dirname $0`/util.sh # Switch threads very eagerly on recorded events.
RECORD_ARGS="-s" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/fatal_init_signal.c b/src/test/fatal_init_signal.c new file mode 100644 index 00000000000..0c975dfa738 --- /dev/null +++ b/src/test/fatal_init_signal.c @@ -0,0 +1,38 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" +#include "nsutils.h" + +static char ch = 1; + +int main(void) { + pid_t pid; + int status; + int ret; + if (-1 == try_setup_ns(CLONE_NEWPID)) { + // We may not have permission to set up namespaces, so bail. + atomic_puts("Insufficient permissions, skipping test"); + atomic_puts("EXIT-SUCCESS"); + return 77; + } + + // This is the first child, therefore PID 1 in its PID namespace + pid = fork(); + test_assert(pid >= 0); + if (pid == 0) { + test_assert(getpid() == 1); + // This will be nonfatal because we don't have a handler for it. + kill(getpid(), SIGQUIT); + // Ensure at least one tick + if (ch == 1) { + ch = 3; + } + return 55; + } + + ret = waitpid(pid, &status, 0); + test_assert(ret == pid); + test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 55); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/fcntl_notify.c b/src/test/fcntl_notify.c new file mode 100644 index 00000000000..6cb162076aa --- /dev/null +++ b/src/test/fcntl_notify.c @@ -0,0 +1,28 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static char tmp_name[] = "tempXXXXXX"; +static int saw_sigio = 0; + +void catcher(__attribute__((unused)) int signum) { + saw_sigio = 1; +} + +int main(void) { + int fd, file_fd; + mkdtemp(tmp_name); + signal(SIGIO, catcher); + + fd = open(tmp_name, O_RDONLY | O_DIRECTORY); + test_assert(fd >= 0); + + fcntl(fd, F_NOTIFY, DN_CREATE); + + file_fd = openat(fd, "foo", O_RDWR | O_CREAT, 0600); + test_assert(file_fd >= 0); + test_assert(saw_sigio); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git
a/src/test/fd_limit.c b/src/test/fd_limit.c new file mode 100644 index 00000000000..c7e55e1aa7a --- /dev/null +++ b/src/test/fd_limit.c @@ -0,0 +1,40 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static void* do_thread(__attribute__((unused)) void* p) { + atomic_puts("EXIT-SUCCESS"); + return NULL; +} + +int main(void) { + pthread_t thread; + struct rlimit limit; + int ret = getrlimit(RLIMIT_NOFILE, &limit); + int new_fd; + rlim_t initial_limit = limit.rlim_cur; + test_assert(ret >= 0); + + if (initial_limit + 10 > limit.rlim_max) { + atomic_puts("Current soft limit cannot be increased enough, skipping test"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + + /* Increase soft limit. */ + limit.rlim_cur += 10; + ret = setrlimit(RLIMIT_NOFILE, &limit); + test_assert(ret >= 0); + + /* Consume file descriptors until we've allocated all previously available descriptors (plus one). */ + do { + new_fd = open("/dev/null", O_RDONLY); + test_assert(new_fd >= 0); + } while (new_fd < (int)initial_limit); + + /* This will allocate new fds for thread stack and syscallbuf stuff */ + pthread_create(&thread, NULL, do_thread, NULL); + pthread_join(thread, NULL); + + return 0; +} diff --git a/src/test/fd_limit.run b/src/test/fd_limit.run new file mode 100644 index 00000000000..d52f1c922d7 --- /dev/null +++ b/src/test/fd_limit.run @@ -0,0 +1,3 @@ +source `dirname $0`/util.sh +ulimit -S -n 1024 +compare_test EXIT-SUCCESS diff --git a/src/test/fork_stress.run b/src/test/fork_stress.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/fork_stress.run +++ b/src/test/fork_stress.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/gdb_bogus_breakpoint.c b/src/test/gdb_bogus_breakpoint.c index 4e2f0ac7f6e..e14199366c0 100644 --- a/src/test/gdb_bogus_breakpoint.c +++ b/src/test/gdb_bogus_breakpoint.c @@ -5,11 +5,11 @@ static 
char ch = 'E'; static char my_write(int fd, void* buf, size_t size) { - long ret; /* Do a write syscall where the address of the buffer is at the top of stack during the syscall. This may trigger gdb to try to set a breakpoint in that buffer. */ #ifdef __x86_64__ + long ret; asm("push %5\n\t" "syscall\n\t" "nop\n\t" @@ -20,6 +20,7 @@ static char my_write(int fd, void* buf, size_t size) { : "=a"(ret) : "a"(SYS_write), "D"(fd), "S"(buf), "d"(size), "r"(&ch)); #elif __i386__ + long ret; asm("push %5\n\t" "int $0x80\n\t" "nop\n\t" @@ -29,6 +30,22 @@ static char my_write(int fd, void* buf, size_t size) { "mov (%5),%0\n\t" : "=a"(ret) : "a"(SYS_write), "b"(fd), "c"(buf), "d"(size), "r"(&ch)); +#elif __aarch64__ + register long x0 __asm("x0") = fd; + register long x1 __asm("x1") = (uintptr_t)buf; + register long x2 __asm("x2") = size; + register long x7 __asm("x7") = (uintptr_t)&ch; + register long x8 __asm("x8") = SYS_write; + asm("stp x1, x7, [sp, #-16]!\n\t" + "svc #0\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "ldp x1, x7, [sp], #16\n\t" + "ldr x0, [x7]\n\t" + : "+r"(x0) + : "r"(x1), "r"(x2), "r"(x8), "r"(x7)); + long ret = x0; #else #error Unknown architecture #endif diff --git a/src/test/ignored_async_usr1.run b/src/test/ignored_async_usr1.run index 27f71c2690e..4c94ca492fd 100644 --- a/src/test/ignored_async_usr1.run +++ b/src/test/ignored_async_usr1.run @@ -3,10 +3,12 @@ source `dirname $0`/util.sh SYNC_TOKEN=disabled record $TESTNAME & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done echo " done. Delivering SIGUSR1 ..." 
diff --git a/src/test/io_uring.c b/src/test/io_uring.c new file mode 100644 index 00000000000..a2bf6e1c6e4 --- /dev/null +++ b/src/test/io_uring.c @@ -0,0 +1,44 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +struct io_sqring_offsets { + uint32_t head; + uint32_t tail; + uint32_t ring_mask; + uint32_t ring_entries; + uint32_t flags; + uint32_t dropped; + uint32_t array; + uint32_t resv[3]; +}; + +struct io_cqring_offsets { + uint32_t head; + uint32_t tail; + uint32_t ring_mask; + uint32_t ring_entries; + uint32_t overflow; + uint32_t cqes; + uint32_t flags; + uint32_t resv[3]; +}; + +struct io_uring_params { + uint32_t sq_entries; + uint32_t cq_entries; + uint32_t flags; + uint32_t sq_thread_idle; + uint32_t features; + uint32_t resv[4]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +int main(void) { + struct io_uring_params params; + int ret = syscall(RR_io_uring_setup, 32, &params); + test_assert(ret == -1 && errno == ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/ioctl_fb.c b/src/test/ioctl_fb.c new file mode 100644 index 00000000000..619fca42ca5 --- /dev/null +++ b/src/test/ioctl_fb.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd; + struct fb_fix_screeninfo finfo; + struct fb_var_screeninfo vinfo; + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) { + atomic_puts("Can't open framebuffer, aborting test"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + + test_assert(0 == ioctl(fd, FBIOGET_FSCREENINFO, &finfo)); + atomic_printf("FBIOGET_FSCREENINFO returned id=%s capabilities=%d\n", finfo.id, finfo.capabilities); + + test_assert(0 == ioctl(fd, FBIOGET_VSCREENINFO, &vinfo)); + atomic_printf("FBIOGET_VSCREENINFO returned xres=%d yres=%d colorspace=%d\n", vinfo.xres, vinfo.yres, vinfo.colorspace); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git
a/src/test/ioctl_fs.c b/src/test/ioctl_fs.c index d9bcd8035fd..b81d623e438 100644 --- a/src/test/ioctl_fs.c +++ b/src/test/ioctl_fs.c @@ -6,22 +6,43 @@ int main(void) { int fd = open("dummy.txt", O_RDWR | O_CREAT, 0600); long version; long flags; + char filebuf[4096] = {}; + char fmbuf[4096] = {}; + struct fiemap *fm; int ret; test_assert(fd >= 0); ret = ioctl(fd, FS_IOC_GETVERSION, &version); if (ret < 0) { - test_assert(errno == ENOTTY); + test_assert(errno == ENOTTY || errno == EOPNOTSUPP); } else { atomic_printf("version=%ld\n", version); } ret = ioctl(fd, FS_IOC_GETFLAGS, &flags); if (ret < 0) { - test_assert(errno == ENOTTY); + test_assert(errno == ENOTTY || errno == EOPNOTSUPP); } else { atomic_printf("flags=%lx\n", flags); } + test_assert(sizeof(filebuf) == write(fd, &filebuf, sizeof(filebuf))); + fm = (struct fiemap*)fmbuf; + fm->fm_start = 0; + fm->fm_flags = 0; + fm->fm_extent_count = (sizeof(fmbuf) - offsetof(struct fiemap, fm_extents)) / sizeof(fm->fm_extents[0]); + fm->fm_length = FIEMAP_MAX_OFFSET - fm->fm_start; + ret = ioctl(fd, FS_IOC_FIEMAP, fm); + if (ret < 0) { + test_assert(errno == ENOTTY || errno == EOPNOTSUPP); + } else { + atomic_printf("fm->fm_mapped_extents=%d\n", fm->fm_mapped_extents); + for (unsigned int i=0; i < fm->fm_mapped_extents; i++) { + struct fiemap_extent* fe = fm->fm_extents + i; + atomic_printf("i=%d fe_logical=0x%llx fe_physical=0x%llx fe_length=0x%llx fe_flags=0x%x\n", i, + fe->fe_logical, fe->fe_physical, fe->fe_length, fe->fe_flags); + } + } + atomic_puts("EXIT-SUCCESS"); return 0; } diff --git a/src/test/ioctl_vt.c b/src/test/ioctl_vt.c new file mode 100644 index 00000000000..e18c9d9002d --- /dev/null +++ b/src/test/ioctl_vt.c @@ -0,0 +1,29 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd; + int vt; + struct vt_stat vts; + int tty_mode; + + fd = open("/dev/tty0", O_RDWR); + if (fd < 0) { + atomic_puts("Can't open tty, aborting test"); 
+ atomic_puts("EXIT-SUCCESS"); + return 0; + } + + test_assert(0 == ioctl(fd, VT_OPENQRY, &vt)); + atomic_printf("VT_OPENQRY returned %d\n", vt); + + test_assert(0 == ioctl(fd, VT_GETSTATE, &vts)); + atomic_printf("VT_GETSTATE returned v_active=%d\n", vts.v_active); + + test_assert(0 == ioctl(fd, KDGKBMODE, &tty_mode)); + atomic_printf("KDGKBMODE returned %d\n", tty_mode); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/large_hole.c b/src/test/large_hole.c new file mode 100644 index 00000000000..6c387ed2678 --- /dev/null +++ b/src/test/large_hole.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { +#ifdef __x86_64__ + off64_t size = ((off64_t)100)*1024*1024*1024; + char* p; + int fd = open("big", O_RDWR | O_TRUNC | O_CREAT, 0700); + test_assert(pwrite64(fd, "x", 1, size) == 1); + p = (char*)mmap(NULL, size + 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + p[size/2] = 1; + p[size/2 + 65536] = 1; + test_assert(0 == munmap(p, size + 1)); + + test_assert(fallocate64(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, size/2, 4096) == 0); + p = (char*)mmap(NULL, size + 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[size/2] == 0); + test_assert(p[size/2 + 65536] == 1); +#endif + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/mutex_pi_stress.run b/src/test/mutex_pi_stress.run index d3661713dae..68e616cf1f5 100644 --- a/src/test/mutex_pi_stress.run +++ b/src/test/mutex_pi_stress.run @@ -2,5 +2,5 @@ source `dirname $0`/util.sh # Switch threads very eagerly on recorded events. 
RECORD_ARGS="-s" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/nested_detach_kill.run b/src/test/nested_detach_kill.run index eacb0cfe3ea..f7b59dca697 100644 --- a/src/test/nested_detach_kill.run +++ b/src/test/nested_detach_kill.run @@ -11,10 +11,12 @@ save_exe "$NEST_EXE" save_exe "$SLEEP_EXE" touch record.out just_record $NEST_EXE-$nonce "$(which rr) record --nested=detach $PWD/$SLEEP_EXE-$nonce" & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $NEST_EXE-$nonce)) diff --git a/src/test/pid_ns_reap.c b/src/test/pid_ns_reap.c index c950ded876e..f789573a9ce 100644 --- a/src/test/pid_ns_reap.c +++ b/src/test/pid_ns_reap.c @@ -14,6 +14,7 @@ int main(void) { pid_t pid; if (-1 == try_setup_ns(CLONE_NEWPID)) { /* We may not have permission to set up namespaces, so bail. */ + atomic_puts("Insufficient permissions, skipping test"); atomic_puts("EXIT-SUCCESS"); return 77; } diff --git a/src/test/pid_ns_segv.c b/src/test/pid_ns_segv.c index a5a52b7db02..b0a0599199d 100644 --- a/src/test/pid_ns_segv.c +++ b/src/test/pid_ns_segv.c @@ -7,6 +7,7 @@ int main(void) { pid_t pid; if (-1 == try_setup_ns(CLONE_NEWPID)) { // We may not have permission to set up namespaces, so bail. 
+ atomic_puts("Insufficient permissions, skipping test"); atomic_puts("EXIT-SUCCESS"); return 77; } diff --git a/src/test/record_replay.run b/src/test/record_replay.run index 885812abd22..b35c3c88cd0 100644 --- a/src/test/record_replay.run +++ b/src/test/record_replay.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi record record_replay_subject$bitness just_record rr "--suppress-environment-warnings replay -a $workdir/*-0" replay diff --git a/src/test/reverse_step_threads_break.c b/src/test/reverse_step_threads_break.c index 5d2351b8dc7..df53402e016 100644 --- a/src/test/reverse_step_threads_break.c +++ b/src/test/reverse_step_threads_break.c @@ -14,13 +14,25 @@ static size_t my_read(int fd, void* buf, size_t size) { #ifdef __x86_64__ __asm__("syscall\n\t" : "=a"(ret) - : "a"(SYS_read), "D"(fd), "S"(buf), "d"(size)); + : "a"(SYS_read), "D"(fd), "S"(buf), "d"(size) + : "memory"); #elif defined(__i386__) __asm__("xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" : "=a"(ret) - : "a"(SYS_read), "c"(buf), "d"(size), "D"(fd)); + : "a"(SYS_read), "c"(buf), "d"(size), "D"(fd) + : "memory"); +#elif defined(__aarch64__) + register uint64_t x0 __asm__ ("x0") = fd; + register void *x1 __asm__ ("x1") = buf; + register uint64_t x2 __asm__ ("x2") = size; + register uint64_t x8 __asm__ ("x8") = SYS_read; + __asm__("svc #0\n\t" + : "+r"(x0) + : "r"(x1), "r"(x2), "r"(x8) + : "memory"); + ret = x0; #else #error define syscall here #endif diff --git a/src/test/rseq.c b/src/test/rseq.c new file mode 100644 index 00000000000..86d76f5763e --- /dev/null +++ b/src/test/rseq.c @@ -0,0 +1,18 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +struct rseq { + uint32_t cpu_id_start; + uint32_t cpu_id; + uint64_t rseq_cs; + uint32_t flags; +}; + +int main(void) { + struct rseq rs; + int ret = syscall(RR_rseq, &rs, sizeof(rs), 0, 0); + test_assert(ret == -1 && errno == 
ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/seccomp.c b/src/test/seccomp.c index 94130b1c4d0..d06809d4e51 100644 --- a/src/test/seccomp.c +++ b/src/test/seccomp.c @@ -131,12 +131,38 @@ static void* run_thread(__attribute__((unused)) void* p) { return NULL; } +static void test_get_action_avail(void) { + // `SECCOMP_RET_ALLOW` is available since the first version of `SECCOMP_GET_ACTION_AVAIL` + uint32_t action = SECCOMP_RET_ALLOW; + int ret = syscall(RR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action); + test_assert(ret == 0 || (ret == -1 && errno == EINVAL)); +} + +static void test_get_notif_sizes(void) { + struct { + uint16_t seccomp_notif; + uint16_t seccomp_notif_resp; + uint16_t seccomp_data; + } sizes; + int ret = syscall(RR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes); + test_assert(ret == 0 || (ret == -1 && errno == EINVAL)); + if (ret == 0) { + // These were the sizes when `SECCOMP_GET_NOTIF_SIZES` was first added. + test_assert(sizes.seccomp_notif >= 80); + test_assert(sizes.seccomp_notif_resp >= 24); + test_assert(sizes.seccomp_data >= 64); + } +} + int main(void) { struct sigaction sa; pthread_t thread; pthread_t w_thread; char ch; + test_get_action_avail(); + test_get_notif_sizes(); + test_assert(0 == pipe(pipe_fds)); sa.sa_sigaction = handler; diff --git a/src/test/seccomp_cloning.c b/src/test/seccomp_cloning.c new file mode 100644 index 00000000000..658bb0a1989 --- /dev/null +++ b/src/test/seccomp_cloning.c @@ -0,0 +1,62 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +#define BUF_SIZE 65536 + +static void install_filter(void) { + struct sock_filter filter[] = { + /* Load system call number from 'seccomp_data' buffer into + accumulator */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)), + /* Jump forward 1 instruction if system call number + is not SYS_read */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K,
SECCOMP_RET_ALLOW), + /* Jump forward 1 instruction if system call number + is not SYS_write */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /* Jump forward 1 instruction if system call number + is not SYS_exit_group */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /* Kill process */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])), + .filter = filter, + }; + int ret; + + ret = syscall(RR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog); + if (ret == -1 && errno == ENOSYS) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + } + test_assert(ret == 0); +} + +int main(void) { + char buf[BUF_SIZE]; + int fd = open("tmp.bin", O_RDWR | O_CREAT | O_EXCL, 0600); + test_assert(fd >= 0); + unlink("tmp.bin"); + + memset(buf, 1, sizeof(buf)); + + test_assert(write(fd, buf, BUF_SIZE) == BUF_SIZE); + test_assert(0 == lseek(fd, 0, SEEK_SET)); + + test_assert(0 == prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + install_filter(); + + test_assert(read(fd, buf, BUF_SIZE) == BUF_SIZE); + + atomic_puts("EXIT-SUCCESS"); + syscall(SYS_exit_group, 0); + return 0; +} diff --git a/src/test/shared_monitor.c b/src/test/shared_monitor.c new file mode 100644 index 00000000000..ea27038b51f --- /dev/null +++ b/src/test/shared_monitor.c @@ -0,0 +1,27 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd = syscall(SYS_memfd_create, "temp", 0); + char buf[4096]; + int size = sizeof(buf); + int ret; + uint8_t* p; + memset(buf, 1, size); + ret = write(fd, buf, size); + test_assert(ret == size); + p = (uint8_t*)mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + + memset(buf, 0, size); + ret = pwrite(fd, buf, size, 0); + 
test_assert(ret == size); + + p = (uint8_t*)mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[0] == 0); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/shared_offset.c b/src/test/shared_offset.c new file mode 100644 index 00000000000..f1802f2169f --- /dev/null +++ b/src/test/shared_offset.c @@ -0,0 +1,20 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + size_t page_size = sysconf(_SC_PAGESIZE); + int fd = syscall(RR_memfd_create, "shared", 0); + if (fd < 0 && errno == ENOSYS) { + atomic_puts("SYS_memfd_create not supported on this kernel"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + test_assert(fd >= 0); + test_assert(0 == ftruncate(fd, page_size*2)); + char* map = (char*)mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, page_size); + test_assert(map != MAP_FAILED); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/small_holes.c b/src/test/small_holes.c new file mode 100644 index 00000000000..fb482115bb7 --- /dev/null +++ b/src/test/small_holes.c @@ -0,0 +1,20 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + char* p; + size_t page_size = sysconf(_SC_PAGESIZE); + int fd = open("small", O_RDWR | O_TRUNC | O_CREAT, 0700); + test_assert(0 == ftruncate(fd, page_size*7)); + pwrite(fd, "x", 1, page_size); + pwrite(fd, "y", 1, page_size*3); + pwrite(fd, "z", 1, page_size*5); + p = (char*)mmap(NULL, page_size*7, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[page_size] == 'x'); + test_assert(p[page_size*3] == 'y'); + test_assert(p[page_size*5] == 'z'); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/term_trace_cpu.run b/src/test/term_trace_cpu.run index 939b8cd42e4..d1d6652f67d 100644 --- a/src/test/term_trace_cpu.run +++ b/src/test/term_trace_cpu.run @@ -15,10 +15,12 
@@ SYNC_TOKEN=spinning WAIT_SECS=1 record $EXE & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $EXE-$nonce)) diff --git a/src/test/term_trace_syscall.run b/src/test/term_trace_syscall.run index 0e67c56c923..03e0c3b5592 100644 --- a/src/test/term_trace_syscall.run +++ b/src/test/term_trace_syscall.run @@ -3,10 +3,12 @@ source `dirname $0`/util.sh SYNC_TOKEN=sleeping record $TESTNAME & # sleep "forever" +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $TESTNAME-$nonce)) diff --git a/src/test/thread_open_race.run b/src/test/thread_open_race.run index 908f7827d6b..a8840d0efbe 100644 --- a/src/test/thread_open_race.run +++ b/src/test/thread_open_race.run @@ -3,6 +3,6 @@ source `dirname $0`/util.sh # This test requires syscallbuf syscall patching skip_if_no_syscall_buf -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi compare_test EXIT-SUCCESS diff --git a/src/test/thread_stress.run b/src/test/thread_stress.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/thread_stress.run +++ b/src/test/thread_stress.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/tty.run b/src/test/tty.run new file mode 100644 index 00000000000..3a3310d4792 --- /dev/null +++ b/src/test/tty.run @@ -0,0 +1,13 @@ +source `dirname $0`/util.sh +record simple$bitness +replay "--tty tty-output" + +token=EXIT-SUCCESS +if [[ "tty-output" != $(grep -l $token tty-output) ]]; then + failed ": token '$token' not in tty-output:" + echo 
"--------------------------------------------------" + cat tty-output + echo "--------------------------------------------------" +else + passed +fi diff --git a/src/test/unexpected_exit.c b/src/test/unexpected_exit.c index fd7d262bb76..6a5e777e44d 100644 --- a/src/test/unexpected_exit.c +++ b/src/test/unexpected_exit.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? */ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_execve.c b/src/test/unexpected_exit_execve.c index 93db859d52b..98d99388a7a 100644 --- a/src/test/unexpected_exit_execve.c +++ b/src/test/unexpected_exit_execve.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? 
*/ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_execve_twice.c b/src/test/unexpected_exit_execve_twice.c index e784130aec9..2e4f93795e0 100644 --- a/src/test/unexpected_exit_execve_twice.c +++ b/src/test/unexpected_exit_execve_twice.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? */ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_pid_ns.c b/src/test/unexpected_exit_pid_ns.c index 03dfaf655be..80cf65fa218 100644 --- a/src/test/unexpected_exit_pid_ns.c +++ b/src/test/unexpected_exit_pid_ns.c @@ -7,11 +7,12 @@ static int child_to_parent[2]; /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx", "memory"); + return 0; #else /* Does this actually change registers on ARM??? 
*/ int i; @@ -19,6 +20,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/userfaultfd.c b/src/test/userfaultfd.c new file mode 100644 index 00000000000..43342fe616e --- /dev/null +++ b/src/test/userfaultfd.c @@ -0,0 +1,10 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret = syscall(RR_userfaultfd, 0); + test_assert(ret == -1 && errno == ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/util.h b/src/test/util.h index 1ed2a989a3d..895a4089d1f 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -30,12 +30,15 @@ #include #include #include +#include +#include #include #include #include #include #include #include +#include #include #include #include @@ -46,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -61,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +111,7 @@ // X86 specific headers #if defined(__i386__) || defined(__x86_64__) #include +#include #include #endif @@ -389,6 +395,12 @@ inline static SyscallWrapper get_spurious_desched_syscall(void) { #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 +#endif /* Old systems don't have linux/kcmp.h */ #define RR_KCMP_FILE 0 diff --git a/src/test/util.py b/src/test/util.py index d7891265f6f..c8c7bf1f4fd 100644 --- a/src/test/util.py +++ b/src/test/util.py @@ -2,7 +2,8 @@ __all__ = [ 'expect_gdb', 'send_gdb','expect_rr', 'expect_list', 'restart_replay', 'interrupt_gdb', 'ok', - 'failed', 'iterlines_both', 'last_match', 'get_exe_arch' ] + 'failed', 'iterlines_both', 'last_match', 'get_exe_arch', + 'get_gdb_version' ] # Public API def expect_gdb(what): @@ -99,6 +100,13 @@ def get_rr_cmd(): rrargs = sys.argv[1:] 
return (rrargs[0], rrargs[1:]) +def get_gdb_version(): + '''Return the gdb version''' + send_gdb('python print(gdb.VERSION)') + expect_gdb(r'(\d+\.\d+)') + global gdb_rr + return float(gdb_rr.match.group(1)) + def send(prog, what): try: prog.send(what) diff --git a/src/test/vdso_parts.c b/src/test/vdso_parts.c new file mode 100644 index 00000000000..d75fb6646c7 --- /dev/null +++ b/src/test/vdso_parts.c @@ -0,0 +1,78 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +#ifdef __x86_64__ +static int found_dyn(Elf64_Dyn* dyn, Elf64_Sxword tag) { + while (dyn->d_tag != DT_NULL) { + if (dyn->d_tag == tag) { + return 1; + } + ++dyn; + } + return 0; +} +#elif defined(__i386__) +static int found_dyn(Elf32_Dyn* dyn, Elf32_Sxword tag) { + while (dyn->d_tag != DT_NULL) { + if (dyn->d_tag == tag) { + return 1; + } + ++dyn; + } + return 0; +} +#endif + +int main(void) { +#ifdef __x86_64__ + char* vdso = (char*)getauxval(AT_SYSINFO_EHDR); + Elf64_Ehdr* ehdr = (Elf64_Ehdr*)vdso; + Elf64_Phdr* dynamic = NULL; + for (int i = 0; i < ehdr->e_phnum; ++i) { + Elf64_Phdr* phdr = (Elf64_Phdr*)(vdso + ehdr->e_phoff + i*ehdr->e_phentsize); + if (phdr->p_type == PT_DYNAMIC) { + dynamic = phdr; + break; + } + } + if (!dynamic) { + atomic_puts("PT_DYNAMIC not found in VDSO"); + return 1; + } + Elf64_Dyn* dyn = (Elf64_Dyn*)(vdso + dynamic->p_offset); + test_assert(found_dyn(dyn, DT_HASH)); + test_assert(found_dyn(dyn, DT_SYMTAB)); + test_assert(found_dyn(dyn, DT_STRTAB)); + test_assert(found_dyn(dyn, DT_VERSYM)); + test_assert(found_dyn(dyn, DT_VERDEF)); + test_assert(found_dyn(dyn, DT_VERDEFNUM)); + test_assert(found_dyn(dyn, DT_STRSZ)); +#elif defined(__i386__) + char* vdso = (char*)getauxval(AT_SYSINFO_EHDR); + Elf32_Ehdr* ehdr = (Elf32_Ehdr*)vdso; + Elf32_Phdr* dynamic = NULL; + for (int i = 0; i < ehdr->e_phnum; ++i) { + Elf32_Phdr* phdr = (Elf32_Phdr*)(vdso + ehdr->e_phoff + i*ehdr->e_phentsize); + if (phdr->p_type == PT_DYNAMIC) { +
dynamic = phdr; + break; + } + } + if (!dynamic) { + atomic_puts("PT_DYNAMIC not found in VDSO"); + return 1; + } + Elf32_Dyn* dyn = (Elf32_Dyn*)(vdso + dynamic->p_offset); + test_assert(found_dyn(dyn, DT_HASH)); + test_assert(found_dyn(dyn, DT_SYMTAB)); + test_assert(found_dyn(dyn, DT_STRTAB)); + test_assert(found_dyn(dyn, DT_VERSYM)); + test_assert(found_dyn(dyn, DT_VERDEF)); + test_assert(found_dyn(dyn, DT_VERDEFNUM)); + test_assert(found_dyn(dyn, DT_STRSZ)); +#endif + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/vsyscall_reverse_next.run b/src/test/vsyscall_reverse_next.run index 5641c9ed3fb..d3dc1eae0a0 100644 --- a/src/test/vsyscall_reverse_next.run +++ b/src/test/vsyscall_reverse_next.run @@ -1,3 +1,3 @@ source `dirname $0`/util.sh -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi debug_test diff --git a/src/test/vsyscall_timeslice.c b/src/test/vsyscall_timeslice.c new file mode 100644 index 00000000000..1452e8dd0fe --- /dev/null +++ b/src/test/vsyscall_timeslice.c @@ -0,0 +1,90 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" +#include "util_internal.h" + +#ifdef __x86_64__ +extern void generate_tick(long generate); + +__asm__( +"generate_tick:\n\t" + "test %rdi, %rdi\n\t" + "jnz 1f\n\t" + "ud2\n\t" + "1: retq\n\t"); + +static void test_vsyscall_timeslice_sig(void) +{ + intptr_t ret; + uintptr_t syscall = SYS_rrcall_arm_time_slice; + uintptr_t request = 1; + register long r10 __asm__("r10") = 0; + register long r8 __asm__("r8") = 0; + register long r9 __asm__("r9") = 0; + __asm__ __volatile( + "syscall\n\t" + "test %%rax, %%rax\n\t" + "jnz .Ldone\n\t" + // Create a pipeline stall - the CPU will speculate through + // these, but because of the dependency from %rax (the result of the + // division) to the %rdi argument of generate_tick will not be able to + // retire the conditional branches therein, thus skidding our time + // slice signal into the vsyscall. 
+ "movq $1, %%rax\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + // Two taken conditional branches here will trigger the + // time slice expiration. We expect this to skid into + // the subsequent vsyscall, triggering the condition we + // want to test + "movq %%rax, %%rdi\n\t" + // N.B.: This only works if the branches contained herein are + // predicted taken. Below we train the branch predictor to make + // sure this happens. + "callq generate_tick\n\t" + "callq generate_tick\n\t" + "xorq %%rdi, %%rdi\n\t" + "movq $0xffffffffff600400, %%rax\n\t" // time(NULL) + "callq *%%rax\n\t" + ".Ldone:" + "nop\n\t" + : "=a"(ret) + : "a"(syscall), "D"(request), "S"(NULL), "d"(NULL), + "r"(r10), "r"(r8), "r"(r9) : "cc", "memory"); + test_assert(ret > 0); +} + +void callback(uint64_t env, char *name, __attribute__((unused)) map_properties_t* props) { + if (strcmp(name, "[vsyscall]") == 0) { + int* has_vsyscall = (int*)(uintptr_t)env; + *has_vsyscall = 1; + } +} +#endif + +int main(void) { + // x86_64 only +#ifdef __x86_64__ + FILE* maps_file = fopen("/proc/self/maps", "r"); + int has_vsyscall = 0; + iterate_maps((uintptr_t)&has_vsyscall, callback, maps_file); + + if (!running_under_rr()) { + atomic_puts("WARNING: This test only works under rr."); + } else if (has_vsyscall) { + for (int i = 0; i < 20000; ++i) { + // Train the branch predictor that these branches are taken + generate_tick(1); + } + test_vsyscall_timeslice_sig(); + } +#endif + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/explicit_checkpoints.c b/src/test/x86/explicit_checkpoints.c similarity index 100% rename from src/test/explicit_checkpoints.c rename to src/test/x86/explicit_checkpoints.c diff --git a/src/test/explicit_checkpoints.py b/src/test/x86/explicit_checkpoints.py similarity index 100% rename from src/test/explicit_checkpoints.py rename to src/test/x86/explicit_checkpoints.py diff 
--git a/src/test/explicit_checkpoints.run b/src/test/x86/explicit_checkpoints.run similarity index 100% rename from src/test/explicit_checkpoints.run rename to src/test/x86/explicit_checkpoints.run diff --git a/src/test/x86/ioperm.c b/src/test/x86/ioperm.c new file mode 100644 index 00000000000..533941ea9a2 --- /dev/null +++ b/src/test/x86/ioperm.c @@ -0,0 +1,11 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret; + ret = ioperm(0, 1024, 1); + atomic_printf("ioperm returned %d\n", ret); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/x86/iopl.c b/src/test/x86/iopl.c new file mode 100644 index 00000000000..c1ee876bb1e --- /dev/null +++ b/src/test/x86/iopl.c @@ -0,0 +1,11 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret; + ret = iopl(3); + atomic_printf("iopl returned %d\n", ret); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/x86/pkeys.c b/src/test/x86/pkeys.c new file mode 100644 index 00000000000..cdc9d435c89 --- /dev/null +++ b/src/test/x86/pkeys.c @@ -0,0 +1,50 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static void wrpkru(unsigned int pkru) { + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); +} + +static char* p; + +static void unset_pkey(__attribute__((unused)) int sig) { + pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, 0); +} + +int main(void) { + int pkey = pkey_alloc(0, 0); + int ret; + if (pkey < 0 && errno == ENOSYS) { + atomic_puts("pkeys not supported in kernel, skipping"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + if (pkey < 0 && (errno == ENOSPC || errno == EINVAL)) { + atomic_puts("pkeys not supported on this system, skipping"); + atomic_puts("EXIT-SUCCESS"); + return 0; 
+ } + test_assert(pkey >= 0); + + p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + test_assert(p != MAP_FAILED); + ret = pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, pkey); + test_assert(ret == 0); + p[0] = 1; + + wrpkru(PKEY_DISABLE_ACCESS << (2 * pkey)); + signal(SIGSEGV, unset_pkey); + p[0] = 2; + + ret = pkey_free(pkey); + test_assert(ret == 0); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/ptrace_sysemu.c b/src/test/x86/ptrace_sysemu.c similarity index 99% rename from src/test/ptrace_sysemu.c rename to src/test/x86/ptrace_sysemu.c index eca2ca4cf67..04083026d11 100644 --- a/src/test/ptrace_sysemu.c +++ b/src/test/x86/ptrace_sysemu.c @@ -1,7 +1,7 @@ /* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "util.h" -#include "ptrace_util.h" +#include "../ptrace_util.h" /* This tests PTRACE_SYSEMU, PTRACE_SINGLESTEP and PTRACE_SYSEMU_SINGLESTEP */ diff --git a/src/test/x86/string_instructions.run b/src/test/x86/string_instructions.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/x86/string_instructions.run +++ b/src/test/x86/string_instructions.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_async_signals.run b/src/test/x86/string_instructions_async_signals.run index ab81f7f6835..7a5599b14f0 100644 --- a/src/test/x86/string_instructions_async_signals.run +++ b/src/test/x86/string_instructions_async_signals.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh RECORD_ARGS="-c1000" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_async_signals_shared.run b/src/test/x86/string_instructions_async_signals_shared.run index ab81f7f6835..7a5599b14f0 100644 --- a/src/test/x86/string_instructions_async_signals_shared.run +++ 
b/src/test/x86/string_instructions_async_signals_shared.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh RECORD_ARGS="-c1000" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_break.run b/src/test/x86/string_instructions_break.run index cc9681a1651..14de8ac41e3 100644 --- a/src/test/x86/string_instructions_break.run +++ b/src/test/x86/string_instructions_break.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record string_instructions_replay$bitness debug x86/string_instructions_break diff --git a/src/test/x86/string_instructions_multiwatch.run b/src/test/x86/string_instructions_multiwatch.run index 080818d751c..0463cb6902c 100644 --- a/src/test/x86/string_instructions_multiwatch.run +++ b/src/test/x86/string_instructions_multiwatch.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi debug_test diff --git a/src/test/x86/string_instructions_replay.run b/src/test/x86/string_instructions_replay.run index 1b0e1dda0dd..b3e39b47d33 100644 --- a/src/test/x86/string_instructions_replay.run +++ b/src/test/x86/string_instructions_replay.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record $TESTNAME & diff --git a/src/test/x86/string_instructions_replay_quirk.run b/src/test/x86/string_instructions_replay_quirk.run index 18b73177f50..ad709863d88 100644 --- a/src/test/x86/string_instructions_replay_quirk.run +++ b/src/test/x86/string_instructions_replay_quirk.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record string_instructions_replay$bitness debug x86/string_instructions_replay_quirk diff --git a/src/util.cc b/src/util.cc index ba8345703c7..3007a4593b5 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1762,6 +1762,9 @@ bool 
is_advanced_pc_and_signaled_instruction(Task* t, remote_code_ptr ip) { !memcmp(insn, int3_insn, sizeof(int3_insn))) { return true; } +#else + UNUSED(t); + UNUSED(ip); #endif return false; } @@ -2140,6 +2143,7 @@ std::vector read_all_proc_fds(pid_t tid) while (struct dirent *dir = readdir(fddir)) { ret.push_back(atoi(dir->d_name)); } + closedir(fddir); return ret; } @@ -2182,7 +2186,7 @@ void SAFE_FATAL(int err, const char *msg) {.iov_base = (char*)msg, .iov_len=strlen(msg)}, {.iov_base = nl, .iov_len=sizeof(nl)} }; - ::writev(STDERR_FILENO, out, sizeof(out)/sizeof(struct iovec)); + (void)::writev(STDERR_FILENO, out, sizeof(out)/sizeof(struct iovec)); abort(); } diff --git a/src/util.h b/src/util.h index bbc63c727d1..902d1c47baf 100644 --- a/src/util.h +++ b/src/util.h @@ -28,6 +28,13 @@ #define SOL_NETLINK 270 #endif +#define UNUSED(expr) \ + do { \ + if (expr) { \ + (void)0; \ + } \ + } while (0) + namespace rr { /*