From 5590083b19422ba793f0d9dd36e9195eb5226447 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 12 Jan 2021 01:03:22 -0500 Subject: [PATCH 001/110] Fix interaction of desched sig and vsyscall patching This fixes a rare interaction where a time slice signal arrives just as we are stepping out of a vsyscall (this can happen if counter overflows just before the vsyscall, but execution skids into the vsyscall - signal delivery only happens at the return to userspace, so the seccomp signal gets delivered first despite the actual overflow of the timeslice technically happening earlier), which would previously confuse rr into asserting. This failure is very rare. In three weeks of running ~40 simultaneous rr sessions, we've seen this once. Fixes https://github.com/JuliaLang/julia/issues/39206 --- src/RecordSession.cc | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 8ce4fcf0451..f506ae322b5 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -319,7 +319,22 @@ void RecordSession::handle_seccomp_traced_syscall(RecordTask* t, // SIGSYS. Instead, we set a breakpoint at the return instruction. 
t->set_regs(regs); t->vm()->add_breakpoint(ret_addr, BKPT_INTERNAL); - t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); + while (true) { + t->resume_execution(RESUME_SYSCALL, RESUME_WAIT, RESUME_NO_TICKS); + if (t->ptrace_event() == PTRACE_EVENT_EXIT) { + return; + } + ASSERT(t, !t->ptrace_event()); + if (t->stop_sig() == syscallbuf_desched_sig()) { + continue; + } + if (t->stop_sig() == SIGTRAP && + is_kernel_trap(t->get_siginfo().si_code)) { + // Hit the breakpoint + break; + } + t->stash_sig(); + } t->vm()->remove_breakpoint(ret_addr, BKPT_INTERNAL); ASSERT(t, t->regs().ip().undo_executed_bkpt(t->arch()) == ret_addr); From a4ca5c73ec26be061d32939917b0b660a60cd2c7 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Tue, 12 Jan 2021 01:48:21 -0500 Subject: [PATCH 002/110] Add a mechanism to test time slice signal interactions This adds an rrcall that lets the tracee override the next time slice duration and uses it to test the previously fixed interaction between time slice signals and vsyscall patching. 
--- CMakeLists.txt | 1 + src/RecordSession.cc | 7 +++ src/RecordTask.cc | 3 +- src/RecordTask.h | 4 ++ src/preload/rrcalls.h | 6 +++ src/record_syscall.cc | 23 +++++++++ src/test/vsyscall_timeslice.c | 89 +++++++++++++++++++++++++++++++++++ 7 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 src/test/vsyscall_timeslice.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a2339a7425a..6c37a673985 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1146,6 +1146,7 @@ set(BASIC_TESTS video_capture vm_readv_writev vsyscall + vsyscall_timeslice x86/x87env wait wait_sigstop diff --git a/src/RecordSession.cc b/src/RecordSession.cc index f506ae322b5..7c6356fe70d 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -796,6 +796,13 @@ void RecordSession::task_continue(const StepState& step_state) { } } + // Override requested by the tracee for testing purposes + if (t->tick_request_override != (TicksRequest)0) { + ASSERT(t, !t->next_pmc_interrupt_is_for_user); + ticks_request = t->tick_request_override; + t->tick_request_override = (TicksRequest)0; + } + bool singlestep = is_ptrace_any_singlestep(t->arch(), t->emulated_ptrace_cont_command); if (singlestep && is_at_syscall_instruction(t, t->ip())) { diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 6eb92b7dfc8..8cfc6235cb2 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -196,7 +196,8 @@ RecordTask::RecordTask(RecordSession& session, pid_t _tid, uint32_t serial, waiting_for_reap(false), waiting_for_zombie(false), waiting_for_ptrace_exit(false), - retry_syscall_patching(false) { + retry_syscall_patching(false), + tick_request_override((TicksRequest)0) { push_event(Event::sentinel()); if (session.tasks().empty()) { // Initial tracee. It inherited its state from this process, so set it up. 
diff --git a/src/RecordTask.h b/src/RecordTask.h index 73afc88325d..3c69d9b1591 100644 --- a/src/RecordTask.h +++ b/src/RecordTask.h @@ -751,6 +751,10 @@ class RecordTask : public Task { // When exiting a syscall, we should call MonkeyPatcher::try_patch_syscall again. bool retry_syscall_patching; + + // Set if the tracee requested an override of the ticks request. + // Used for testing. + TicksRequest tick_request_override; }; } // namespace rr diff --git a/src/preload/rrcalls.h b/src/preload/rrcalls.h index be11210d454..296b75d15ae 100644 --- a/src/preload/rrcalls.h +++ b/src/preload/rrcalls.h @@ -78,3 +78,9 @@ * process tree, such that it may run without seccomp. */ #define SYS_rrcall_detach_teleport (RR_CALL_BASE + 9) +/** + * Requests that rr reset the time slice signal to the + * requested period. Used for testing interaction corner + * cases between the time slice signal and other rr behavior. + */ +#define SYS_rrcall_arm_time_slice (RR_CALL_BASE + 10) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 19c42523de0..9ab46436acf 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -4778,6 +4778,29 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, return ALLOW_SWITCH; } + case SYS_rrcall_arm_time_slice: { + Registers r = t->regs(); + bool arguments_are_zero = true; + for (int i = 2; i <= 6; ++i) { + arguments_are_zero &= r.arg(i) == 0; + } + // Ticks request of zero is invalid for the moment + // for purposes of this syscall. In the future we + // want to have it mean to simulate a timeslice expiry + // at the end of this syscall, but we have no use for + // that at the moment. 
+ if (r.arg(1) == 0 || r.arg(1) > (uintptr_t)MAX_TICKS_REQUEST || + !arguments_are_zero) { + syscall_state.emulate_result((uintptr_t)-EINVAL); + syscall_state.expect_errno = ENOSYS; + return PREVENT_SWITCH; + } + t->tick_request_override = (TicksRequest)r.arg(1); + syscall_state.emulate_result(0); + syscall_state.expect_errno = ENOSYS; + return PREVENT_SWITCH; + } + case Arch::brk: case Arch::munmap: case Arch::process_vm_readv: diff --git a/src/test/vsyscall_timeslice.c b/src/test/vsyscall_timeslice.c new file mode 100644 index 00000000000..79733899b73 --- /dev/null +++ b/src/test/vsyscall_timeslice.c @@ -0,0 +1,89 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" +#include "util_internal.h" + +#ifdef __x86_64__ +void __attribute__((naked)) generate_tick(long generate) { + __asm__ __volatile("test %%rdi, %%rdi\n\t" + "jnz 1f\n\t" + "ud2\n\t" + "1: retq\n\t" :: "D"(generate)); + (void)generate; +} + +static void test_vsyscall_timeslice_sig(void) +{ + intptr_t ret; + uintptr_t syscall = SYS_rrcall_arm_time_slice; + uintptr_t request = 1; + register long r10 __asm__("r10") = 0; + register long r8 __asm__("r8") = 0; + register long r9 __asm__("r9") = 0; + __asm__ __volatile( + "syscall\n\t" + "test %%rax, %%rax\n\t" + "jnz .Ldone\n\t" + // Create a pipeline stall - the CPU will speculate through + // these, but because of the dependency from %rax (the result of the + // division) to the %rdi argument of generate_tick will not be able to + // retire the conditional branches therein, thus skidding our time + // slice signal into the vsyscall. + "movq $1, %%rax\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + "div %%rdi\n\t" + // Two taken conditional branches here will trigger the + // time slice expiration. 
We expect this to skid into + // the subsequent vsyscall, triggering the condition we + // want to test + "movq %%rax, %%rdi\n\t" + // N.B.: This only works if the branches contained herein are + // predicted taken. Below we train the branch predictor to make + // sure this happens. + "callq generate_tick\n\t" + "callq generate_tick\n\t" + "xorq %%rdi, %%rdi\n\t" + "movq $0xffffffffff600400, %%rax\n\t" // time(NULL) + "callq *%%rax\n\t" + ".Ldone:" + "nop\n\t" + : "=a"(ret) + : "a"(syscall), "D"(request), "S"(NULL), "d"(NULL), + "r"(r10), "r"(r8), "r"(r9) : "cc", "memory"); + test_assert(ret > 0); +} + +void callback(uint64_t env, char *name, __attribute__((unused)) map_properties_t* props) { + if (strcmp(name, "[vsyscall]") == 0) { + int* has_vsyscall = (int*)(uintptr_t)env; + *has_vsyscall = 1; + } +} +#endif + +int main(void) { + // x86_64 only +#ifdef __x86_64__ + FILE* maps_file = fopen("/proc/self/maps", "r"); + int has_vsyscall = 0; + iterate_maps((uintptr_t)&has_vsyscall, callback, maps_file); + + if (!running_under_rr()) { + atomic_puts("WARNING: This test only works under rr."); + } else if (has_vsyscall) { + for (int i = 0; i < 20000; ++i) { + // Train the branch predictor that these branches are taken + generate_tick(1); + } + test_vsyscall_timeslice_sig(); + } +#endif + atomic_puts("EXIT-SUCCESS"); + return 0; +} From f25671d094edac8059cec56b98d7f10f2c740697 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 12 Apr 2021 22:28:00 -0700 Subject: [PATCH 003/110] Handle EINTR (which apparently you can still get on a WNOWAIT waitid(2)). --- src/Task.cc | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/Task.cc b/src/Task.cc index 13106ed1ed7..eb771f88cd2 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -163,22 +163,28 @@ void Task::wait_exit() { * for this we add `| WNOWAIT` to prevent dequeing the event and simply take * it as an indication that the task has execed. 
*/ - int ret = waitid(P_PID, tid, &info, WSTOPPED | WNOWAIT); - if (ret == 0) { - ASSERT(this, info.si_pid == tid) << "Expected " << tid << " got " << info.si_pid; - if (WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXIT) { - // It's possible that the earlier exit event was synthetic, in which - // case we're only now catching up to the real process exit. In that - // case, just ask the process to actually exit. (TODO: We may want to - // catch this earlier). - return proceed_to_exit(true); - } - ASSERT(this, WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXEC) - << "Expected PTRACE_EVENT_EXEC, got " << WaitStatus(info); - // The kernel will do the reaping for us in this case - was_reaped = true; - } else { - ASSERT(this, ret == -1 && errno == ECHILD) << "Got ret=" << ret << " errno=" << errno; + while (true) { + int ret = waitid(P_PID, tid, &info, WSTOPPED | WNOWAIT); + if (ret == 0) { + ASSERT(this, info.si_pid == tid) << "Expected " << tid << " got " << info.si_pid; + if (WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXIT) { + // It's possible that the earlier exit event was synthetic, in which + // case we're only now catching up to the real process exit. In that + // case, just ask the process to actually exit. (TODO: We may want to + // catch this earlier). + return proceed_to_exit(true); + } + ASSERT(this, WaitStatus(info).ptrace_event() == PTRACE_EVENT_EXEC) + << "Expected PTRACE_EVENT_EXEC, got " << WaitStatus(info); + // The kernel will do the reaping for us in this case + was_reaped = true; + break; + } else if (ret == -1 && errno == EINTR) { + continue; + } else { + ASSERT(this, ret == -1 && errno == ECHILD) << "Got ret=" << ret << " errno=" << errno; + break; + } } } From 254f53c531287fe737559558300ff7089e1d081e Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 16 Apr 2021 16:54:53 +1200 Subject: [PATCH 004/110] Make `rr sources` return full paths for DWOs when possible. 
If the DW_AT_comp_dir is relative, we assume that it is relative to the directory containing the final binary. --- src/SourcesCommand.cc | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 71718af3231..765727b2244 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -105,18 +105,20 @@ static void base_name(string& s) { } // file_name cannot be null, but the others can be. -static void resolve_file_name(const char* original_file_dir, - const char* comp_dir, const char* rel_dir, - const char* file_name, set* file_names) { +static string resolve_file_name(const char* original_file_dir, + const char* comp_dir, const char* rel_dir, + const char* file_name) { const char* names[] = { original_file_dir, comp_dir, rel_dir, file_name }; - ssize_t first_absolute = -1; + // Find the last path on the list that is absolute, and start + // resolution from there. + ssize_t absolute_path_index = -1; for (ssize_t i = 0; i < 4; ++i) { if (names[i] && names[i][0] == '/') { - first_absolute = i; + absolute_path_index = i; } } - string s = first_absolute >= 0 ? "" : "/"; - for (size_t i = (first_absolute >= 0 ? first_absolute : 0); i < 4; ++i) { + string s = absolute_path_index >= 0 ? "" : "/"; + for (size_t i = (absolute_path_index >= 0 ? 
absolute_path_index : 0); i < 4; ++i) { if (!names[i]) { continue; } @@ -125,7 +127,7 @@ static void resolve_file_name(const char* original_file_dir, } s += names[i]; } - file_names->insert(move(s)); + return s; } struct DwoInfo { @@ -136,6 +138,14 @@ struct DwoInfo { uint64_t id; }; +static string resolve_dwo_name(const string& original_file_name, + const char* comp_dir, + const char* dwo_name) { + string original_dir = original_file_name; + parent_dir(original_dir); + return resolve_file_name(original_dir.c_str(), comp_dir, NULL, dwo_name); +} + static bool process_compilation_units(ElfFileReader& reader, ElfFileReader* sup_reader, const string& trace_relative_name, @@ -210,11 +220,12 @@ static bool process_compilation_units(ElfFileReader& reader, } } if (has_dwo_id) { + string s = resolve_dwo_name(original_file_name, comp_dir, dwo_name); string c; if (comp_dir) { c = comp_dir; } - dwos->push_back({ dwo_name, trace_relative_name, move(c), dwo_id }); + dwos->push_back({ s, trace_relative_name, move(c), dwo_id }); } else { LOG(warn) << "DW_AT_GNU_dwo_name but not DW_AT_GNU_dwo_id"; } @@ -224,7 +235,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } if (source_file_name) { - resolve_file_name(original_file_dir.c_str(), comp_dir, nullptr, source_file_name, file_names); + file_names->insert(move(resolve_file_name(original_file_dir.c_str(), comp_dir, nullptr, source_file_name))); } intptr_t stmt_list = cu.die().section_ptr_attr(DW_AT_stmt_list, &ok); if (stmt_list < 0 || !ok) { @@ -240,7 +251,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } const char* dir = lines.directories()[f.directory_index]; - resolve_file_name(original_file_dir.c_str(), comp_dir, dir, f.file_name, file_names); + file_names->insert(move(resolve_file_name(original_file_dir.c_str(), comp_dir, dir, f.file_name))); } } while (!debug_info.empty()); From 21f20970fe4ba7fdc43b199b2b783108b56e178c Mon Sep 17 00:00:00 2001 From: Robert O'Callahan 
Date: Fri, 16 Apr 2021 17:23:33 +1200 Subject: [PATCH 005/110] Put the resolved DWO path in full_path --- src/SourcesCommand.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 765727b2244..8d5b202e7a1 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -135,6 +135,7 @@ struct DwoInfo { string trace_file; // Could be an empty string string comp_dir; + string full_path; uint64_t id; }; @@ -225,7 +226,7 @@ static bool process_compilation_units(ElfFileReader& reader, if (comp_dir) { c = comp_dir; } - dwos->push_back({ s, trace_relative_name, move(c), dwo_id }); + dwos->push_back({ dwo_name, trace_relative_name, move(c), s, dwo_id }); } else { LOG(warn) << "DW_AT_GNU_dwo_name but not DW_AT_GNU_dwo_id"; } @@ -740,8 +741,9 @@ static int sources(const map& binary_file_names, const map Date: Sat, 17 Apr 2021 11:47:19 +1200 Subject: [PATCH 006/110] Use toplevel __asm__ instead of __attribute__((naked)) --- src/chaos-test/futex_wakeup.c | 2 +- src/test/vsyscall_timeslice.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/chaos-test/futex_wakeup.c b/src/chaos-test/futex_wakeup.c index cd5e7a85d2e..2ca25c07328 100644 --- a/src/chaos-test/futex_wakeup.c +++ b/src/chaos-test/futex_wakeup.c @@ -12,7 +12,7 @@ static void* run_thread(__attribute__((unused)) void* p) { return NULL; } -int main(__attribute__((unused)) int argc) { +int main(void) { int i; pthread_t thread; struct timespec ts = { 0, 10000000 }; diff --git a/src/test/vsyscall_timeslice.c b/src/test/vsyscall_timeslice.c index 79733899b73..1452e8dd0fe 100644 --- a/src/test/vsyscall_timeslice.c +++ b/src/test/vsyscall_timeslice.c @@ -4,13 +4,14 @@ #include "util_internal.h" #ifdef __x86_64__ -void __attribute__((naked)) generate_tick(long generate) { - __asm__ __volatile("test %%rdi, %%rdi\n\t" +extern void generate_tick(long generate); + +__asm__( +"generate_tick:\n\t" + "test %rdi, %rdi\n\t" "jnz 
1f\n\t" "ud2\n\t" - "1: retq\n\t" :: "D"(generate)); - (void)generate; -} + "1: retq\n\t"); static void test_vsyscall_timeslice_sig(void) { From 340e04ea14cc0138e3e3bc6397ffa1246a0f6d17 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 17 Apr 2021 16:32:50 +1200 Subject: [PATCH 007/110] Look at ancestors of original_file_name to try to find a build directory we can make relative DW_AT_comp_dirs relative to --- src/SourcesCommand.cc | 101 ++++++++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 37 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 8d5b202e7a1..02bf61131e2 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -88,12 +88,14 @@ ExplicitSourcesCommand ExplicitSourcesCommand::singleton( " LIBRARY is the basename of the original file name,\n" " e.g. libc-2.32.so\n"); -static void parent_dir(string& s) { +static void dir_name(string& s) { size_t p = s.rfind('/'); - if (p == string::npos) { + if (p == string::npos || (p == 0 && s.size() == 1)) { s.clear(); } else if (p > 0) { s.resize(p); + } else { + s.resize(1); } } @@ -104,30 +106,66 @@ static void base_name(string& s) { } } +static bool is_absolute(string& s) { + return s[0] == '/'; +} + +static void prepend_path(const char* prefix, string& s) { + size_t len = strlen(prefix); + if (!len) { + return; + } + if (prefix[len - 1] == '/') { + s = string(prefix) + s; + } else { + s = string(prefix) + '/' + s; + } +} + +// Resolve a file name relative to a compilation directory and relative directory. // file_name cannot be null, but the others can be. -static string resolve_file_name(const char* original_file_dir, +// Takes into account the original file name as follows: +// -- if comp_dir, rel_dir or file_name are absolute, or original_file_name is NULL, +// then ignore original_file_name. +// The result is just the result of combining comp_dir/rel_dir/file_name. +// -- otherwise they're all relative to some build directory.
We hypothesize +// the build directory is some ancestor directory of original_file_name. +// We try making comp_dir/rel_dir/file_name relative to each ancestor directory +// of original_file_name, and if we find a file there, we return that name. +static string resolve_file_name(const char* original_file_name, const char* comp_dir, const char* rel_dir, const char* file_name) { - const char* names[] = { original_file_dir, comp_dir, rel_dir, file_name }; - // Find the last path on the list that is absolute, and start - // resolution from there. - ssize_t absolute_path_index = -1; - for (ssize_t i = 0; i < 4; ++i) { - if (names[i] && names[i][0] == '/') { - absolute_path_index = i; - } - } - string s = absolute_path_index >= 0 ? "" : "/"; - for (size_t i = (absolute_path_index >= 0 ? absolute_path_index : 0); i < 4; ++i) { - if (!names[i]) { - continue; + string path = file_name; + if (is_absolute(path)) { + return path; + } + if (rel_dir) { + prepend_path(rel_dir, path); + if (is_absolute(path)) { + return path; } - if (!s.empty() && s.back() != '/') { - s.push_back('/'); + } + if (comp_dir) { + prepend_path(comp_dir, path); + if (is_absolute(path)) { + return path; + } + } + if (!original_file_name) { + return path; + } + string original(original_file_name); + while (true) { + dir_name(original); + if (original.empty()) { + return path; + } + string candidate = original + "/" + path; + int ret = access(candidate.c_str(), F_OK); + if (!ret) { + return candidate; } - s += names[i]; } - return s; } struct DwoInfo { @@ -139,14 +177,6 @@ struct DwoInfo { uint64_t id; }; -static string resolve_dwo_name(const string& original_file_name, - const char* comp_dir, - const char* dwo_name) { - string original_dir = original_file_name; - parent_dir(original_dir); - return resolve_file_name(original_dir.c_str(), comp_dir, NULL, dwo_name); -} - static bool process_compilation_units(ElfFileReader& reader, ElfFileReader* sup_reader, const string& trace_relative_name, @@ -173,9 +203,6 
@@ static bool process_compilation_units(ElfFileReader& reader, debug_line_str, }; - string original_file_dir = original_file_name; - parent_dir(original_file_dir); - DwarfAbbrevs abbrevs(debug_abbrev); do { bool ok = true; @@ -221,12 +248,12 @@ static bool process_compilation_units(ElfFileReader& reader, } } if (has_dwo_id) { - string s = resolve_dwo_name(original_file_name, comp_dir, dwo_name); + string full_name = resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, dwo_name); string c; if (comp_dir) { c = comp_dir; } - dwos->push_back({ dwo_name, trace_relative_name, move(c), s, dwo_id }); + dwos->push_back({ dwo_name, trace_relative_name, move(c), full_name, dwo_id }); } else { LOG(warn) << "DW_AT_GNU_dwo_name but not DW_AT_GNU_dwo_id"; } @@ -236,7 +263,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } if (source_file_name) { - file_names->insert(move(resolve_file_name(original_file_dir.c_str(), comp_dir, nullptr, source_file_name))); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, source_file_name)); } intptr_t stmt_list = cu.die().section_ptr_attr(DW_AT_stmt_list, &ok); if (stmt_list < 0 || !ok) { @@ -252,7 +279,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } const char* dir = lines.directories()[f.directory_index]; - file_names->insert(move(resolve_file_name(original_file_dir.c_str(), comp_dir, dir, f.file_name))); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, dir, f.file_name)); } } while (!debug_info.empty()); @@ -300,7 +327,7 @@ find_auxiliary_file(const string& original_file_name, // Try in the same directory as the original file. 
string original_file_dir = original_file_name; - parent_dir(original_file_dir); + dir_name(original_file_dir); full_file_name = original_file_dir + "/" + aux_file_name; normalize_file_name(full_file_name); fd = ScopedFd(full_file_name.c_str(), O_RDONLY); @@ -534,7 +561,7 @@ static string resolve_symlinks(const string& path, string target; if (buf[0] != '/') { target = base; - parent_dir(target); + dir_name(target); if (target.size() > 1) { target.push_back('/'); } From 165f85a61126cddfe93625119d5ce928260c06fa Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sun, 18 Apr 2021 00:41:45 +1200 Subject: [PATCH 008/110] Handle cases where all files in a VCS dir were skipped (e.g. due to not being found) --- src/SourcesCommand.cc | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 02bf61131e2..1e6645e23b7 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -723,10 +723,20 @@ static int sources(const map& binary_file_names, const map Date: Sat, 17 Apr 2021 05:23:40 -0700 Subject: [PATCH 009/110] Suggest /etc/sysctl.d instead of /etc/sysctl.conf On systemd systems, the latter is silently and confusingly ignored (see, approximately, https://github.com/systemd/systemd/issues/12791). The former is compatible with non-systemd systems according to man 5 sysctl.conf, so we should suggest it instead as it works everywhere. 
--- src/RecordSession.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 7c6356fe70d..8bbb751195c 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -2142,8 +2142,9 @@ static string lookup_by_path(const string& name) { fprintf(stderr, "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" "Change it to 1, or use 'rr record -n' (slow).\n" - "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.conf.\n" - "See 'man 8 sysctl', 'man 5 sysctl.d' and 'man 5 sysctl.conf' for more details.\n", + "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.d/10-rr.conf.\n" + "See 'man 8 sysctl', 'man 5 sysctl.d' (systemd systems)\n" + "and 'man 5 sysctl.conf' (non-systemd systems) for more details.\n", val); exit(1); } From 878f756966c660a6e838cbf3516f953e586b9edc Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sun, 18 Apr 2021 20:30:47 +1200 Subject: [PATCH 010/110] Print more info if we get an unexpected status on kill --- src/Task.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Task.cc b/src/Task.cc index eb771f88cd2..0c3c7aa4280 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -218,15 +218,17 @@ WaitStatus Task::kill() { */ LOG(debug) << "Sending SIGKILL to " << tid; int ret = syscall(SYS_tgkill, real_tgid(), tid, SIGKILL); - DEBUG_ASSERT(ret == 0); + ASSERT(this, ret == 0); int raw_status = -1; int wait_ret = ::waitpid(tid, &raw_status, __WALL | WUNTRACED); WaitStatus status = WaitStatus(raw_status); LOG(debug) << " -> " << status; bool is_exit_event = status.ptrace_event() == PTRACE_EVENT_EXIT; - DEBUG_ASSERT(wait_ret == tid && - (is_exit_event || status.type() == WaitStatus::FATAL_SIGNAL || - status.type() == WaitStatus::EXIT)); + ASSERT(this, wait_ret == tid) << "Expected " << tid << " got " << wait_ret; + ASSERT(this, + is_exit_event || status.type() == WaitStatus::FATAL_SIGNAL || + status.type() == 
WaitStatus::EXIT) + << "Expected exit or fatal signal for " << tid << " got " << status; did_kill(); if (is_exit_event) { /* If this is the exit event, we can detach here and the task will From e63d602289b0914b59010468d7303df9b8c97dd2 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Wed, 28 Apr 2021 09:51:47 -0700 Subject: [PATCH 011/110] Mark some singlestep flag functions const. --- src/Registers.cc | 4 ++-- src/Registers.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Registers.cc b/src/Registers.cc index e6e610924ed..62258c7e5cf 100644 --- a/src/Registers.cc +++ b/src/Registers.cc @@ -659,7 +659,7 @@ void Registers::set_from_trace(SupportedArch a, const void* data, memcpy(&u.arm64regs, data, sizeof(u.arm64regs)); } -bool Registers::aarch64_singlestep_flag() { +bool Registers::aarch64_singlestep_flag() const { switch (arch()) { case aarch64: return pstate() & AARCH64_DBG_SPSR_SS; @@ -679,7 +679,7 @@ void Registers::set_aarch64_singlestep_flag() { } } -bool Registers::x86_singlestep_flag() { +bool Registers::x86_singlestep_flag() const { switch (arch()) { case x86: case x86_64: diff --git a/src/Registers.h b/src/Registers.h index 21c71713c55..bbc5233c294 100644 --- a/src/Registers.h +++ b/src/Registers.h @@ -424,7 +424,7 @@ class Registers { * Modify the processor's single step flag. On x86 this is the TF flag in the * eflags register. */ - bool x86_singlestep_flag(); + bool x86_singlestep_flag() const; void clear_x86_singlestep_flag(); /** @@ -441,7 +441,7 @@ class Registers { * likely already be clear, and we'd take a single step exception without * ever having executed any userspace instructions whatsoever. 
*/ - bool aarch64_singlestep_flag(); + bool aarch64_singlestep_flag() const; void set_aarch64_singlestep_flag(); void print_register_file(FILE* f) const; From 030e34f8f78ac32517552644e17ce2eb8cb42e20 Mon Sep 17 00:00:00 2001 From: Chris Morgan Date: Thu, 29 Apr 2021 18:37:53 +1000 Subject: [PATCH 012/110] Add microarch id for Zen 3 Cezanne APUs Confirmed to match on a laptop with an AMD Ryzen 7 5800HS, I expect it will also cover other 5000 H-, U- and G-series APUs. --- src/PerfCounters_x86.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/PerfCounters_x86.h b/src/PerfCounters_x86.h index 1f8a7740c57..aafe0b5c8f6 100644 --- a/src/PerfCounters_x86.h +++ b/src/PerfCounters_x86.h @@ -89,6 +89,7 @@ static CpuMicroarch compute_cpu_microarch() { } break; case 0x20f10: // Vermeer (Zen 3) + case 0x50f00: // Cezanne (Zen 3) if (ext_family == 0xa) { return AMDZen; } From 565be8d5331d854f8594f30061b412a41d07c883 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Mon, 10 May 2021 14:59:12 +1200 Subject: [PATCH 013/110] Mention Pernosco as development sponsor --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 10f3caa4d3f..c938f250e0b 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,8 @@ Please contribute! Make sure to review the [pull request checklist](/CONTRIBUTI If you find rr useful, please [add a testimonial](https://github.com/rr-debugger/rr/wiki/Testimonials). +rr development is sponsored by [Pernosco](https://pernos.co) and was originated by [Mozilla](https://www.mozilla.org). + # System requirements * Linux kernel ≥ 3.11 is required (for `PTRACE_SETSIGMASK`). From 01684a88e7e26fb0f3ab7ab4d7ae3475b6717d14 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 10 May 2021 13:29:03 -0700 Subject: [PATCH 014/110] Copy new bpf commands from kernel headers. 
--- src/kernel_supplement.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/kernel_supplement.h b/src/kernel_supplement.h index 03a6ff2beba..d4bac404496 100644 --- a/src/kernel_supplement.h +++ b/src/kernel_supplement.h @@ -358,6 +358,36 @@ enum { BPF_MAP_DELETE_ELEM, BPF_MAP_GET_NEXT_KEY, BPF_PROG_LOAD, + BPF_OBJ_PIN, + BPF_OBJ_GET, + BPF_PROG_ATTACH, + BPF_PROG_DETACH, + BPF_PROG_TEST_RUN, + BPF_PROG_GET_NEXT_ID, + BPF_MAP_GET_NEXT_ID, + BPF_PROG_GET_FD_BY_ID, + BPF_MAP_GET_FD_BY_ID, + BPF_OBJ_GET_INFO_BY_FD, + BPF_PROG_QUERY, + BPF_RAW_TRACEPOINT_OPEN, + BPF_BTF_LOAD, + BPF_BTF_GET_FD_BY_ID, + BPF_TASK_FD_QUERY, + BPF_MAP_LOOKUP_AND_DELETE_ELEM, + BPF_MAP_FREEZE, + BPF_BTF_GET_NEXT_ID, + BPF_MAP_LOOKUP_BATCH, + BPF_MAP_LOOKUP_AND_DELETE_BATCH, + BPF_MAP_UPDATE_BATCH, + BPF_MAP_DELETE_BATCH, + BPF_LINK_CREATE, + BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, + BPF_ITER_CREATE, + BPF_LINK_DETACH, + BPF_PROG_BIND_MAP, }; #ifndef O_PATH From ee71f989c2ee455e0cc3334d06ea82a20664228b Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 10 May 2021 13:51:19 -0700 Subject: [PATCH 015/110] Allow BPF_OBJ_GET. Fixes #2853. 
--- CMakeLists.txt | 1 + src/record_syscall.cc | 2 ++ src/test/bpf.c | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 src/test/bpf.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c37a673985..6d632a268d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -785,6 +785,7 @@ set(BASIC_TESTS big_buffers block block_open + bpf brk brk2 capget diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 9ab46436acf..de58ae26876 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -2038,6 +2038,8 @@ static Switchable prepare_bpf(RecordTask* t, case BPF_MAP_UPDATE_ELEM: case BPF_MAP_DELETE_ELEM: return PREVENT_SWITCH; + case BPF_OBJ_GET: + return ALLOW_SWITCH; case BPF_PROG_LOAD: { auto argsp = syscall_state.reg_parameter(2, IN); diff --git a/src/test/bpf.c b/src/test/bpf.c new file mode 100644 index 00000000000..0beb4e39731 --- /dev/null +++ b/src/test/bpf.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ +#include "util.h" + +#include +#include + +int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ + return syscall(__NR_bpf, cmd, attr, size); +} + +int main(void) { + union bpf_attr attr; + + { + const char* filename = "foo"; + memset(&attr, 0, sizeof(attr)); + attr.pathname = (__u64)(uintptr_t)filename; + bpf(BPF_OBJ_GET, &attr, 1); + } + + atomic_puts("EXIT-SUCCESS"); + + return 0; +} From 2f1271d12a7827bff430d0a720fc69f363f21b3c Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 10 May 2021 13:55:18 -0700 Subject: [PATCH 016/110] Note failing bpf subcommand when we die. 
--- src/record_syscall.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index de58ae26876..a1f22c1aae6 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5620,6 +5620,9 @@ static string extra_expected_errno_info(RecordTask* t, case Arch::madvise: ss << "; unknown madvise(" << (int)t->regs().arg3() << ")"; break; + case Arch::bpf: + ss << "; unknown bpf(cmd=" << (int)t->regs().arg1() << ")"; + break; } break; case EIO: From 78611c3e9bb51f1080c87ca1a2453c31b5fbcc8c Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 11 May 2021 12:23:35 +1200 Subject: [PATCH 017/110] Make userfaultfd return ENOSYS in the hope this works for some users --- CMakeLists.txt | 1 + src/record_syscall.cc | 12 +++++++++++- src/syscalls.py | 2 +- src/test/userfaultfd.c | 10 ++++++++++ 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 src/test/userfaultfd.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d632a268d5..b079064840e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1141,6 +1141,7 @@ set(BASIC_TESTS unexpected_exit_pid_ns unjoined_thread unshare + userfaultfd utimes vfork_flush vfork_shared diff --git a/src/record_syscall.cc b/src/record_syscall.cc index a1f22c1aae6..235069499b6 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -3950,6 +3950,15 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, return PREVENT_SWITCH; } + case Arch::userfaultfd: { + // Pretend the kernel doesn't support this. 
+ Registers r = regs; + r.set_arg1(0xffffffff); + t->set_regs(r); + syscall_state.emulate_result(-ENOSYS); + return PREVENT_SWITCH; + } + case Arch::memfd_create: { string name = t->read_c_str(remote_ptr(regs.arg1())); if (is_blacklisted_memfd(name.c_str())) { @@ -6154,13 +6163,14 @@ static void rec_process_syscall_arch(RecordTask* t, case Arch::io_setup: case Arch::madvise: case Arch::memfd_create: + case Arch::mprotect: case Arch::pread64: case Arch::preadv: case Arch::ptrace: case Arch::read: case Arch::readv: case Arch::sched_setaffinity: - case Arch::mprotect: { + case Arch::userfaultfd: { // Restore the registers that we may have altered. Registers r = t->regs(); r.set_orig_arg1(syscall_state.syscall_entry_registers.arg1()); diff --git a/src/syscalls.py b/src/syscalls.py index 39804403440..ee7d8b4f4a3 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -1671,7 +1671,7 @@ def __init__(self, **kwargs): bpf = IrregularEmulatedSyscall(x86=357, x64=321, generic=280) execveat = UnsupportedSyscall(x86=358, x64=322, generic=281) -userfaultfd = UnsupportedSyscall(x86=374, x64=323, generic=282) +userfaultfd = IrregularEmulatedSyscall(x86=374, x64=323, generic=282) membarrier = EmulatedSyscall(x86=375, x64=324, generic=283) mlock2 = UnsupportedSyscall(x86=376, x64=325, generic=284) copy_file_range = IrregularEmulatedSyscall(x86=377, x64=326, generic=285) diff --git a/src/test/userfaultfd.c b/src/test/userfaultfd.c new file mode 100644 index 00000000000..43342fe616e --- /dev/null +++ b/src/test/userfaultfd.c @@ -0,0 +1,10 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret = syscall(RR_userfaultfd, 0); + test_assert(ret == -1 && errno == ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From 37d6685d821b8643ca8cc263cf5d628209dcf72a Mon Sep 17 00:00:00 2001 From: Sidharth Kshatriya Date: Sat, 8 May 2021 12:36:34 +0530 Subject: [PATCH 018/110] Make sure maybe_intercept_mem_request() 
does not write to invalid memory When dispatch_debugger_request() handles the DREQ_GET_MEM request, gdb can ask for a memory range that is invalid or partially valid. That is why a read_bytes_fallible() call is made. Depending on the actual number of bytes read, the `mem` buffer is resized. It could, for instance, be resized to 0 length. Then a maybe_intercept_mem_request() call is made. This function will always assume the memory range it needs to inspect (for writing a 0 word) is req.mem_.len long. However that is wrong -- it should be the resized length of the buffer into which the read_bytes_fallible() call mentioned above wrote. --- src/GdbServer.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/GdbServer.cc b/src/GdbServer.cc index bd5827358af..e93a57c5338 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -338,6 +338,7 @@ static bool is_in_patch_stubs(Task* t, remote_code_ptr ip) { void GdbServer::maybe_intercept_mem_request(Task* target, const GdbRequest& req, vector* result) { + DEBUG_ASSERT(req.mem_.len >= result->size()); /* Crazy hack! 
* When gdb tries to read the word at the top of the stack, and we're in our * dynamically-generated stub code, tell it the value is zero, so that gdb's @@ -348,7 +349,7 @@ void GdbServer::maybe_intercept_mem_request(Task* target, const GdbRequest& req, */ size_t size = word_size(target->arch()); if (target->regs().sp().as_int() >= req.mem_.addr && - target->regs().sp().as_int() + size <= req.mem_.addr + req.mem_.len && + target->regs().sp().as_int() + size <= req.mem_.addr + result->size() && is_in_patch_stubs(target, target->ip())) { memset(result->data() + target->regs().sp().as_int() - req.mem_.addr, 0, size); From 996ecf01675ae6b1dbe3efd82d4e0269814a63bf Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 12 May 2021 10:32:12 +1200 Subject: [PATCH 019/110] Search for external debuginfo by build-id --- src/SourcesCommand.cc | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 1e6645e23b7..d5a5331a828 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -332,7 +332,13 @@ find_auxiliary_file(const string& original_file_name, normalize_file_name(full_file_name); fd = ScopedFd(full_file_name.c_str(), O_RDONLY); if (fd.is_open()) { - goto found; + // Debian/Ubuntu built /lib/x86_64-linux-gnu/ld-2.31.so with a + // .gnu_debuglink of "ld-2.31.so", expecting it to be found at + // /usr/lib/debug/lib/x86_64-linux-gnu/ld-2.31.so. So we need to make + // sure we aren't using the binary file as its own debuginfo. 
+ if (real_path(original_file_name) != real_path(full_file_name)) { + goto found; + } } LOG(warn) << "Can't find external debuginfo file " << full_file_name; @@ -378,6 +384,36 @@ find_auxiliary_file(const string& original_file_name, return reader; } +static unique_ptr +find_auxiliary_file_by_buildid(ElfFileReader& trace_file_reader, string& full_file_name) { + string build_id = trace_file_reader.read_buildid(); + if (build_id.empty()) { + LOG(warn) << "Main ELF binary has no build ID!"; + return nullptr; + } + if (build_id.size() < 3) { + LOG(warn) << "Build ID is too short!"; + return nullptr; + } + + string path = "/usr/lib/debug/.build-id/" + build_id.substr(0, 2) + "/" + build_id.substr(2) + ".debug"; + ScopedFd fd(path.c_str(), O_RDONLY); + if (!fd.is_open()) { + LOG(info) << "Can't find external debuginfo file " << path; + return nullptr; + } + + LOG(info) << "Examining external by buildid " << path; + auto reader = make_unique(fd); + if (!reader->ok()) { + LOG(warn) << "Not an ELF file!"; + return nullptr; + } + full_file_name = path; + return reader; +} + +// Traverse the compilation units of an auxiliary file to collect their source files static bool process_auxiliary_file(ElfFileReader& trace_file_reader, ElfFileReader& aux_file_reader, ElfFileReader* alt_file_reader, @@ -436,7 +472,10 @@ static bool try_debuglink_file(ElfFileReader& trace_file_reader, auto reader = find_auxiliary_file(original_file_name, aux_file_name, full_file_name); if (!reader) { - return false; + reader = find_auxiliary_file_by_buildid(trace_file_reader, full_file_name); + if (!reader) { + return false; + } } /* A debuglink file can have its own debugaltlink */ From 63bebd367b7fc9d21cd30be878fbc23b98334bb7 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 14 May 2021 13:07:19 +1200 Subject: [PATCH 020/110] Test write monitoring for multiply-mapped shared memory --- CMakeLists.txt | 1 + src/test/shared_monitor.c | 27 +++++++++++++++++++++++++++ 2 files changed, 28 
insertions(+) create mode 100644 src/test/shared_monitor.c diff --git a/CMakeLists.txt b/CMakeLists.txt index b079064840e..322104f82a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1059,6 +1059,7 @@ set(BASIC_TESTS setsid setuid shared_exec + shared_monitor shared_write shm shm_unmap diff --git a/src/test/shared_monitor.c b/src/test/shared_monitor.c new file mode 100644 index 00000000000..e7d6afc0eef --- /dev/null +++ b/src/test/shared_monitor.c @@ -0,0 +1,27 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd = memfd_create("temp", 0); + char buf[4096]; + int size = sizeof(buf); + int ret; + uint8_t* p; + memset(buf, 1, size); + ret = write(fd, buf, size); + test_assert(ret == size); + p = (uint8_t*)mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + + memset(buf, 0, size); + ret = pwrite(fd, buf, size, 0); + test_assert(ret == size); + + p = (uint8_t*)mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[0] == 0); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} From c777d062ea242c40949499f6cb4863a6d3ceb81a Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 11 May 2021 12:01:51 -0700 Subject: [PATCH 021/110] Remove the attractive nuisance RR_BUILD_SHARED option. 
--- CMakeLists.txt | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 322104f82a1..572a221ac88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -544,29 +544,10 @@ else() set(CMAKE_INSTALL_INCLUDEDIR "include") endif() -option(RR_BUILD_SHARED "Build the rr shared library as well as the binary (experimental).") - -if(RR_BUILD_SHARED) - add_library(rr ${RR_SOURCES}) - set_target_properties(rr PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) - add_executable(rrbin src/main.cc) - set(RR_BIN rrbin) - post_build_executable(rrbin) - set_target_properties(rrbin PROPERTIES ENABLE_EXPORTS true OUTPUT_NAME rr) - set_target_properties(rrbin PROPERTIES INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") - set_target_properties(rrbin PROPERTIES INSTALL_RPATH_USE_LINK_PATH true) - set_target_properties(brotli PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_link_libraries(rrbin rr) - install(TARGETS rr - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -else() - add_executable(rr ${RR_SOURCES} src/main.cc) - set_target_properties(rr PROPERTIES ENABLE_EXPORTS true) - post_build_executable(rr) - set(RR_BIN rr) -endif() +add_executable(rr ${RR_SOURCES} src/main.cc) +set_target_properties(rr PROPERTIES ENABLE_EXPORTS true) +post_build_executable(rr) +set(RR_BIN rr) add_dependencies(rr Generated) option(strip "Strip debug info from rr binary") @@ -593,9 +574,6 @@ else() endif() set_target_properties(rr PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") -if(RR_BUILD_SHARED) - set_target_properties(rrbin PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") -endif() target_link_libraries(rrpreload ${CMAKE_DL_LIBS} From ed0964ee80756bfdcae76f7cc54aaecf8a2f8d6c Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 14 May 2021 09:44:44 -0700 Subject: [PATCH 022/110] Support F_NOTIFY. Closes #2856. 
--- CMakeLists.txt | 3 ++- src/kernel_abi.h | 1 + src/record_syscall.cc | 1 + src/test/fcntl_notify.c | 28 ++++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 src/test/fcntl_notify.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 572a221ac88..52b9b8ec608 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -815,9 +815,10 @@ set(BASIC_TESTS fadvise fanotify fatal_sigsegv_thread - fcntl_dupfd x86/fault_in_code_page + fcntl_dupfd fcntl_misc + fcntl_notify fcntl_owner_ex fcntl_rw_hints fcntl_seals diff --git a/src/kernel_abi.h b/src/kernel_abi.h index 65a6de63d2e..bc02f8b9689 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -108,6 +108,7 @@ struct FcntlConstants { OFD_SETLK = 37, OFD_SETLKW = 38, // Other Linux-specific operations + NOTIFY = 0x400 + 2, DUPFD_CLOEXEC = 0x400 + 6, SETPIPE_SZ = 0x400 + 7, GETPIPE_SZ = 0x400 + 8, diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 235069499b6..d6aff8cc3a5 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -3551,6 +3551,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, case Arch::SETOWN_EX: case Arch::GETSIG: case Arch::SETSIG: + case Arch::NOTIFY: case Arch::SETPIPE_SZ: case Arch::GETPIPE_SZ: case Arch::ADD_SEALS: diff --git a/src/test/fcntl_notify.c b/src/test/fcntl_notify.c new file mode 100644 index 00000000000..6cb162076aa --- /dev/null +++ b/src/test/fcntl_notify.c @@ -0,0 +1,28 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static char tmp_name[] = "tempXXXXXX"; +static int saw_sigio = 0; + +void catcher(__attribute__((unused)) int signum) { + saw_sigio = 1; +} + +int main(void) { + int fd, file_fd; + mkdtemp(tmp_name); + signal(SIGIO, catcher); + + fd = open(tmp_name, O_RDONLY | O_DIRECTORY); + test_assert(fd >= 0); + + fcntl(fd, F_NOTIFY, DN_CREATE); + + file_fd = openat(fd, "foo", O_RDWR | O_CREAT); + test_assert(file_fd >= 0); + test_assert(saw_sigio); + + 
atomic_puts("EXIT-SUCCESS"); + return 0; +} From 95629d520c5eb99a87b4f2de10b06f6cc2ae78cb Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 15 May 2021 12:20:20 +1200 Subject: [PATCH 023/110] Make EmuFile cloning not materialize hole data --- src/EmuFs.cc | 63 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 13 deletions(-) diff --git a/src/EmuFs.cc b/src/EmuFs.cc index 0de6e44e82f..0cb07be75a4 100644 --- a/src/EmuFs.cc +++ b/src/EmuFs.cc @@ -28,25 +28,62 @@ EmuFile::~EmuFile() { EmuFile::shr_ptr EmuFile::clone(EmuFs& owner) { auto f = EmuFile::create(owner, orig_path.c_str(), device(), inode(), size_); - uint64_t data[65536 / sizeof(uint64_t)]; + // We could try using FICLONE but tmpfs doesn't support that yet so let's just + // not bother for now. + + // Avoid copying holes. + vector buf; uint64_t offset = 0; while (offset < size_) { - ssize_t amount = min(size_ - offset, sizeof(data)); - ssize_t ret = pread64(fd(), data, amount, offset); - if (ret <= 0) { - FATAL() << "Couldn't read all the data"; + ssize_t ret = lseek(fd(), offset, SEEK_HOLE); + if (ret < 0) { + ret = size_; + } else { + if (uint64_t(ret) < offset) { + FATAL() << "lseek returned hole before requested offset"; + } } - // There could have been a short read - amount = ret; - uint8_t* data_ptr = reinterpret_cast(data); - while (amount > 0) { - ret = pwrite64(f->fd(), data_ptr, amount, offset); + uint64_t hole = ret; + // Copy data + while (offset < hole) { + loff_t off_in = offset; + loff_t off_out = offset; + ssize_t ncopied = syscall(NativeArch::copy_file_range, file.get(), &off_in, + f->fd().get(), &off_out, hole - offset, 0); + if (ncopied >= 0) { + if (ncopied == 0) { + FATAL() << "Didn't copy anything"; + } + offset += ncopied; + continue; + } + + ssize_t amount = min(hole - offset, 4*1024*1024); + buf.resize(amount); + ret = pread64(fd(), buf.data(), amount, offset); if (ret <= 0) { + FATAL() << "Couldn't read all the data"; + } + ssize_t 
written = pwrite_all_fallible(f->fd(), buf.data(), ret, offset); + if (written < ret) { FATAL() << "Couldn't write all the data"; } - amount -= ret; - data_ptr += ret; - offset += ret; + offset += written; + } + if (offset < size_) { + // Look for the end of the hole, if any + ret = lseek(fd(), offset, SEEK_DATA); + if (ret < 0) { + if (errno != ENXIO) { + FATAL() << "Couldn't find data"; + } + break; + } + if (uint64_t(ret) <= offset) { + FATAL() << "Zero sized hole?"; + } + // Skip the hole + offset = ret; } } From 5c6e992b2ad174e980dd3b08987218ceaf5a5db1 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 14 May 2021 02:05:27 +1200 Subject: [PATCH 024/110] Support mapping of sparse files by allowing raw-data records to contain holes --- CMakeLists.txt | 1 + src/AddressSpace.h | 2 +- src/AutoRemoteSyscalls.h | 2 -- src/RecordTask.cc | 51 +++++++++++++++++++++-------- src/RecordTask.h | 3 +- src/Task.cc | 69 ++++++++++++++++++++++++++++++++++------ src/Task.h | 14 ++++++++ src/TraceStream.cc | 62 +++++++++++++++++++++++++++++++++--- src/TraceStream.h | 36 +++++++++++++++++++-- src/record_syscall.cc | 34 +++++++++++++++++++- src/replay_syscall.cc | 36 +++++++++++++++++++-- src/rr_trace.capnp | 8 +++++ src/test/large_hole.c | 25 +++++++++++++++ 13 files changed, 305 insertions(+), 38 deletions(-) create mode 100644 src/test/large_hole.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 52b9b8ec608..a71887ef395 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -875,6 +875,7 @@ set(BASIC_TESTS keyctl kill_newborn kill_ptracee + large_hole large_write_deadlock legacy_ugid x86/lsl diff --git a/src/AddressSpace.h b/src/AddressSpace.h index 71d3a90fe39..48842e68edd 100644 --- a/src/AddressSpace.h +++ b/src/AddressSpace.h @@ -531,7 +531,7 @@ class AddressSpace : public HasTaskSet { remote_ptr new_addr, size_t new_num_bytes); /** - * Notify that data was written to this address space by rr or + * Notify that data will be (or may be) written to this address space 
by rr or * by the kernel. * |flags| can contain values from Task::WriteFlags. */ diff --git a/src/AutoRemoteSyscalls.h b/src/AutoRemoteSyscalls.h index 6150ed1ba01..b68f0670e6f 100644 --- a/src/AutoRemoteSyscalls.h +++ b/src/AutoRemoteSyscalls.h @@ -289,8 +289,6 @@ class AutoRemoteSyscalls { AutoRemoteSyscalls& operator=(const AutoRemoteSyscalls&) = delete; AutoRemoteSyscalls(const AutoRemoteSyscalls&) = delete; - void* operator new(size_t) = delete; - void operator delete(void*) = delete; }; } // namespace rr diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 8cfc6235cb2..97dfec9c958 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -1652,22 +1652,45 @@ void RecordTask::record_remote_writable(remote_ptr addr, } ssize_t RecordTask::record_remote_fallible(remote_ptr addr, - ssize_t num_bytes) { - ASSERT(this, num_bytes >= 0); + uintptr_t num_bytes, + const std::vector& holes) { + auto hole_iter = holes.begin(); + uintptr_t offset = 0; + vector buf; + while (offset < num_bytes) { + if (hole_iter != holes.end() && hole_iter->offset == offset) { + offset += hole_iter->size; + ++hole_iter; + continue; + } - if (record_remote_by_local_map(addr, num_bytes)) { - return num_bytes; - } + uintptr_t bytes = min(uintptr_t(4*1024*1024), num_bytes - offset); + if (hole_iter != holes.end()) { + ASSERT(this, hole_iter->offset > offset); + bytes = min(bytes, hole_iter->offset - offset); + } + if (record_remote_by_local_map(addr + offset, bytes)) { + offset += bytes; + continue; + } - vector buf; - ssize_t nread = 0; - if (!addr.is_null()) { - buf.resize(num_bytes); - nread = read_bytes_fallible(addr, num_bytes, buf.data()); - buf.resize(max(0, nread)); - } - trace_writer().write_raw(rec_tid, buf.data(), buf.size(), addr); - return nread; + if (addr) { + buf.resize(bytes); + ssize_t nread = read_bytes_fallible(addr + offset, bytes, buf.data()); + if (nread <= 0) { + if (offset == 0) { + return nread; + } + break; + } + trace_writer().write_raw_data(buf.data(), nread); + 
offset += nread; + } else { + offset += bytes; + } + } + trace_writer().write_raw_header(rec_tid, offset, addr, holes); + return offset; } void RecordTask::record_remote_even_if_null(remote_ptr addr, diff --git a/src/RecordTask.h b/src/RecordTask.h index 3c69d9b1591..ea072fbe5a8 100644 --- a/src/RecordTask.h +++ b/src/RecordTask.h @@ -378,7 +378,8 @@ class RecordTask : public Task { } // Record as much as we can of the bytes in this range. Will record only // contiguous mapped data starting at `addr`. - ssize_t record_remote_fallible(remote_ptr addr, ssize_t num_bytes); + ssize_t record_remote_fallible(remote_ptr addr, uintptr_t num_bytes, + const std::vector& holes = std::vector()); // Record as much as we can of the bytes in this range. Will record only // contiguous mapped-writable data starting at `addr`. void record_remote_writable(remote_ptr addr, ssize_t num_bytes); diff --git a/src/Task.cc b/src/Task.cc index 0c3c7aa4280..10842f2478d 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -2914,21 +2914,31 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, return; } + ssize_t nwritten = write_bytes_helper_no_notifications(addr, buf_size, buf, ok, flags); + if (nwritten > 0) { + vm()->notify_written(addr, nwritten, flags); + } +} + +ssize_t Task::write_bytes_helper_no_notifications(remote_ptr addr, ssize_t buf_size, + const void* buf, bool* ok, uint32_t flags) { + ASSERT(this, buf_size >= 0) << "Invalid buf_size " << buf_size; + if (0 == buf_size) { + return 0; + } + if (uint8_t* local_addr = as->local_mapping(addr, buf_size)) { memcpy(local_addr, buf, buf_size); - return; + return buf_size; } if (!as->mem_fd().is_open()) { ssize_t nwritten = write_bytes_ptrace(addr, buf_size, static_cast(buf)); - if (nwritten > 0) { - vm()->notify_written(addr, nwritten, flags); - } if (ok && nwritten < buf_size) { *ok = false; } - return; + return nwritten; } errno = 0; @@ -2936,7 +2946,7 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, // See 
comment in read_bytes_helper(). if (0 == nwritten && 0 == errno) { open_mem_fd(); - return write_bytes_helper(addr, buf_size, buf, ok, flags); + return write_bytes_helper_no_notifications(addr, buf_size, buf, ok, flags); } if (errno == EPERM) { FATAL() << "Can't write to /proc/" << tid << "/mem\n" @@ -2952,9 +2962,7 @@ void Task::write_bytes_helper(remote_ptr addr, ssize_t buf_size, << "Should have written " << buf_size << " bytes to " << addr << ", but only wrote " << nwritten; } - if (nwritten > 0) { - vm()->notify_written(addr, nwritten, flags); - } + return nwritten; } uint64_t Task::write_ranges(const vector& ranges, @@ -2974,6 +2982,49 @@ uint64_t Task::write_ranges(const vector& ranges, return result; } +void Task::write_zeroes(unique_ptr* remote, remote_ptr addr, size_t size) { + if (!size) { + return; + } + + bool remove_ok = true; + remote_ptr initial_addr = addr; + size_t initial_size = size; + vector zeroes; + while (size > 0) { + size_t bytes; + remote_ptr first_page = ceil_page_size(addr); + if (addr < first_page) { + bytes = min(first_page - addr, size); + } else { + if (remove_ok) { + remote_ptr last_page = floor_page_size(addr + size); + if (first_page < last_page) { + if (!*remote) { + *remote = make_unique(this); + } + int ret = (*remote)->syscall(syscall_number_for_madvise(arch()), first_page, last_page - first_page, MADV_REMOVE); + if (ret == 0) { + addr = last_page; + size -= last_page - first_page; + continue; + } + // Don't try MADV_REMOVE again + remove_ok = false; + } + } + bytes = min(4*1024*1024, size); + } + zeroes.resize(bytes); + memset(zeroes.data(), 0, bytes); + ssize_t written = write_bytes_helper_no_notifications(addr, bytes, zeroes.data(), nullptr, 0); + ASSERT(this, written == (ssize_t)bytes); + addr += bytes; + size -= bytes; + } + vm()->notify_written(initial_addr, initial_size, 0); +} + const TraceStream* Task::trace_stream() const { if (session().as_record()) { return &session().as_record()->trace_writer(); diff --git 
a/src/Task.h b/src/Task.h index 9b86f98aeb5..a214f87988f 100644 --- a/src/Task.h +++ b/src/Task.h @@ -728,6 +728,13 @@ class Task { uint64_t write_ranges(const std::vector& ranges, void* data, size_t size); + /** + * Writes zeroes to the given memory range. + * For efficiency tries using MADV_REMOVE via `remote`. Caches + * an AutoRemoteSyscalls in `*remote`. + */ + void write_zeroes(std::unique_ptr* remote, remote_ptr addr, size_t size); + /** * Don't use these helpers directly; use the safer and more * convenient variants above. @@ -749,6 +756,13 @@ class Task { void write_bytes_helper(remote_ptr addr, ssize_t buf_size, const void* buf, bool* ok = nullptr, uint32_t flags = 0); + /** + * |flags| is bits from WriteFlags. + * Returns number of bytes written. + */ + ssize_t write_bytes_helper_no_notifications(remote_ptr addr, ssize_t buf_size, + const void* buf, bool* ok = nullptr, + uint32_t flags = 0); SupportedArch detect_syscall_arch(); diff --git a/src/TraceStream.cc b/src/TraceStream.cc index 42109913c6d..536822e5afb 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -417,6 +417,11 @@ void TraceWriter::write_frame(RecordTask* t, const Event& ev, w.setTid(r.rec_tid); w.setAddr(r.addr.as_int()); w.setSize(r.size); + auto holes = w.initHoles(r.holes.size()); + for (size_t j = 0; j < r.holes.size(); ++j) { + holes[j].setOffset(r.holes[j].offset); + holes[j].setSize(r.holes[j].size); + } } raw_recs.clear(); frame.setArch(to_trace_arch(t->arch())); @@ -533,7 +538,14 @@ TraceFrame TraceReader::read_frame() { for (size_t i = 0; i < raw_recs.size(); ++i) { // Build list in reverse order so we can efficiently pull records from it auto w = mem_writes[raw_recs.size() - 1 - i]; - raw_recs[i] = { w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()) }; + auto holes = w.getHoles(); + vector h; + h.resize(holes.size()); + for (size_t j = 0; j < h.size(); ++j) { + const auto& hole = holes[j]; + h[j] = { hole.getOffset(), hole.getSize() }; + } + raw_recs[i] = { 
w.getAddr(), (size_t)w.getSize(), i32_to_tid(w.getTid()), h }; } TraceFrame ret; @@ -1152,10 +1164,14 @@ KernelMapping TraceReader::read_mapped_region(MappedData* data, bool* found, map.getFileOffsetBytes()); } -void TraceWriter::write_raw(pid_t rec_tid, const void* d, size_t len, - remote_ptr addr) { +void TraceWriter::write_raw_header(pid_t rec_tid, size_t total_len, + remote_ptr addr, + const std::vector& holes = std::vector()) { + raw_recs.push_back({ addr, total_len, rec_tid, holes }); +} + +void TraceWriter::write_raw_data(const void* d, size_t len) { auto& data = writer(RAW_DATA); - raw_recs.push_back({ addr, len, rec_tid }); data.write(d, len); } @@ -1174,8 +1190,44 @@ bool TraceReader::read_raw_data_for_frame(RawData& d) { auto& rec = raw_recs[raw_recs.size() - 1]; d.rec_tid = rec.rec_tid; d.addr = rec.addr; + d.data.resize(rec.size); - reader(RAW_DATA).read((char*)d.data.data(), rec.size); + auto hole_iter = rec.holes.begin(); + uintptr_t offset = 0; + while (offset < d.data.size()) { + uintptr_t end = rec.size; + if (hole_iter != rec.holes.end()) { + if (offset == hole_iter->offset) { + memset(d.data.data() + offset, 0, hole_iter->size); + ++hole_iter; + offset += hole_iter->size; + continue; + } + end = hole_iter->offset; + } + reader(RAW_DATA).read((char*)d.data.data() + offset, end - offset); + offset = end; + } + + raw_recs.pop_back(); + return true; +} + +bool TraceReader::read_raw_data_for_frame_with_holes(RawDataWithHoles& d) { + if (raw_recs.empty()) { + return false; + } + auto& rec = raw_recs[raw_recs.size() - 1]; + d.rec_tid = rec.rec_tid; + d.addr = rec.addr; + d.holes = move(rec.holes); + size_t data_size = rec.size; + for (auto& h : d.holes) { + data_size -= h.size; + } + d.data.resize(data_size); + reader(RAW_DATA).read((char*)d.data.data(), data_size); + raw_recs.pop_back(); return true; } diff --git a/src/TraceStream.h b/src/TraceStream.h index d7d084aa9b2..bc356ca505c 100644 --- a/src/TraceStream.h +++ b/src/TraceStream.h @@ -28,6 
+28,11 @@ class KernelMapping; class RecordTask; struct TraceUuid; +struct WriteHole { + uint64_t offset; + uint64_t size; +}; + /** * TraceStream stores all the data common to both recording and * replay. TraceWriter deals with recording-specific logic, and @@ -45,6 +50,7 @@ class TraceStream { remote_ptr addr; size_t size; pid_t rec_tid; + std::vector holes; }; /** @@ -213,8 +219,13 @@ class TraceWriter : public TraceStream { * 'addr' is the address in the tracee where the data came from/will be * restored to. */ - void write_raw(pid_t tid, const void* data, size_t len, - remote_ptr addr); + void write_raw(pid_t tid, const void* data, size_t len, remote_ptr addr) { + write_raw_data(data, len); + write_raw_header(tid, len, addr, std::vector()); + } + void write_raw_data(const void* data, size_t len); + void write_raw_header(pid_t tid, size_t total_len, remote_ptr addr, + const std::vector& holes); /** * Write a task event (clone or exec record) to the trace. @@ -313,7 +324,7 @@ class TraceReader : public TraceStream { public: /** * A parcel of recorded tracee data. |data| contains the data read - * from |addr| in the tracee. + * from |addr| in the tracee. `data` contains zeroes for holes. */ struct RawData { std::vector data; @@ -321,6 +332,16 @@ class TraceReader : public TraceStream { pid_t rec_tid; }; + /** + * Like RawData, but returns positions of holes. `data` excludes holes. + */ + struct RawDataWithHoles { + std::vector data; + remote_ptr addr; + pid_t rec_tid; + std::vector holes; + }; + /** * Read relevant data from the trace. * @@ -361,9 +382,18 @@ class TraceReader : public TraceStream { /** * Reads the next raw data record for last-read frame. If there are no more * raw data records for this frame, return false. + * Holes are filled with zeroes in the output buffer. */ bool read_raw_data_for_frame(RawData& d); + /** + * Reads the next raw data record for last-read frame. If there are no more + * raw data records for this frame, return false. 
+ * Returns hole metadata so you can do something smarter with it than + * explicitly filling with zeroes. + */ + bool read_raw_data_for_frame_with_holes(RawDataWithHoles& d); + /** * Like read_raw_data_for_frame, but doesn't actually read the data bytes. * The array is resized but the data is not filled in. diff --git a/src/record_syscall.cc b/src/record_syscall.cc index d6aff8cc3a5..cc1f4c39ffa 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5351,6 +5351,37 @@ static bool monitor_fd_for_mapping(RecordTask* mapped_t, int mapped_fd, const st return our_mapping_writable; } +static vector find_holes(RecordTask* t, int desc, uint64_t offset, uint64_t size) { + vector ret; + ScopedFd fd = t->open_fd(desc, O_RDONLY); + if (!fd.is_open()) { + return ret; + } + uint64_t file_end = offset + size; + while (offset < file_end) { + off64_t r = lseek(fd, offset, SEEK_HOLE); + if (r < 0) { + // SEEK_HOLE not supported? + return ret; + } + uint64_t hole = (uint64_t)r; + ASSERT(t, hole >= offset); + if (hole >= file_end) { + return ret; + } + r = lseek(fd, hole, SEEK_DATA); + if (r < 0) { + // ???? 
+ return ret; + } + uint64_t data = min((uint64_t)r, file_end); + ASSERT(t, data > hole); + ret.push_back({ hole, data - hole }); + offset = data; + } + return ret; +} + static void process_mmap(RecordTask* t, size_t length, int prot, int flags, int fd, off_t offset_pages) { if (t->regs().syscall_failed()) { @@ -5423,7 +5454,8 @@ static void process_mmap(RecordTask* t, size_t length, int prot, int flags, TraceWriter::RECORD_IN_TRACE) { off64_t end = (off64_t)st.st_size - km.file_offset_bytes(); off64_t nbytes = min(end, (off64_t)km.size()); - ssize_t nread = t->record_remote_fallible(addr, nbytes); + vector holes = find_holes(t, fd, km.file_offset_bytes(), (uint64_t)nbytes); + ssize_t nread = t->record_remote_fallible(addr, nbytes, holes); if (!adjusted_size && nread != nbytes) { // If we adjusted the size, we're not guaranteed that the bytes we're // reading are actually valid (it could actually have been a zero-sized diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc index 2bbb9a5be68..d99ded2c2ad 100644 --- a/src/replay_syscall.cc +++ b/src/replay_syscall.cc @@ -557,14 +557,46 @@ static void finish_anonymous_mmap(ReplayTask* t, AutoRemoteSyscalls& remote, device, inode, nullptr, &recorded_km, emu_file); } +static void write_mapped_data_with_holes(ReplayTask* t, const TraceReader::RawDataWithHoles& buf) { + unique_ptr remote; + size_t data_offset = 0; + size_t addr_offset = 0; + auto holes_iter = buf.holes.begin(); + while (data_offset < buf.data.size() || holes_iter != buf.holes.end()) { + if (holes_iter != buf.holes.end() && holes_iter->offset == addr_offset) { + t->write_zeroes(&remote, buf.addr + addr_offset, holes_iter->size); + addr_offset += holes_iter->size; + ++holes_iter; + continue; + } + size_t data_end = buf.data.size(); + if (holes_iter != buf.holes.end()) { + data_end = holes_iter->offset - addr_offset; + } + t->write_bytes_helper(buf.addr + addr_offset, data_end - data_offset, buf.data.data() + data_offset, + nullptr); + addr_offset += 
data_end - data_offset; + data_offset = data_end; + } +} + static void write_mapped_data(ReplayTask* t, remote_ptr rec_addr, size_t size, TraceReader::MappedData& data) { switch (data.source) { - case TraceReader::SOURCE_TRACE: - t->set_data_from_trace(); + case TraceReader::SOURCE_TRACE: { + TraceReader::RawDataWithHoles buf; + ASSERT(t, t->trace_reader().read_raw_data_for_frame_with_holes(buf)); + ASSERT(t, buf.addr == rec_addr); + // Note that this gets called for remaps and shared maps that refer to the same pages + // as previous maps and so the data we're recording might not be the initial data + // for those pages, but it is the inital data *for this mapping*. + write_mapped_data_with_holes(t, buf); + t->vm()->maybe_update_breakpoints(t, rec_addr.cast(), + buf.data.size()); break; + } case TraceReader::SOURCE_FILE: { ScopedFd file(data.file_name.c_str(), O_RDONLY); ASSERT(t, file.is_open()) << "Can't open " << data.file_name; diff --git a/src/rr_trace.capnp b/src/rr_trace.capnp index 2293b7fb2ac..43bdeedc451 100644 --- a/src/rr_trace.capnp +++ b/src/rr_trace.capnp @@ -195,10 +195,18 @@ struct TaskEvent { } } +struct WriteHole { + offset @0 :UInt64; + size @1 :UInt64; +} + struct MemWrite { tid @0 :Tid; addr @1 :RemotePtr; size @2 :UInt64; + # A list of regions where zeroes are written. These are not + # present in the compressed data. 
+ holes @3 :List(WriteHole); } enum Arch { diff --git a/src/test/large_hole.c b/src/test/large_hole.c new file mode 100644 index 00000000000..6c387ed2678 --- /dev/null +++ b/src/test/large_hole.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { +#ifdef __x86_64__ + off64_t size = ((off64_t)100)*1024*1024*1024; + char* p; + int fd = open("big", O_RDWR | O_TRUNC | O_CREAT, 0700); + test_assert(pwrite64(fd, "x", 1, size) == 1); + p = (char*)mmap(NULL, size + 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + p[size/2] = 1; + p[size/2 + 65536] = 1; + test_assert(0 == munmap(p, size + 1)); + + test_assert(fallocate64(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, size/2, 4096) == 0); + p = (char*)mmap(NULL, size + 1, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[size/2] == 0); + test_assert(p[size/2 + 65536] == 1); +#endif + atomic_puts("EXIT-SUCCESS"); + return 0; +} From d18f580d91a60f3eedecd7cb5ff5aaf0a5868e3b Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 15 May 2021 23:38:19 +1200 Subject: [PATCH 025/110] Fix comment --- src/AddressSpace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AddressSpace.h b/src/AddressSpace.h index 48842e68edd..71d3a90fe39 100644 --- a/src/AddressSpace.h +++ b/src/AddressSpace.h @@ -531,7 +531,7 @@ class AddressSpace : public HasTaskSet { remote_ptr new_addr, size_t new_num_bytes); /** - * Notify that data will be (or may be) written to this address space by rr or + * Notify that data was written to this address space by rr or * by the kernel. * |flags| can contain values from Task::WriteFlags. 
*/ From 29048db7c24d78038135e8a2a9fe5c6acd4b548e Mon Sep 17 00:00:00 2001 From: Sidharth Kshatriya Date: Mon, 17 May 2021 20:33:51 +0530 Subject: [PATCH 026/110] patch_syscall_with_hook() can fail; don't assume it always succeeds This could lead to a spurious Event::patch_syscall() being added to trace. Also: - Don't print any LOG message that you've patched syscall unless you've already exit_syscall_and_prepare_restart()-ed successfully - Tweak LOG message. Use word "Patching" instead of "Patched" to account for situation in which patching may actually fail when attempted. --- src/Monkeypatcher.cc | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc index 0847eacdf60..e3e57d53e35 100644 --- a/src/Monkeypatcher.cc +++ b/src/Monkeypatcher.cc @@ -606,6 +606,7 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { uint8_t* following_bytes = &bytes[MAXIMUM_LOOKBACK]; intptr_t syscallno = r.original_syscallno(); + bool success = false; for (auto& hook : syscall_hooks) { bool matches_hook = false; if ((!(hook.flags & PATCH_SYSCALL_INSTRUCTION_IS_LAST) && @@ -687,7 +688,12 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { return false; } - LOG(debug) << "Patched syscall at " << ip << " syscall " + // Get out of executing the current syscall before we patch it. + if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { + return false; + } + + LOG(debug) << "Patching syscall at " << ip << " syscall " << syscall_name(syscallno, t->arch()) << " tid " << t->tid << " bytes " << bytes_to_string( @@ -695,26 +701,24 @@ bool Monkeypatcher::try_patch_syscall(RecordTask* t, bool entering_syscall) { min(bytes_count, sizeof(syscall_patch_hook::patch_region_bytes))); - // Get out of executing the current syscall before we patch it. 
- if (entering_syscall && !t->exit_syscall_and_prepare_restart()) { - return false; - } - - patch_syscall_with_hook(*this, t, hook); - - // Return to caller, which resume normal execution. - return true; + success = patch_syscall_with_hook(*this, t, hook); + break; } } - LOG(debug) << "Failed to patch syscall at " << ip << " syscall " - << syscall_name(syscallno, t->arch()) << " tid " << t->tid - << " bytes " - << bytes_to_string( - following_bytes, - min(bytes_count, - sizeof(syscall_patch_hook::patch_region_bytes))); - tried_to_patch_syscall_addresses.insert(ip); - return false; + + if (!success) { + LOG(debug) << "Failed to patch syscall at " << ip << " syscall " + << syscall_name(syscallno, t->arch()) << " tid " << t->tid + << " bytes " + << bytes_to_string( + following_bytes, + min(bytes_count, + sizeof(syscall_patch_hook::patch_region_bytes))); + tried_to_patch_syscall_addresses.insert(ip); + return false; + } + + return true; } // VDSOs are filled with overhead critical functions related to getting the From 52144d683b7b9e213918d61da5bc1b8b14974fab Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 18 May 2021 11:08:42 -0700 Subject: [PATCH 027/110] Fix error in data_end calculation. 
--- src/replay_syscall.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc index d99ded2c2ad..176ce855bcd 100644 --- a/src/replay_syscall.cc +++ b/src/replay_syscall.cc @@ -571,7 +571,7 @@ static void write_mapped_data_with_holes(ReplayTask* t, const TraceReader::RawDa } size_t data_end = buf.data.size(); if (holes_iter != buf.holes.end()) { - data_end = holes_iter->offset - addr_offset; + data_end = data_offset + holes_iter->offset - addr_offset; } t->write_bytes_helper(buf.addr + addr_offset, data_end - data_offset, buf.data.data() + data_offset, nullptr); From 6c1c3e2594e1e8278ffc720e157b950c01780705 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 19 May 2021 01:46:36 +1200 Subject: [PATCH 028/110] Don't lose the last hole of the file --- src/record_syscall.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index cc1f4c39ffa..0e9f95c78e2 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5371,8 +5371,11 @@ static vector find_holes(RecordTask* t, int desc, uint64_t offset, ui } r = lseek(fd, hole, SEEK_DATA); if (r < 0) { - // ???? 
- return ret; + if (errno == ENXIO) { + r = file_end; + } else { + return ret; + } } uint64_t data = min((uint64_t)r, file_end); ASSERT(t, data > hole); From 1d826918ac3da223e652b2613e195d78f9c43a74 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 19 May 2021 11:17:37 +1200 Subject: [PATCH 029/110] Always write a data record even if we read no bytes (restores behavior that was changed by 5c6e992b2ad174e980dd3b08987218ceaf5a5db1) --- src/RecordTask.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 97dfec9c958..8e7a4adcc6a 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -1678,9 +1678,6 @@ ssize_t RecordTask::record_remote_fallible(remote_ptr addr, buf.resize(bytes); ssize_t nread = read_bytes_fallible(addr + offset, bytes, buf.data()); if (nread <= 0) { - if (offset == 0) { - return nread; - } break; } trace_writer().write_raw_data(buf.data(), nread); From 65631cb233609ce4e9e3967f51c8cdf3c5d0b339 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 19 May 2021 11:46:40 +1200 Subject: [PATCH 030/110] Add test for 52144d683b7b9e213918d61da5bc1b8b14974fab --- CMakeLists.txt | 1 + src/test/small_holes.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 src/test/small_holes.c diff --git a/CMakeLists.txt b/CMakeLists.txt index a71887ef395..f7c840869c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1074,6 +1074,7 @@ set(BASIC_TESTS sigtrap simple_threads_stress sioc + small_holes sock_names_opts spinlock_priorities splice diff --git a/src/test/small_holes.c b/src/test/small_holes.c new file mode 100644 index 00000000000..fb482115bb7 --- /dev/null +++ b/src/test/small_holes.c @@ -0,0 +1,20 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + char* p; + size_t page_size = sysconf(_SC_PAGESIZE); + int fd = open("small", O_RDWR | O_TRUNC | O_CREAT, 0700); + test_assert(0 == ftruncate(fd, 
page_size*7)); + pwrite(fd, "x", 1, page_size); + pwrite(fd, "y", 1, page_size*3); + pwrite(fd, "z", 1, page_size*5); + p = (char*)mmap(NULL, page_size*7, PROT_READ, MAP_SHARED, fd, 0); + test_assert(p != MAP_FAILED); + test_assert(p[page_size] == 'x'); + test_assert(p[page_size*3] == 'y'); + test_assert(p[page_size*5] == 'z'); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From ebbbc730dfe341b258417348abfb8f6b170fb294 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 19 May 2021 12:33:38 +1200 Subject: [PATCH 031/110] Add forward compatibility version checking --- src/ReplaySession.cc | 6 ++++++ src/TraceInfoCommand.cc | 2 ++ src/TraceStream.cc | 3 +++ src/TraceStream.h | 8 ++++++++ src/rr_trace.capnp | 2 ++ 5 files changed, 21 insertions(+) diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index e279d00d168..fe40762b6ed 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -162,6 +162,12 @@ ReplaySession::ReplaySession(const std::string& dir, const Flags& flags) ticks_at_start_of_event(0), flags_(flags), trace_start_time(0) { + if (trace_in.required_forward_compatibility_version() > FORWARD_COMPATIBILITY_VERSION) { + CLEAN_FATAL() + << "This rr build is too old to replay the trace (we support forward compability version " + << FORWARD_COMPATIBILITY_VERSION << " but the trace needs " << trace_in.required_forward_compatibility_version(); + } + ticks_semantics_ = trace_in.ticks_semantics(); rrcall_base_ = trace_in.rrcall_base(); diff --git a/src/TraceInfoCommand.cc b/src/TraceInfoCommand.cc index e85794fcd56..539ee161675 100644 --- a/src/TraceInfoCommand.cc +++ b/src/TraceInfoCommand.cc @@ -61,6 +61,8 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { fprintf(out, " \"cpuidFaulting\":%s,\n", trace.uses_cpuid_faulting() ? 
"true" : "false"); + fprintf(out, " \"requiredForwardCompatibilityVersion\":%d,\n", trace.required_forward_compatibility_version()); + const char* semantics; switch (trace.ticks_semantics()) { case TICKS_RETIRED_CONDITIONAL_BRANCHES: semantics = "rcb"; break; diff --git a/src/TraceStream.cc b/src/TraceStream.cc index 536822e5afb..e737e15d915 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -1370,6 +1370,7 @@ void TraceWriter::close(CloseStatus status, const TraceUuid* uuid) { header.setTicksSemantics( to_trace_ticks_semantics(PerfCounters::default_ticks_semantics())); header.setSyscallbufProtocolVersion(SYSCALLBUF_PROTOCOL_VERSION); + header.setRequiredForwardCompatibilityVersion(FORWARD_COMPATIBILITY_VERSION); header.setPreloadThreadLocalsRecorded(true); header.setRrcallBase(syscall_number_for_rrcall_init_preload(x86_64)); @@ -1538,6 +1539,7 @@ TraceReader::TraceReader(const string& dir) preload_thread_locals_recorded_ = header.getPreloadThreadLocalsRecorded(); ticks_semantics_ = from_trace_ticks_semantics(header.getTicksSemantics()); rrcall_base_ = header.getRrcallBase(); + required_forward_compatibility_version_ = header.getRequiredForwardCompatibilityVersion(); quirks_ = 0; { auto quirks = header.getQuirks(); @@ -1618,6 +1620,7 @@ TraceReader::TraceReader(const TraceReader& other) exclusion_range_ = other.exclusion_range_; quirks_ = other.quirks_; clear_fip_fdp_ = other.clear_fip_fdp_; + required_forward_compatibility_version_ = other.required_forward_compatibility_version_; } TraceReader::~TraceReader() {} diff --git a/src/TraceStream.h b/src/TraceStream.h index bc356ca505c..3e66e00f389 100644 --- a/src/TraceStream.h +++ b/src/TraceStream.h @@ -22,6 +22,11 @@ namespace rr { +/** + * Bump this when rr changes mean that traces produced by new rr can't be replayed by old rr. 
+ */ +const int FORWARD_COMPATIBILITY_VERSION = 1; + struct CPUIDRecord; struct DisableCPUIDFeatures; class KernelMapping; @@ -480,6 +485,8 @@ class TraceReader : public TraceStream { int quirks() const { return quirks_; } + int required_forward_compatibility_version() const { return required_forward_compatibility_version_; } + private: CompressedReader& reader(Substream s) { return *readers[s]; } const CompressedReader& reader(Substream s) const { return *readers[s]; } @@ -498,6 +505,7 @@ class TraceReader : public TraceStream { bool chaos_mode_known_; bool chaos_mode_; int rrcall_base_; + int required_forward_compatibility_version_; SupportedArch arch_; int quirks_; }; diff --git a/src/rr_trace.capnp b/src/rr_trace.capnp index 43bdeedc451..e5fea788818 100644 --- a/src/rr_trace.capnp +++ b/src/rr_trace.capnp @@ -112,6 +112,8 @@ struct Header { # If in chaos mode, what was the global exclusion range. Useful for debugging. exclusionRangeStart @17 :RemotePtr; exclusionRangeEnd @18 :RemotePtr; + # Replaying this trace requires at least this forward-compabilitity-version + requiredForwardCompatibilityVersion @19 :Int32; } # A file descriptor belonging to a task From e9ec388d95ae05bc2eceb0361aea39510f08e17e Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 19 May 2021 12:47:35 +1200 Subject: [PATCH 032/110] Fix comment --- src/TraceStream.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/TraceStream.cc b/src/TraceStream.cc index e737e15d915..d2169b58a7f 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -42,9 +42,8 @@ namespace rr { // version number doesn't track the rr version number, because changes // to the trace format will be rare. // -// NB: if you *do* change the trace format for whatever reason, you -// MUST increment this version number. Otherwise users' old traces -// will become unreplayable and they won't know why. +// We don't plan to ever change this again. 
Instead, we use CapnpProto +// to define the trace format in an extensible way (see rr_trace.capnp). // #define TRACE_VERSION 85 From fce87138cf19a08974e916912f5eeb9fa8d16031 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Wed, 19 May 2021 21:07:51 -0700 Subject: [PATCH 033/110] Fix bugs when traversing raw data records in the presence of holes. --- src/TraceStream.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/TraceStream.cc b/src/TraceStream.cc index d2169b58a7f..a481cf3a343 100644 --- a/src/TraceStream.cc +++ b/src/TraceStream.cc @@ -1198,8 +1198,8 @@ bool TraceReader::read_raw_data_for_frame(RawData& d) { if (hole_iter != rec.holes.end()) { if (offset == hole_iter->offset) { memset(d.data.data() + offset, 0, hole_iter->size); - ++hole_iter; offset += hole_iter->size; + ++hole_iter; continue; } end = hole_iter->offset; @@ -1236,7 +1236,11 @@ bool TraceReader::read_raw_data_metadata_for_frame(RawDataMetadata& d) { return false; } d = raw_recs[raw_recs.size() - 1]; - reader(RAW_DATA).skip(d.size); + size_t data_size = d.size; + for (auto& h : d.holes) { + data_size -= h.size; + } + reader(RAW_DATA).skip(data_size); raw_recs.pop_back(); return true; } From 57c4dea2a2b77f5956dcd020d8fd53f0e80e08dc Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Thu, 20 May 2021 18:58:01 +1200 Subject: [PATCH 034/110] Fix find_holes to return holes relative to the given offset --- CMakeLists.txt | 1 + src/record_syscall.cc | 3 ++- src/test/shared_offset.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/test/shared_offset.c diff --git a/CMakeLists.txt b/CMakeLists.txt index f7c840869c2..b0495265a0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1040,6 +1040,7 @@ set(BASIC_TESTS setuid shared_exec shared_monitor + shared_offset shared_write shm shm_unmap diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 0e9f95c78e2..d4d630049dd 100644 --- a/src/record_syscall.cc +++ 
b/src/record_syscall.cc @@ -5351,6 +5351,7 @@ static bool monitor_fd_for_mapping(RecordTask* mapped_t, int mapped_fd, const st return our_mapping_writable; } +// The returned hole offsets are relative to 'offset' static vector find_holes(RecordTask* t, int desc, uint64_t offset, uint64_t size) { vector ret; ScopedFd fd = t->open_fd(desc, O_RDONLY); @@ -5379,7 +5380,7 @@ static vector find_holes(RecordTask* t, int desc, uint64_t offset, ui } uint64_t data = min((uint64_t)r, file_end); ASSERT(t, data > hole); - ret.push_back({ hole, data - hole }); + ret.push_back({ hole - offset, data - hole }); offset = data; } return ret; diff --git a/src/test/shared_offset.c b/src/test/shared_offset.c new file mode 100644 index 00000000000..f1802f2169f --- /dev/null +++ b/src/test/shared_offset.c @@ -0,0 +1,20 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + size_t page_size = sysconf(_SC_PAGESIZE); + int fd = syscall(RR_memfd_create, "shared", 0); + if (fd < 0 && errno == ENOSYS) { + atomic_puts("SYS_memfd_create not supported on this kernel"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + test_assert(fd >= 0); + test_assert(0 == ftruncate(fd, page_size*2)); + char* map = (char*)mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, page_size); + test_assert(map != MAP_FAILED); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From 2a90da31f04b1093ce50fed8d51d53bcf3d15b60 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Thu, 20 May 2021 19:12:15 +1200 Subject: [PATCH 035/110] Fix regression involving offset mutating under us --- src/record_syscall.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index d4d630049dd..401ee35d61d 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5358,6 +5358,7 @@ static vector find_holes(RecordTask* t, int desc, uint64_t offset, ui if (!fd.is_open()) { return ret; } + uint64_t 
file_start = offset; uint64_t file_end = offset + size; while (offset < file_end) { off64_t r = lseek(fd, offset, SEEK_HOLE); @@ -5380,7 +5381,7 @@ static vector find_holes(RecordTask* t, int desc, uint64_t offset, ui } uint64_t data = min((uint64_t)r, file_end); ASSERT(t, data > hole); - ret.push_back({ hole - offset, data - hole }); + ret.push_back({ hole - file_start, data - hole }); offset = data; } return ret; From 7469e113587b5c21df195e536c859c645669b1ba Mon Sep 17 00:00:00 2001 From: jalapenopuzzle <8386278+jalapenopuzzle@users.noreply.github.com> Date: Thu, 20 May 2021 16:24:34 +1000 Subject: [PATCH 036/110] Change test/shared_monitor.c to use syscall Allows compiling on RHEL7 --- src/test/shared_monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/shared_monitor.c b/src/test/shared_monitor.c index e7d6afc0eef..ea27038b51f 100644 --- a/src/test/shared_monitor.c +++ b/src/test/shared_monitor.c @@ -3,7 +3,7 @@ #include "util.h" int main(void) { - int fd = memfd_create("temp", 0); + int fd = syscall(SYS_memfd_create, "temp", 0); char buf[4096]; int size = sizeof(buf); int ret; From cd65670feca0902efac1c14b02834c4ba99c0221 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 21 May 2021 00:05:03 +1200 Subject: [PATCH 037/110] Print holes in 'rr dump' --- src/DumpCommand.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/DumpCommand.cc b/src/DumpCommand.cc index d7f44cf0738..bd941e428fd 100644 --- a/src/DumpCommand.cc +++ b/src/DumpCommand.cc @@ -258,8 +258,20 @@ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, TraceReader::RawDataMetadata data; while (process_raw_data && trace.read_raw_data_metadata_for_frame(data)) { if (flags.dump_recorded_data_metadata) { - fprintf(out, " { tid:%d, addr:%p, length:%p }\n", data.rec_tid, + fprintf(out, " { tid:%d, addr:%p, length:%p", data.rec_tid, (void*)data.addr.as_int(), (void*)data.size); + if (!data.holes.empty()) { +
fputs(", holes:[", out); + bool first = true; + for (auto& h : data.holes) { + if (!first) { + fputs(", ", out); + } + fprintf(out, "%p-%p", (void*)h.offset, (void*)(h.offset + h.size)); + } + fputs("]", out); + } + fputs(" }\n", out); } } if (!flags.raw_dump) { From f922127b4ec4661edabda9cf274f91f31bc6b56f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sun, 23 May 2021 14:24:02 +0200 Subject: [PATCH 038/110] Fix 32-bit build by adding cast. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Related to commit 5c6e992b2ad1. ---- error: invalid cast from type ‘uintptr_t’ {aka ‘unsigned int’} to type ‘uint64_t’ {aka ‘long long unsigned int’} --- src/RecordTask.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RecordTask.cc b/src/RecordTask.cc index 8e7a4adcc6a..3337b18ccfc 100644 --- a/src/RecordTask.cc +++ b/src/RecordTask.cc @@ -1667,7 +1667,7 @@ ssize_t RecordTask::record_remote_fallible(remote_ptr addr, uintptr_t bytes = min(uintptr_t(4*1024*1024), num_bytes - offset); if (hole_iter != holes.end()) { ASSERT(this, hole_iter->offset > offset); - bytes = min(bytes, hole_iter->offset - offset); + bytes = min(bytes, uintptr_t(hole_iter->offset) - offset); } if (record_remote_by_local_map(addr + offset, bytes)) { offset += bytes; From e627c19b0aaf7f18207cc1ce4d019f4b9ffe603b Mon Sep 17 00:00:00 2001 From: Bob131 Date: Sun, 23 May 2021 10:26:13 +1000 Subject: [PATCH 039/110] GdbServer: fix diversion exit notification If a diversion session exits during execution, the resumption request from GDB doesn't get a response and doesn't get consumed. This has two effects: - GDB may continue to process an inferior call without ever having been notified of a change in target state, and - the request can end up being reprocessed on the replay session a number of times, stepping it forward arbitrarily.
The call_exit test should have picked this up, however the placement of the breakpoint masked the test failure by preventing the replay session from stepping. This commit fixes the issue by making sure that a diversion session exit triggers some sort of notification for GDB. This is a bit tricky, since a simple stop isn't sufficient to make GDB bail out of an inferior call but an inferior exit signals the end of the inferior. It just so happens, however, that using an exit notification as done in this commit causes the current implementation in GDB to revive the inferior object after its destruction in a way that seems to work. --- src/DiversionSession.cc | 2 ++ src/GdbServer.cc | 44 ++++++++++++++++++++++++++++------------- src/test/call_exit.py | 10 ++++++++++ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc index c8a372e0bdb..f6ec7fe8be3 100644 --- a/src/DiversionSession.cc +++ b/src/DiversionSession.cc @@ -142,6 +142,8 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( if (t->ptrace_event() == PTRACE_EVENT_EXIT) { handle_ptrace_exit_event(t); result.status = DIVERSION_EXITED; + result.break_status.task = t; + result.break_status.task_exit = true; return result; } diff --git a/src/GdbServer.cc b/src/GdbServer.cc index e93a57c5338..08fcc01b4bd 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -898,8 +898,9 @@ bool GdbServer::diverter_process_debugger_requests( } static bool is_last_thread_exit(const BreakStatus& break_status) { + // The task set may be empty if the task has already exited. 
return break_status.task_exit && - break_status.task->thread_group()->task_set().size() == 1; + break_status.task->thread_group()->task_set().size() <= 1; } static Task* is_in_exec(ReplayTimeline& timeline) { @@ -955,19 +956,33 @@ void GdbServer::maybe_notify_stop(const GdbRequest& req, stop_siginfo = *break_status.signal; LOG(debug) << "Stopping for signal " << stop_siginfo; } - if (is_last_thread_exit(break_status) && dbg->features().reverse_execution) { - do_stop = true; - memset(&stop_siginfo, 0, sizeof(stop_siginfo)); - if (req.cont().run_direction == RUN_FORWARD) { - // The exit of the last task in a thread group generates a fake SIGKILL, - // when reverse-execution is enabled, because users often want to run - // backwards from the end of the task. - stop_siginfo.si_signo = SIGKILL; - LOG(debug) << "Stopping for synthetic SIGKILL"; - } else { - // The start of the debuggee task-group should trigger a silent stop. - stop_siginfo.si_signo = 0; - LOG(debug) << "Stopping at start of execution while running backwards"; + if (is_last_thread_exit(break_status)) { + if (break_status.task->session().is_diversion()) { + // If the last task of a diversion session has exited, we need + // to make sure GDB knows it's unrecoverable. There's no good + // way to do this: a stop is insufficient, but an inferior exit + // typically signals the end of a debugging session. Using the + // latter approach appears to work, but stepping through GDB's + // processing of the event seems to indicate it isn't really + // supposed to. FIXME. + LOG(debug) << "Last task of diversion exiting. 
" + << "Notifying exit with synthetic SIGKILL"; + dbg->notify_exit_signal(SIGKILL); + return; + } else if (dbg->features().reverse_execution) { + do_stop = true; + memset(&stop_siginfo, 0, sizeof(stop_siginfo)); + if (req.cont().run_direction == RUN_FORWARD) { + // The exit of the last task in a thread group generates a fake SIGKILL, + // when reverse-execution is enabled, because users often want to run + // backwards from the end of the task. + stop_siginfo.si_signo = SIGKILL; + LOG(debug) << "Stopping for synthetic SIGKILL"; + } else { + // The start of the debuggee task-group should trigger a silent stop. + stop_siginfo.si_signo = 0; + LOG(debug) << "Stopping at start of execution while running backwards"; + } } } Task* t = break_status.task; @@ -1090,6 +1105,7 @@ GdbRequest GdbServer::divert(ReplaySession& replay) { if (result.status == DiversionSession::DIVERSION_EXITED) { diversion_refcount = 0; + maybe_notify_stop(req, result.break_status); req = GdbRequest(DREQ_NONE); break; } diff --git a/src/test/call_exit.py b/src/test/call_exit.py index 541e391122c..c03dab80903 100644 --- a/src/test/call_exit.py +++ b/src/test/call_exit.py @@ -6,9 +6,19 @@ send_gdb('c') expect_gdb('Breakpoint 1, main') +# Step over the breakpoint and into `atomic_puts' to make sure it +# doesn't influence the test. +send_gdb('step') +expect_gdb('atomic_puts \\(str=') + send_gdb('call (int)exit(0)') expect_gdb('while in a function called from GDB') +# Check sure we're still in the frame of `atomic_puts' and can still +# continue the replay. +send_gdb('finish') +expect_gdb('EXIT-SUCCESS') + restart_replay() expect_gdb('Breakpoint 1, main') From d3b38fc768a45fc119f635b099b322960d5c449a Mon Sep 17 00:00:00 2001 From: Bob131 Date: Tue, 25 May 2021 03:07:05 +1000 Subject: [PATCH 040/110] GdbServer: assert around DiversionSession::find_task Before [1], GdbServer::diverter_process_debugger_requests returned a NULL Task pointer to indicate that a diversion session had finished executing. 
When [1] changed GdbServer to explicitly track task UIDs, the code removed by this commit was added in what appears to be a mechanical translation of the previous code. The code path in question introduces a problem, however, in that it may cause rr to reprocess diversion requests on the underlying replay session and fail to notify GDB of a change in inferior state. It shouldn't ever be hit (nothing in the test suite hits it, at least), so the check is replaced with an assert. [1]: dcb5f759e76e813ba5d80f197ec469f0ad0b6ede --- src/GdbServer.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/GdbServer.cc b/src/GdbServer.cc index 08fcc01b4bd..1ab15b76d20 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -1091,11 +1091,7 @@ GdbRequest GdbServer::divert(ReplaySession& replay) { } Task* t = diversion_session->find_task(last_continue_tuid); - if (!t) { - diversion_refcount = 0; - req = GdbRequest(DREQ_NONE); - break; - } + DEBUG_ASSERT(t != nullptr); int signal_to_deliver; RunCommand command = From b184eae21ec97283e5269b568f8bacdae7e9a37d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sun, 23 May 2021 13:41:28 +0200 Subject: [PATCH 041/110] Add support for ioctl FS_IOC_FIEMAP. Related issue #2289. 
--- src/kernel_abi.cc | 1 + src/kernel_abi.h | 20 ++++++++++++++++++++ src/record_syscall.cc | 9 +++++++++ src/test/ioctl_fs.c | 21 +++++++++++++++++++++ src/test/util.h | 1 + 5 files changed, 52 insertions(+) diff --git a/src/kernel_abi.cc b/src/kernel_abi.cc index 0f4c092e5e5..ec83361a45a 100644 --- a/src/kernel_abi.cc +++ b/src/kernel_abi.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/kernel_abi.h b/src/kernel_abi.h index bc02f8b9689..5f9683c3767 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -1708,6 +1708,26 @@ struct BaseArch : public wordsize, __u32 exe_fd; }; RR_VERIFY_TYPE(prctl_mm_map); + + struct fiemap_extent { + __u64 fe_logical; + __u64 fe_physical; + __u64 fe_length; + __u64 fe_reserved64[2]; + __u32 fe_flags; + __u32 fe_reserved[3]; + }; + RR_VERIFY_TYPE(fiemap_extent); + struct fiemap { + __u64 fm_start; + __u64 fm_length; + __u32 fm_flags; + __u32 fm_mapped_extents; + __u32 fm_extent_count; + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; + }; + RR_VERIFY_TYPE(fiemap); }; struct X64Arch : public BaseArch { diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 401ee35d61d..1326078a1a3 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2004,6 +2005,14 @@ static Switchable prepare_ioctl(RecordTask* t, args.wLength); return PREVENT_SWITCH; } + case IOCTL_MASK_SIZE(FS_IOC_FIEMAP): { + auto argsp = remote_ptr(t->regs().arg3()); + auto args = t->read_mem(argsp); + size = sizeof(typename Arch::fiemap) + + sizeof(typename Arch::fiemap_extent) * args.fm_extent_count; + syscall_state.reg_parameter(3, size, IN_OUT); + return PREVENT_SWITCH; + } } /* These ioctls are mostly regular but require additional recording. 
*/ diff --git a/src/test/ioctl_fs.c b/src/test/ioctl_fs.c index d9bcd8035fd..a371c30e53e 100644 --- a/src/test/ioctl_fs.c +++ b/src/test/ioctl_fs.c @@ -6,6 +6,9 @@ int main(void) { int fd = open("dummy.txt", O_RDWR | O_CREAT, 0600); long version; long flags; + char filebuf[4096] = {}; + char fmbuf[4096] = {}; + struct fiemap *fm; int ret; test_assert(fd >= 0); @@ -22,6 +25,24 @@ int main(void) { atomic_printf("flags=%lx\n", flags); } + test_assert(sizeof(filebuf) == write(fd, &filebuf, sizeof(filebuf))); + fm = (struct fiemap*)fmbuf; + fm->fm_start = 0; + fm->fm_flags = 0; + fm->fm_extent_count = (sizeof(fmbuf) - offsetof(struct fiemap, fm_extents)) / sizeof(fm->fm_extents[0]); + fm->fm_length = FIEMAP_MAX_OFFSET - fm->fm_start; + ret = ioctl(fd, FS_IOC_FIEMAP, fm); + if (ret < 0) { + test_assert(errno == ENOTTY); + } else { + atomic_printf("fm->fm_mapped_extents=%d\n", fm->fm_mapped_extents); + for (unsigned int i=0; i < fm->fm_mapped_extents; i++) { + struct fiemap_extent* fe = fm->fm_extents + i; + atomic_printf("i=%d fe_logical=0x%llx fe_physical=0x%llx fe_length=0x%llx fe_flags=0x%x\n", i, + fe->fe_logical, fe->fe_physical, fe->fe_length, fe->fe_flags); + } + } + atomic_puts("EXIT-SUCCESS"); return 0; } diff --git a/src/test/util.h b/src/test/util.h index 1ed2a989a3d..ee734af7436 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include From e613d1a9a2d6a4256ac46003c1b90f0da8d93f21 Mon Sep 17 00:00:00 2001 From: Simon Sobisch Date: Thu, 27 May 2021 10:31:41 +0200 Subject: [PATCH 042/110] gdbinit: query gdb.VERSION ... 
instead of executing a GDB command and trying to parse its string return (multiple lines) which includes a freely configurable prefix --- src/GdbServer.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/GdbServer.cc b/src/GdbServer.cc index 1ab15b76d20..f482755a059 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -124,8 +124,8 @@ static const string& gdb_rr_macros() { << "python\n" << "import re\n" << "m = re.compile(" - << "'.* ([0-9]+)\\.([0-9]+)(\\.([0-9]+))?.*'" - << ").match(gdb.execute('show version', False, True))\n" + << "'[^0-9]*([0-9]+)\.([0-9]+)(\.([0-9]+))?'" + << ").match(gdb.VERSION)\n" << "ver = int(m.group(1))*10000 + int(m.group(2))*100\n" << "if m.group(4):\n" << " ver = ver + int(m.group(4))\n" From fb90e881e7a6788a3bd1633c706765896dd6eada Mon Sep 17 00:00:00 2001 From: Marian Jancar Date: Fri, 28 May 2021 02:36:36 +0100 Subject: [PATCH 043/110] Fix escape sequence in gdb version regex --- src/GdbServer.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GdbServer.cc b/src/GdbServer.cc index f482755a059..83cfbb005f4 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -124,7 +124,7 @@ static const string& gdb_rr_macros() { << "python\n" << "import re\n" << "m = re.compile(" - << "'[^0-9]*([0-9]+)\.([0-9]+)(\.([0-9]+))?'" + << "'[^0-9]*([0-9]+)\\.([0-9]+)(\\.([0-9]+))?'" << ").match(gdb.VERSION)\n" << "ver = int(m.group(1))*10000 + int(m.group(2))*100\n" << "if m.group(4):\n" From 08e28732ffb8448455971b41592f0c059fcca7d8 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 28 May 2021 14:12:16 +1200 Subject: [PATCH 044/110] Silence warning --- src/util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util.cc b/src/util.cc index ba8345703c7..b88b225216a 100644 --- a/src/util.cc +++ b/src/util.cc @@ -2182,7 +2182,7 @@ void SAFE_FATAL(int err, const char *msg) {.iov_base = (char*)msg, .iov_len=strlen(msg)}, {.iov_base = nl, .iov_len=sizeof(nl)} }; - 
::writev(STDERR_FILENO, out, sizeof(out)/sizeof(struct iovec)); + (void)::writev(STDERR_FILENO, out, sizeof(out)/sizeof(struct iovec)); abort(); } From 1bf2fdf10a570652a2a9b21bc021817cd3e7c73f Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 28 May 2021 14:00:33 +1200 Subject: [PATCH 045/110] Support pkey_* syscalls --- CMakeLists.txt | 1 + src/ReplaySession.cc | 11 ++++++++-- src/Task.cc | 4 +++- src/record_syscall.cc | 2 ++ src/replay_syscall.cc | 9 +++++--- src/syscalls.py | 6 +++--- src/test/pkeys.c | 50 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 src/test/pkeys.c diff --git a/CMakeLists.txt b/CMakeLists.txt index b0495265a0f..b925aa111c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -946,6 +946,7 @@ set(BASIC_TESTS pid_ns_reap pid_ns_segv pidfd + pkeys poll_sig_race ppoll prctl diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index fe40762b6ed..13aff6dec14 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -1099,8 +1099,15 @@ void ReplaySession::check_ticks_consistency(ReplayTask* t, const Event& ev) { } static bool treat_signal_event_as_deterministic(const SignalEvent& ev) { - return ev.deterministic == DETERMINISTIC_SIG && - ev.siginfo.si_signo != SIGBUS; + if (ev.siginfo.si_signo == SIGBUS) { + return false; + } + if (ev.siginfo.si_signo == SIGSEGV && ev.siginfo.si_code == SEGV_PKUERR) { + // We don't set up memory protection key state, so pkey-triggered signals + // won't happen. + return false; + } + return ev.deterministic == DETERMINISTIC_SIG; } /** diff --git a/src/Task.cc b/src/Task.cc index 10842f2478d..d5091ed8ff0 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -570,7 +570,8 @@ void Task::on_syscall_exit_arch(int syscallno, const Registers& regs) { // failing. // SYS_rrcall_mprotect_record always fails with ENOSYS, though we want to // note its usage here. 
- if (regs.syscall_failed() && !is_mprotect_syscall(syscallno, regs.arch())) { + if (regs.syscall_failed() && !is_mprotect_syscall(syscallno, regs.arch()) + && !is_pkey_mprotect_syscall(syscallno, regs.arch())) { return; } @@ -584,6 +585,7 @@ void Task::on_syscall_exit_arch(int syscallno, const Registers& regs) { return; } + case Arch::pkey_mprotect: case Arch::mprotect: { remote_ptr addr = regs.orig_arg1(); size_t num_bytes = regs.arg2(); diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 1326078a1a3..a0801051685 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -4725,6 +4725,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, prepare_mmap_register_params(t); return PREVENT_SWITCH; + case Arch::pkey_mprotect: case Arch::mprotect: // Since we're stripping MAP_GROWSDOWN from kernel mmap calls, we need // to implement PROT_GROWSDOWN ourselves. @@ -6211,6 +6212,7 @@ static void rec_process_syscall_arch(RecordTask* t, case Arch::madvise: case Arch::memfd_create: case Arch::mprotect: + case Arch::pkey_mprotect: case Arch::pread64: case Arch::preadv: case Arch::ptrace: diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc index 176ce855bcd..d508e1188c5 100644 --- a/src/replay_syscall.cc +++ b/src/replay_syscall.cc @@ -1127,6 +1127,7 @@ static void rep_process_syscall_arch(ReplayTask* t, ReplayTraceStep* step, switch (non_negative_syscall(sys)) { case Arch::madvise: case Arch::mprotect: + case Arch::pkey_mprotect: case Arch::sigreturn: case Arch::rt_sigreturn: break; @@ -1214,19 +1215,21 @@ static void rep_process_syscall_arch(ReplayTask* t, ReplayTraceStep* step, case Arch::munmap: case Arch::mprotect: case Arch::modify_ldt: + case Arch::pkey_mprotect: case Arch::set_thread_area: { // Using AutoRemoteSyscalls here fails for arch_prctl, not sure why. Registers r = t->regs(); - r.set_syscallno(sys); + int modified_sys = sys == Arch::pkey_mprotect ? 
Arch::mprotect : sys; + r.set_syscallno(modified_sys); r.set_ip(r.ip().decrement_by_syscall_insn_length(r.arch())); t->set_regs(r); - if (sys == Arch::mprotect) { + if (modified_sys == Arch::mprotect) { t->vm()->fixup_mprotect_growsdown_parameters(t); } t->enter_syscall(); t->exit_syscall(); ASSERT(t, t->regs().syscall_result() == trace_regs.syscall_result()); - if (sys == Arch::mprotect) { + if (modified_sys == Arch::mprotect) { Registers r2 = t->regs(); r2.set_arg1(r.arg1()); r2.set_arg2(r.arg2()); diff --git a/src/syscalls.py b/src/syscalls.py index ee7d8b4f4a3..8600fce6677 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -1677,9 +1677,9 @@ def __init__(self, **kwargs): copy_file_range = IrregularEmulatedSyscall(x86=377, x64=326, generic=285) preadv2 = UnsupportedSyscall(x86=378, x64=327, generic=286) pwritev2 = UnsupportedSyscall(x86=379, x64=328, generic=287) -pkey_mprotect = UnsupportedSyscall(x86=380, x64=329, generic=288) -pkey_alloc = UnsupportedSyscall(x86=381, x64=330, generic=289) -pkey_free = UnsupportedSyscall(x86=382, x64=331, generic=290) +pkey_mprotect = IrregularEmulatedSyscall(x86=380, x64=329, generic=288) +pkey_alloc = EmulatedSyscall(x86=381, x64=330, generic=289) +pkey_free = EmulatedSyscall(x86=382, x64=331, generic=290) statx = EmulatedSyscall(x86=383, x64=332, generic=291, arg5="typename Arch::statx_struct") io_pgetevents = UnsupportedSyscall(x86=385, x64=333, generic=292) rseq = UnsupportedSyscall(x86=386, x64=334, generic=293) diff --git a/src/test/pkeys.c b/src/test/pkeys.c new file mode 100644 index 00000000000..cdc9d435c89 --- /dev/null +++ b/src/test/pkeys.c @@ -0,0 +1,50 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static void wrpkru(unsigned int pkru) { + unsigned int eax = pkru; + unsigned int ecx = 0; + unsigned int edx = 0; + + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); +} + +static char* p; + +static void 
unset_pkey(__attribute__((unused)) int sig) { + pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, 0); +} + +int main(void) { + int pkey = pkey_alloc(0, 0); + int ret; + if (pkey < 0 && errno == ENOSYS) { + atomic_puts("pkeys not supported in kernel, skipping"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + if (pkey < 0 && (errno == ENOSPC || errno == EINVAL)) { + atomic_puts("pkeys not supported on this system, skipping"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + test_assert(pkey >= 0); + + p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + test_assert(p != MAP_FAILED); + ret = pkey_mprotect(p, 4096, PROT_READ | PROT_WRITE, pkey); + test_assert(ret == 0); + p[0] = 1; + + wrpkru(PKEY_DISABLE_ACCESS << (2 * pkey)); + signal(SIGSEGV, unset_pkey); + p[0] = 2; + + ret = pkey_free(pkey); + test_assert(ret == 0); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} From c62a7c1f99012a4b4c81d2606c7f235e80edfaf6 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 28 May 2021 23:40:39 +1200 Subject: [PATCH 046/110] Nondeterministic signals that would normally be fatal to an init process are ignored by the kernel, so don't treat them as fatal --- CMakeLists.txt | 1 + src/RecordSession.cc | 24 +++++++++++++---------- src/test/fatal_init_signal.c | 38 ++++++++++++++++++++++++++++++++++++ src/test/pid_ns_reap.c | 1 + src/test/pid_ns_segv.c | 1 + 5 files changed, 55 insertions(+), 10 deletions(-) create mode 100644 src/test/fatal_init_signal.c diff --git a/CMakeLists.txt b/CMakeLists.txt index b925aa111c4..043dd9ed6a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -814,6 +814,7 @@ set(BASIC_TESTS exit_with_syscallbuf_signal fadvise fanotify + fatal_init_signal fatal_sigsegv_thread x86/fault_in_code_page fcntl_dupfd diff --git a/src/RecordSession.cc b/src/RecordSession.cc index 8bbb751195c..d861eda250e 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -1579,8 +1579,11 @@ bool 
RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { // A SIGSTOP requires us to allow switching to another task. // So does a fatal, core-dumping signal, since we need to allow other // tasks to proceed to their exit events. - bool is_fatal = t->ev().Signal().disposition == DISPOSITION_FATAL; bool is_deterministic = t->ev().Signal().deterministic == DETERMINISTIC_SIG; + // Signals that would normally be fatal are just ignored for init processes, + // unless they're deterministic. + bool is_fatal = t->ev().Signal().disposition == DISPOSITION_FATAL && + (!t->is_container_init() || is_deterministic); Switchable can_switch = ((is_fatal && is_coredumping_signal(sig)) || sig == SIGSTOP) ? ALLOW_SWITCH : PREVENT_SWITCH; @@ -1622,15 +1625,16 @@ bool RecordSession::signal_state_changed(RecordTask* t, StepState* step_state) { RecordTask::ALLOW_RESET_SYSCALLBUF, &r); // Don't actually set_regs(r), the kernel does these modifications. - // If the task is a container init, the kernel will ignore injection - // of fatal signals. Usually, the kernel removes the killable-protection - // when a determinisic fatal signal gets executed, but (due to what is - // arguably a bug) when a ptracer is attached, this does not happen. - // If we try to inject it here, the kernel will just ignore it, - // and we'll go around again. As a hack, we detach here, in the - // expectation that the deterministic instruction will run again and - // actually kill the task now that it isn't under ptrace control anymore. - if (t->is_container_init() && is_fatal && is_deterministic) { + if (t->is_container_init() && is_fatal) { + // Nondeterministic signals were already filtered out. + ASSERT(t, is_deterministic); + // Usually, the kernel removes the killable-protection from an init process + // when a determinisic fatal signal gets executed, but (due to what is + // arguably a bug) when a ptracer is attached, this does not happen. 
+ // If we try to inject it here, the kernel will just ignore it, + // and we'll go around again. As a hack, we detach here, in the + // expectation that the deterministic instruction will run again and + // actually kill the task now that it isn't under ptrace control anymore. t->destroy_buffers(nullptr, nullptr); WaitStatus exit_status = WaitStatus::for_fatal_sig(sig); record_exit_trace_event(t, exit_status); diff --git a/src/test/fatal_init_signal.c b/src/test/fatal_init_signal.c new file mode 100644 index 00000000000..0c975dfa738 --- /dev/null +++ b/src/test/fatal_init_signal.c @@ -0,0 +1,38 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" +#include "nsutils.h" + +static char ch = 1; + +int main(void) { + pid_t pid; + int status; + int ret; + if (-1 == try_setup_ns(CLONE_NEWPID)) { + // We may not have permission to set up namespaces, so bail. + atomic_puts("Insufficient permissions, skipping test"); + atomic_puts("EXIT-SUCCESS"); + return 77; + } + + // This is the first child, therefore PID 1 in its PID namespace + pid = fork(); + test_assert(pid >= 0); + if (pid == 0) { + test_assert(getpid() == 1); + // This will be nonfatal because we don't have a handler for it. + kill(getpid(), SIGQUIT); + // Ensure at least one tick + if (ch == 1) { + ch = 3; + } + return 55; + } + + ret = waitpid(pid, &status, 0); + test_assert(ret == pid); + test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 55); + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/pid_ns_reap.c b/src/test/pid_ns_reap.c index c950ded876e..f789573a9ce 100644 --- a/src/test/pid_ns_reap.c +++ b/src/test/pid_ns_reap.c @@ -14,6 +14,7 @@ int main(void) { pid_t pid; if (-1 == try_setup_ns(CLONE_NEWPID)) { /* We may not have permission to set up namespaces, so bail. 
*/ + atomic_puts("Insufficient permissions, skipping test"); atomic_puts("EXIT-SUCCESS"); return 77; } diff --git a/src/test/pid_ns_segv.c b/src/test/pid_ns_segv.c index a5a52b7db02..b0a0599199d 100644 --- a/src/test/pid_ns_segv.c +++ b/src/test/pid_ns_segv.c @@ -7,6 +7,7 @@ int main(void) { pid_t pid; if (-1 == try_setup_ns(CLONE_NEWPID)) { // We may not have permission to set up namespaces, so bail. + atomic_puts("Insufficient permissions, skipping test"); atomic_puts("EXIT-SUCCESS"); return 77; } From 0fcc204ce3d02c5e5a535f39de9db9cc0aa67bc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sat, 29 May 2021 10:12:44 +0200 Subject: [PATCH 047/110] Enable ioperm syscall. Found to be used in xserver-xorg. --- CMakeLists.txt | 1 + src/syscalls.py | 2 +- src/test/ioperm.c | 11 +++++++++++ src/test/util.h | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 src/test/ioperm.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 043dd9ed6a6..3ac8293f998 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -870,6 +870,7 @@ set(BASIC_TESTS ioctl_pty ioctl_sg ioctl_tty + ioperm join_threads joystick kcmp diff --git a/src/syscalls.py b/src/syscalls.py index 8600fce6677..8126656e1d0 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -546,7 +546,7 @@ def __init__(self, **kwargs): # to a statfs structure defined approximately as follows: fstatfs = EmulatedSyscall(x86=100, x64=138, generic=44, arg2="struct Arch::statfs") -ioperm = UnsupportedSyscall(x86=101, x64=173) +ioperm = EmulatedSyscall(x86=101, x64=173) # int socketcall(int call, unsigned long *args) # diff --git a/src/test/ioperm.c b/src/test/ioperm.c new file mode 100644 index 00000000000..533941ea9a2 --- /dev/null +++ b/src/test/ioperm.c @@ -0,0 +1,11 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret; + ret = ioperm(0, 1024, 1); + atomic_printf("ioperm returned %d\n", ret); + 
atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/util.h b/src/test/util.h index ee734af7436..dcd7b4a5d2f 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -68,6 +68,7 @@ #include #include #include +#include #include #include #include From 5007ed69b8b6a388029154bc192fa4efa3027109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sat, 29 May 2021 10:17:00 +0200 Subject: [PATCH 048/110] Enable iopl syscall. Found to be used in xserver-xorg. --- CMakeLists.txt | 1 + src/syscalls.py | 2 +- src/test/iopl.c | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 src/test/iopl.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 3ac8293f998..5170a644acd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -871,6 +871,7 @@ set(BASIC_TESTS ioctl_sg ioctl_tty ioperm + iopl join_threads joystick kcmp diff --git a/src/syscalls.py b/src/syscalls.py index 8126656e1d0..188359ec3f4 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -570,7 +570,7 @@ def __init__(self, **kwargs): lstat = EmulatedSyscall(x86=107, x64=6, arg2="struct Arch::stat") fstat = EmulatedSyscall(x86=108, x64=5, generic=80, arg2="struct Arch::stat") olduname = UnsupportedSyscall(x86=109) -iopl = UnsupportedSyscall(x86=110, x64=172) +iopl = EmulatedSyscall(x86=110, x64=172) vhangup = UnsupportedSyscall(x86=111, x64=153, generic=58) idle = UnsupportedSyscall(x86=112) vm86old = UnsupportedSyscall(x86=113) diff --git a/src/test/iopl.c b/src/test/iopl.c new file mode 100644 index 00000000000..c1ee876bb1e --- /dev/null +++ b/src/test/iopl.c @@ -0,0 +1,11 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int ret; + ret = iopl(3); + atomic_printf("iopl returned %d\n", ret); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From 7657223b714344c69ecca58b221aa50b61298a92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Tue, 1 Jun 2021 00:08:34 +0200 
Subject: [PATCH 049/110] Add ioctl VT_OPENQRY, VT_GETSTATE and KDGKBMODE. Found to be used in xserver-xorg. --- CMakeLists.txt | 1 + src/kernel_abi.cc | 1 + src/kernel_abi.h | 7 +++++++ src/record_syscall.cc | 11 ++++++++++- src/test/ioctl_vt.c | 29 +++++++++++++++++++++++++++++ src/test/util.h | 2 ++ 6 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 src/test/ioctl_vt.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 5170a644acd..8bbeccabe5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -870,6 +870,7 @@ set(BASIC_TESTS ioctl_pty ioctl_sg ioctl_tty + ioctl_vt ioperm iopl join_threads diff --git a/src/kernel_abi.cc b/src/kernel_abi.cc index ec83361a45a..685ec679b50 100644 --- a/src/kernel_abi.cc +++ b/src/kernel_abi.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/src/kernel_abi.h b/src/kernel_abi.h index 5f9683c3767..a9031704b33 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -1728,6 +1728,13 @@ struct BaseArch : public wordsize, struct fiemap_extent fm_extents[0]; }; RR_VERIFY_TYPE(fiemap); + + struct vt_stat { + unsigned short v_active; + unsigned short v_signal; + unsigned short v_state; + }; + RR_VERIFY_TYPE(vt_stat); }; struct X64Arch : public BaseArch { diff --git a/src/record_syscall.cc b/src/record_syscall.cc index a0801051685..4e75de11e1b 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1731,9 +1733,11 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.reg_parameter(3); return PREVENT_SWITCH; + case KDGKBMODE: case TIOCINQ: case TIOCOUTQ: case TIOCGETD: + case VT_OPENQRY: syscall_state.reg_parameter(3); return PREVENT_SWITCH; @@ -1772,7 +1776,7 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.reg_parameter(3); return PREVENT_SWITCH; - case SG_IO: + case SG_IO: { auto 
argsp = syscall_state.reg_parameter(3, IN_OUT); auto args = t->read_mem(argsp); syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(argsp, dxferp), args.dxfer_len); @@ -1780,6 +1784,11 @@ static Switchable prepare_ioctl(RecordTask* t, syscall_state.mem_ptr_parameter(REMOTE_PTR_FIELD(argsp, sbp), args.mx_sb_len); //usr_ptr: This value is not acted upon by the sg driver. return PREVENT_SWITCH; + } + + case VT_GETSTATE: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; } /* In ioctl language, "_IOC_READ" means "outparam". Both diff --git a/src/test/ioctl_vt.c b/src/test/ioctl_vt.c new file mode 100644 index 00000000000..e18c9d9002d --- /dev/null +++ b/src/test/ioctl_vt.c @@ -0,0 +1,29 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd; + int vt; + struct vt_stat vts; + int tty_mode; + + fd = open("/dev/tty0", O_RDWR); + if (fd < 0) { + atomic_puts("Can't open tty, aborting test"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + + test_assert(0 == ioctl(fd, VT_OPENQRY, &vt)); + atomic_printf("VT_OPENQRY returned %d\n", vt); + + test_assert(0 == ioctl(fd, VT_GETSTATE, &vts)); + atomic_printf("VT_GETSTATE returned v_active=%d\n", vts.v_active); + + test_assert(0 == ioctl(fd, KDGKBMODE, &tty_mode)); + atomic_printf("KDGKBMODE returned %d\n", tty_mode); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/util.h b/src/test/util.h index dcd7b4a5d2f..150b91eb0a4 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include #include From 343ea36659b71f2458fe7752bdd9c4e48baba124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Tue, 1 Jun 2021 00:10:03 +0200 Subject: [PATCH 050/110] Add ioctl FBIOGET_FSCREENINFO and FBIOGET_VSCREENINFO. Found to be used in xserver-xorg. 
--- CMakeLists.txt | 1 + src/kernel_abi.cc | 1 + src/kernel_abi.h | 58 +++++++++++++++++++++++++++++++++++++++++++ src/record_syscall.cc | 9 +++++++ src/test/ioctl_fb.c | 25 +++++++++++++++++++ src/test/util.h | 1 + 6 files changed, 95 insertions(+) create mode 100644 src/test/ioctl_fb.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bbeccabe5f..b24d83d7ef3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -866,6 +866,7 @@ set(BASIC_TESTS invalid_ioctl io ioctl + ioctl_fb ioctl_fs ioctl_pty ioctl_sg diff --git a/src/kernel_abi.cc b/src/kernel_abi.cc index 685ec679b50..efc8100aa03 100644 --- a/src/kernel_abi.cc +++ b/src/kernel_abi.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/src/kernel_abi.h b/src/kernel_abi.h index a9031704b33..b597fb8d85b 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -1735,6 +1735,64 @@ struct BaseArch : public wordsize, unsigned short v_state; }; RR_VERIFY_TYPE(vt_stat); + + struct fb_fix_screeninfo { + char id[16]; + unsigned long smem_start; + __u32 smem_len; + __u32 type; + __u32 type_aux; + __u32 visual; + uint16_t xpanstep; + uint16_t ypanstep; + uint16_t xwrapstep; + __u32 line_length; + unsigned long mmio_start; + __u32 mmio_len; + __u32 accel; + uint16_t capabilities; + uint16_t reserved[2]; + }; + RR_VERIFY_TYPE(fb_fix_screeninfo); + + struct fb_bitfield { + __u32 offset; + __u32 length; + __u32 msb_right; + }; + RR_VERIFY_TYPE(fb_bitfield); + struct fb_var_screeninfo { + __u32 xres; + __u32 yres; + __u32 xres_virtual; + __u32 yres_virtual; + __u32 xoffset; + __u32 yoffset; + __u32 bits_per_pixel; + __u32 grayscale; + struct fb_bitfield red; + struct fb_bitfield green; + struct fb_bitfield blue; + struct fb_bitfield transp; + __u32 nonstd; + __u32 active; + __u32 height; + __u32 width; + __u32 accel_flags; + __u32 pixclock; + __u32 left_margin; + __u32 right_margin; + __u32 upper_margin; + __u32 lower_margin; + __u32 hsync_len; + __u32 vsync_len; + __u32 sync; + __u32 
vmode; + __u32 rotate; + __u32 colorspace; + __u32 reserved[4]; + }; + RR_VERIFY_TYPE(fb_var_screeninfo); }; struct X64Arch : public BaseArch { diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 4e75de11e1b..3946a57e416 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -1789,6 +1790,14 @@ static Switchable prepare_ioctl(RecordTask* t, case VT_GETSTATE: syscall_state.reg_parameter(3); return PREVENT_SWITCH; + + case FBIOGET_FSCREENINFO: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; + + case FBIOGET_VSCREENINFO: + syscall_state.reg_parameter(3); + return PREVENT_SWITCH; } /* In ioctl language, "_IOC_READ" means "outparam". Both diff --git a/src/test/ioctl_fb.c b/src/test/ioctl_fb.c new file mode 100644 index 00000000000..619fca42ca5 --- /dev/null +++ b/src/test/ioctl_fb.c @@ -0,0 +1,25 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +int main(void) { + int fd; + struct fb_fix_screeninfo finfo; + struct fb_var_screeninfo vinfo; + + fd = open("/dev/fb0", O_RDWR); + if (fd < 0) { + atomic_puts("Can't open framebuffer, aborting test"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + + test_assert(0 == ioctl(fd, FBIOGET_FSCREENINFO, &finfo)); + atomic_printf("FBIOGET_FSCREENINFO returned id=%s capabilities=%d\n", finfo.id, finfo.capabilities); + + test_assert(0 == ioctl(fd, FBIOGET_VSCREENINFO, &vinfo)); + atomic_printf("FBIOGET_VSCREENINFO returned xres=%d yres=%d colorspace=%d\n", vinfo.xres, vinfo.yres, vinfo.colorspace); + + atomic_puts("EXIT-SUCCESS"); + return 0; +} diff --git a/src/test/util.h b/src/test/util.h index 150b91eb0a4..c53135656e6 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include From 7c63c0b8a9f6e40b16d44a3d3af66f91303b116e Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Wed, 2 Jun 2021 
12:49:12 -0700 Subject: [PATCH 051/110] Handle traceinfo and replay of traces that never execed successfully without crashing. --- src/ReplaySession.cc | 10 ++++++++-- src/TraceInfoCommand.cc | 10 ++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index 13aff6dec14..f76fa9bd6a4 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -1522,8 +1522,14 @@ static void end_task(ReplayTask* t) { t->set_regs(r); // Enter the syscall. t->resume_execution(RESUME_CONT, RESUME_WAIT, RESUME_NO_TICKS); - ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT); - t->did_handle_ptrace_exit_event(); + if (t->session().done_initial_exec()) { + ASSERT(t, t->ptrace_event() == PTRACE_EVENT_EXIT); + t->did_handle_ptrace_exit_event(); + } else { + // If we never execed, the trace is totally hosed, + // just clean up. + t->did_kill(); + } t->detach(); delete t; } diff --git a/src/TraceInfoCommand.cc b/src/TraceInfoCommand.cc index 539ee161675..2128d381470 100644 --- a/src/TraceInfoCommand.cc +++ b/src/TraceInfoCommand.cc @@ -41,6 +41,7 @@ TraceInfoCommand TraceInfoCommand::singleton( " Dump trace header in JSON format.\n"); static int dump_trace_info(const string& trace_dir, FILE* out) { + int ret = 0; TraceReader trace(trace_dir); fputs("{\n", out); @@ -101,10 +102,10 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { flags.cpu_unbound = true; ReplaySession::shr_ptr replay_session = ReplaySession::create(trace_dir, flags); - fputs(" \"environ\":[", out); while (true) { auto result = replay_session->replay_step(RUN_CONTINUE); if (replay_session->done_initial_exec()) { + fputs(" \"environ\":[", out); auto environ = read_env(replay_session->current_task()); for (size_t i = 0; i < environ.size(); ++i) { if (i > 0) { @@ -112,17 +113,18 @@ static int dump_trace_info(const string& trace_dir, FILE* out) { } fprintf(out, "\n \"%s\"", json_escape(environ[i]).c_str()); } + fputs("\n ]\n", out); break; } if 
(result.status == REPLAY_EXITED) { fputs("Replay finished before initial exec!\n", stderr); - return 1; + ret = 1; + break; } } - fputs("\n ]\n", out); fputs("}\n", out); - return 0; + return ret; } int TraceInfoCommand::run(vector& args) { From 59fb6d7fa59bca04468e0a8e43c39393dcf670f0 Mon Sep 17 00:00:00 2001 From: Bob131 Date: Fri, 4 Jun 2021 08:29:09 +1000 Subject: [PATCH 052/110] Session: add TaskContext Adds a helper type for BreakStatus that stores some of a Task's context alongside it, keeping it alive after the Task dies. --- src/DiversionSession.cc | 2 +- src/GdbServer.cc | 12 ++++---- src/ReplaySession.cc | 6 ++-- src/ReplayTimeline.cc | 65 ++++++++++++++++++++++------------------- src/Session.cc | 2 +- src/Session.h | 43 +++++++++++++++++++++++---- 6 files changed, 83 insertions(+), 47 deletions(-) diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc index f6ec7fe8be3..13baa04a969 100644 --- a/src/DiversionSession.cc +++ b/src/DiversionSession.cc @@ -142,7 +142,7 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( if (t->ptrace_event() == PTRACE_EVENT_EXIT) { handle_ptrace_exit_event(t); result.status = DIVERSION_EXITED; - result.break_status.task = t; + result.break_status.task_context = TaskContext(t); result.break_status.task_exit = true; return result; } diff --git a/src/GdbServer.cc b/src/GdbServer.cc index 83cfbb005f4..eca333cb1a3 100644 --- a/src/GdbServer.cc +++ b/src/GdbServer.cc @@ -900,7 +900,7 @@ bool GdbServer::diverter_process_debugger_requests( static bool is_last_thread_exit(const BreakStatus& break_status) { // The task set may be empty if the task has already exited. 
return break_status.task_exit && - break_status.task->thread_group()->task_set().size() <= 1; + break_status.task_context.thread_group->task_set().size() <= 1; } static Task* is_in_exec(ReplayTimeline& timeline) { @@ -957,7 +957,7 @@ void GdbServer::maybe_notify_stop(const GdbRequest& req, LOG(debug) << "Stopping for signal " << stop_siginfo; } if (is_last_thread_exit(break_status)) { - if (break_status.task->session().is_diversion()) { + if (break_status.task_context.session->is_diversion()) { // If the last task of a diversion session has exited, we need // to make sure GDB knows it's unrecoverable. There's no good // way to do this: a stop is insufficient, but an inferior exit @@ -985,7 +985,7 @@ void GdbServer::maybe_notify_stop(const GdbRequest& req, } } } - Task* t = break_status.task; + Task* t = break_status.task(); Task* in_exec_task = is_in_exec(timeline); if (in_exec_task) { do_stop = true; @@ -1208,7 +1208,7 @@ void GdbServer::try_lazy_reverse_singlesteps(GdbRequest& req) { now = previous; need_seek = true; BreakStatus break_status; - break_status.task = t; + break_status.task_context = TaskContext(t); break_status.singlestep_complete = true; LOG(debug) << " using lazy reverse-singlestep"; maybe_notify_stop(req, break_status); @@ -1374,7 +1374,7 @@ GdbServer::ContinueOrStop GdbServer::debug_one_step( } if (req.cont().run_direction == RUN_FORWARD && is_last_thread_exit(result.break_status) && - result.break_status.task->thread_group()->tguid() == debuggee_tguid) { + result.break_status.task_context.thread_group->tguid() == debuggee_tguid) { in_debuggee_end_state = true; } return CONTINUE_DEBUGGING; @@ -1573,7 +1573,7 @@ void GdbServer::restart_session(const GdbRequest& req) { // condition below. 
DEBUG_ASSERT(result.status != REPLAY_EXITED); if (is_last_thread_exit(result.break_status) && - result.break_status.task->thread_group()->tgid == target.pid) { + result.break_status.task_context.thread_group->tgid == target.pid) { // Debuggee task is about to exit. Stop here. in_debuggee_end_state = true; break; diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index f76fa9bd6a4..709d699fcd7 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -1745,7 +1745,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { advance_to_next_trace_frame(); } if (current_step.action == TSTEP_EXIT_TASK) { - result.break_status.task = t; + result.break_status.task_context = TaskContext(t); result.break_status.task_exit = true; } return result; @@ -1754,7 +1754,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { fast_forward_status = FastForwardStatus(); // Now we know |t| hasn't died, so save it in break_status. - result.break_status.task = t; + result.break_status.task_context = TaskContext(t); /* Advance towards fulfilling |current_step|. 
*/ if (try_one_trace_step(t, constraints) == INCOMPLETE) { @@ -1821,7 +1821,7 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { } break; case TSTEP_EXIT_TASK: - result.break_status.task = nullptr; + result.break_status.task_context = TaskContext(); t = nullptr; DEBUG_ASSERT(!result.break_status.any_break()); break; diff --git a/src/ReplayTimeline.cc b/src/ReplayTimeline.cc index b1b758c54cf..9f59583d2be 100644 --- a/src/ReplayTimeline.cc +++ b/src/ReplayTimeline.cc @@ -243,7 +243,7 @@ void ReplayTimeline::mark_after_singlestep(const Mark& from, fprintf(stderr, "Probable duplicated state at %d:", (int)m_prev + 1); m.ptr->full_print(stderr); } - ASSERT(result.break_status.task, false) + ASSERT(result.break_status.task(), false) << " Probable duplicated states leading to " << m << " at index " << i + 1; } break; @@ -612,15 +612,15 @@ bool ReplayTimeline::fix_watchpoint_coalescing_quirk(ReplayResult& result, // no watchpoint hit. Nothing to fix. return false; } - if (!maybe_at_or_after_x86_string_instruction(result.break_status.task)) { + if (!maybe_at_or_after_x86_string_instruction(result.break_status.task())) { return false; } - TaskUid after_tuid = result.break_status.task->tuid(); - Ticks after_ticks = result.break_status.task->tick_count(); + TaskUid after_tuid = result.break_status.task()->tuid(); + Ticks after_ticks = result.break_status.task()->tick_count(); LOG(debug) << "Fixing x86-string coalescing quirk from " << before << " to " << proto_mark() << " (final cx " - << result.break_status.task->regs().cx() << ")"; + << result.break_status.task()->regs().cx() << ")"; seek_to_proto_mark(before); @@ -640,7 +640,7 @@ bool ReplayTimeline::fix_watchpoint_coalescing_quirk(ReplayResult& result, if (!result.break_status.data_watchpoints_hit().empty()) { LOG(debug) << "Fixed x86-string coalescing quirk; now at " << current_mark_key() << " (new cx " - << result.break_status.task->regs().cx() << ")"; + << 
result.break_status.task()->regs().cx() << ")"; break; } } else { @@ -929,7 +929,7 @@ bool ReplayTimeline::run_forward_to_intermediate_point(const Mark& end, static const int stop_count_limit = 20; static ReplayTask* to_replay_task(const BreakStatus& status) { - return static_cast(status.task); + return static_cast(status.task()); } static bool arch_watch_fires_before_instr(SupportedArch arch) { @@ -1021,10 +1021,10 @@ ReplayResult ReplayTimeline::reverse_continue( !result.break_status.watchpoints_hit.empty(); if (avoidable_stop) { made_progress_between_stops = - avoidable_stop_ip != result.break_status.task->ip() || - avoidable_stop_ticks != result.break_status.task->tick_count(); - avoidable_stop_ip = result.break_status.task->ip(); - avoidable_stop_ticks = result.break_status.task->tick_count(); + avoidable_stop_ip != result.break_status.task()->ip() || + avoidable_stop_ticks != result.break_status.task()->tick_count(); + avoidable_stop_ip = result.break_status.task()->ip(); + avoidable_stop_ticks = result.break_status.task()->tick_count(); } evaluate_conditions(result); @@ -1053,10 +1053,11 @@ ReplayResult ReplayTimeline::reverse_continue( } } final_result = result; - final_tuid = result.break_status.task ? result.break_status.task->tuid() - : TaskUid(); - final_ticks = result.break_status.task - ? result.break_status.task->tick_count() + final_tuid = result.break_status.task() + ? result.break_status.task()->tuid() + : TaskUid(); + final_ticks = result.break_status.task() + ? 
result.break_status.task()->tick_count() : 0; last_stop_is_watch_or_signal = true; } @@ -1065,10 +1066,11 @@ ReplayResult ReplayTimeline::reverse_continue( if (is_start_of_reverse_execution_barrier_event()) { dest = mark(); final_result = result; - final_result.break_status.task = current->current_task(); + final_result.break_status.task_context = + TaskContext(current->current_task()); final_result.break_status.task_exit = true; - final_tuid = final_result.break_status.task->tuid(); - final_ticks = result.break_status.task->tick_count(); + final_tuid = final_result.break_status.task()->tuid(); + final_ticks = result.break_status.task()->tick_count(); last_stop_is_watch_or_signal = false; } @@ -1084,10 +1086,11 @@ ReplayResult ReplayTimeline::reverse_continue( dest = mark(); LOG(debug) << "Found breakpoint break at " << dest; final_result = result; - final_tuid = result.break_status.task ? result.break_status.task->tuid() - : TaskUid(); - final_ticks = result.break_status.task - ? result.break_status.task->tick_count() + final_tuid = result.break_status.task() + ? result.break_status.task()->tuid() + : TaskUid(); + final_ticks = result.break_status.task() + ? 
result.break_status.task()->tick_count() : 0; last_stop_is_watch_or_signal = false; } @@ -1096,7 +1099,8 @@ ReplayResult ReplayTimeline::reverse_continue( LOG(debug) << "Interrupted at " << end; seek_to_mark(end); final_result = ReplayResult(); - final_result.break_status.task = current->current_task(); + final_result.break_status.task_context = + TaskContext(current->current_task()); return final_result; } @@ -1138,7 +1142,8 @@ ReplayResult ReplayTimeline::reverse_continue( } // fix break_status.task since the actual ReplayTask* may have changed // since we saved final_result - final_result.break_status.task = current->find_task(final_tuid); + final_result.break_status.task_context = + TaskContext(current->find_task(final_tuid)); // Hide any singlestepping we did, since a continue operation should // never return a singlestep status final_result.break_status.singlestep_complete = false; @@ -1299,8 +1304,8 @@ ReplayResult ReplayTimeline::reverse_singlestep( } else if (now == end && result.break_status.signal && result.break_status.signal->si_signo == SIGTRAP && - is_advanced_pc_and_signaled_instruction(result.break_status.task, - result.break_status.task->ip())) { + is_advanced_pc_and_signaled_instruction(result.break_status.task(), + result.break_status.task()->ip())) { LOG(debug) << " singlestepped exactly to instruction that advances pc and signals (e.g. 
int3)," << " pretending we stopped earlier."; break; @@ -1309,7 +1314,7 @@ ReplayResult ReplayTimeline::reverse_singlestep( LOG(debug) << "Setting candidate after step: " << destination_candidate; destination_candidate_result = result; - destination_candidate_tuid = result.break_status.task->tuid(); + destination_candidate_tuid = result.break_status.task()->tuid(); destination_candidate_saw_other_task_break = seen_other_task_break; seen_other_task_break = false; step_start = now; @@ -1365,9 +1370,9 @@ ReplayResult ReplayTimeline::reverse_singlestep( if (destination_candidate) { LOG(debug) << "Found destination " << destination_candidate; seek_to_mark(destination_candidate); - destination_candidate_result.break_status.task = - current->find_task(destination_candidate_tuid); - DEBUG_ASSERT(destination_candidate_result.break_status.task); + destination_candidate_result.break_status.task_context = + TaskContext(current->find_task(destination_candidate_tuid)); + DEBUG_ASSERT(destination_candidate_result.break_status.task()); evaluate_conditions(destination_candidate_result); return destination_candidate_result; } diff --git a/src/Session.cc b/src/Session.cc index dd119e08fe4..c9a84efdf82 100644 --- a/src/Session.cc +++ b/src/Session.cc @@ -266,7 +266,7 @@ string Session::read_spawned_task_error() const { BreakStatus Session::diagnose_debugger_trap(Task* t, RunCommand run_command) { assert_fully_initialized(); BreakStatus break_status; - break_status.task = t; + break_status.task_context = TaskContext(t); int stop_sig = t->stop_sig(); if (!stop_sig) { diff --git a/src/Session.h b/src/Session.h index ce5cdeb5f05..4b2dcb7a082 100644 --- a/src/Session.h +++ b/src/Session.h @@ -12,6 +12,7 @@ #include "AddressSpace.h" #include "MonitoredSharedMemory.h" +#include "Task.h" #include "TaskishUid.h" #include "TraceStream.h" #include "preload/preload_interface.h" @@ -30,18 +31,47 @@ class AutoRemoteSyscalls; // The following types are used by step() APIs in Session subclasses. 
+/** + * Stores a Task and information about it separately so decisions can + * still be made from a Task's context even if it dies. + */ +struct TaskContext { + TaskContext() + : task(nullptr), + session(nullptr), + thread_group(nullptr) {} + explicit TaskContext(Task* task) + : task(task), + session(task ? &task->session() : nullptr), + thread_group(task ? task->thread_group() : nullptr) {} + TaskContext(Session* session, std::shared_ptr thread_group) + : task(nullptr), + session(session), + thread_group(thread_group) {} + + // A pointer to a task. This may be |nullptr|. When non-NULL, this + // is not necessarily the same as session->current_task() (for + // example, when replay switches to a new task after + // ReplaySession::replay_step()). + Task* task; + // The session to which |task| belongs/belonged. + Session* session; + // The thread group to which |task| belongs/belonged. + std::shared_ptr thread_group; +}; + /** * In general, multiple break reasons can apply simultaneously. */ struct BreakStatus { BreakStatus() - : task(nullptr), + : task_context(TaskContext()), breakpoint_hit(false), singlestep_complete(false), approaching_ticks_target(false), task_exit(false) {} BreakStatus(const BreakStatus& other) - : task(other.task), + : task_context(other.task_context), watchpoints_hit(other.watchpoints_hit), signal(other.signal ? std::unique_ptr(new siginfo_t(*other.signal)) @@ -51,7 +81,7 @@ struct BreakStatus { approaching_ticks_target(other.approaching_ticks_target), task_exit(other.task_exit) {} const BreakStatus& operator=(const BreakStatus& other) { - task = other.task; + task_context = other.task_context; watchpoints_hit = other.watchpoints_hit; signal = other.signal ? std::unique_ptr(new siginfo_t(*other.signal)) @@ -63,9 +93,8 @@ struct BreakStatus { return *this; } - // The triggering Task. This may be different from session->current_task() - // when replay switches to a new task when ReplaySession::replay_step() ends. 
- Task* task; + // The triggering TaskContext. + TaskContext task_context; // List of watchpoints hit; any watchpoint hit causes a stop after the // instruction that triggered the watchpoint has completed. std::vector watchpoints_hit; @@ -109,6 +138,8 @@ struct BreakStatus { return !watchpoints_hit.empty() || signal || breakpoint_hit || singlestep_complete || approaching_ticks_target; } + + Task* task() const { return task_context.task; } }; enum RunCommand { // Continue until we hit a breakpoint or a new replay event From 3fb8ef2f09dd34282b817fee2d2adca315b73e96 Mon Sep 17 00:00:00 2001 From: Bob131 Date: Fri, 4 Jun 2021 08:33:51 +1000 Subject: [PATCH 053/110] DiversionSession: capture task context before exit DiversionSession::diversion_step() returns a Task pointer after passing it to handle_ptrace_exit_event() which results in its deletion. Capture the Task's context before dealing with the exit to avoid returning an invalid pointer. This fixes a use-after-free introduced in e627c19b0aaf7f18207cc1ce4d019f4b9ffe603b. Closes #2881. --- src/DiversionSession.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/DiversionSession.cc b/src/DiversionSession.cc index 13baa04a969..f639835dc48 100644 --- a/src/DiversionSession.cc +++ b/src/DiversionSession.cc @@ -140,9 +140,14 @@ DiversionSession::DiversionResult DiversionSession::diversion_step( // An exit might have occurred while processing a previous syscall. if (t->ptrace_event() == PTRACE_EVENT_EXIT) { + // We're about to destroy the task, so capture the context while + // we can. + TaskContext context(t); handle_ptrace_exit_event(t); + // This is now a dangling pointer, so clear it. 
+ context.task = nullptr; result.status = DIVERSION_EXITED; - result.break_status.task_context = TaskContext(t); + result.break_status.task_context = context; result.break_status.task_exit = true; return result; } From bfbd3ce8796c6f9e5e6810eb865da7d4adaf6158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Thu, 3 Jun 2021 22:23:31 +0200 Subject: [PATCH 054/110] Avoid use-after-free of task object in ReplaySession::replay_step. The task object given to `ReplaySession::try_one_trace_step` gets deleted with `TSTEP_EXIT_TASK` in `ReplaySession::exit_task` / `end_task`. Visible in replay of accept test with asan enabled rr build. --- src/ReplaySession.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index 709d699fcd7..8b07582c455 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -1788,7 +1788,11 @@ ReplayResult ReplaySession::replay_step(const StepConstraints& constraints) { // If try_one_trace_step set extra-registers already, the values it used from the frame // will already have FIP/FDP cleared if necessary. Clearing them again here is fine. - if (trace_reader().clear_fip_fdp()) { + if (trace_reader().clear_fip_fdp() && + current_step.action != TSTEP_EXIT_TASK) + /* TSTEP_EXIT_TASK means the task object got already + deleted above in try_one_trace_step/exit_task/end_task. 
*/ + { const ExtraRegisters* maybe_extra = t->extra_regs_fallible(); if (maybe_extra) { ExtraRegisters extra_registers = *maybe_extra; From 62b2d4d94486fb52ebe8dee67b9b89def0c4677b Mon Sep 17 00:00:00 2001 From: Konstantin Kharlamov Date: Sun, 6 Jun 2021 13:47:40 +0300 Subject: [PATCH 055/110] PerfCounters_x86.h: add support for AMD A8-3530MX Fixes: https://github.com/rr-debugger/rr/issues/2872 --- src/PerfCounters_x86.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/PerfCounters_x86.h b/src/PerfCounters_x86.h index aafe0b5c8f6..2b0d35d197b 100644 --- a/src/PerfCounters_x86.h +++ b/src/PerfCounters_x86.h @@ -86,6 +86,8 @@ static CpuMicroarch compute_cpu_microarch() { case 0x70f10: // Matisse (Zen 2) (UNTESTED) if (ext_family == 8) { return AMDZen; + } else if (ext_family == 3) { + return AMDF15R30; } break; case 0x20f10: // Vermeer (Zen 3) From 59e55ba828c41c8ac236407be34690bd2d3d1970 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sat, 5 Jun 2021 10:25:42 +0200 Subject: [PATCH 056/110] Increase timeout for conditional_breakpoint_offload test. This test, especially in the no-syscallbuf variants, takes regularly more than 120 seconds at my Ryzen 7 1700, when running the tests in parallel. --- src/test/conditional_breakpoint_offload.run | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/conditional_breakpoint_offload.run b/src/test/conditional_breakpoint_offload.run index 3eadb5a637f..5641c9ed3fb 100644 --- a/src/test/conditional_breakpoint_offload.run +++ b/src/test/conditional_breakpoint_offload.run @@ -1,2 +1,3 @@ source `dirname $0`/util.sh +TIMEOUT=300 debug_test From 177a0d65c7dd409cd6adbfd547493280765f4ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Thu, 3 Jun 2021 12:03:32 +0200 Subject: [PATCH 057/110] Use unsigned long instead of int for PTRACE_GETEVENTMSG. "int raw_status" is just 4 bytes, but PTRACE_GETEVENTMSG writes "unsigned long" with 8 bytes at x86_64. 
Visible in replay of accept test with asan enabled rr build. --- src/Task.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Task.cc b/src/Task.cc index d5091ed8ff0..ea8837a9045 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -236,8 +236,9 @@ WaitStatus Task::kill() { * the exit event, we already reaped it from the ptrace perspective, * which implicitly detached. */ - if (ptrace_if_alive(PTRACE_GETEVENTMSG, nullptr, &raw_status)) { - status = WaitStatus(raw_status); + unsigned long long_status; + if (ptrace_if_alive(PTRACE_GETEVENTMSG, nullptr, &long_status)) { + status = WaitStatus(long_status); } else { status = WaitStatus::for_fatal_sig(SIGKILL); } From 2c78ef48e32605fc88b67186ac558a6c67f46261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 7 Jun 2021 15:39:06 +0200 Subject: [PATCH 058/110] Change plain char buffer to unique_ptr to free memory at function exit. Visible in `rr record simple` with asan enabled rr build. ==647542==ERROR: LeakSanitizer: detected memory leaks Direct leak of 19 byte(s) in 1 object(s) allocated from: 0x7fe6e70187a7 in operator new[](unsigned long) ../../../../src/libsanitizer/asan/asan_new_delete.cpp:102 0x55a421ec5ca1 in simple_to_lower /home/bernhard/data/entwicklung/2021/rr/2021-04-25/rr/src/log.cc:62 0x55a421ec6f3b in get_log_level /home/bernhard/data/entwicklung/2021/rr/2021-04-25/rr/src/log.cc:191 0x55a421ec7766 in get_log_module /home/bernhard/data/entwicklung/2021/rr/2021-04-25/rr/src/log.cc:224 ... 
--- src/log.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/log.cc b/src/log.cc index 93dcc996e78..67cc422a4c3 100644 --- a/src/log.cc +++ b/src/log.cc @@ -59,12 +59,12 @@ static char simple_to_lower(char ch) { } static string simple_to_lower(const string& s) { - char* buf = new char[s.size() + 1]; + std::unique_ptr buf(new char[s.size() + 1]); for (size_t i = 0; i < s.size(); ++i) { buf[i] = simple_to_lower(s[i]); } buf[s.size()] = 0; - return string(buf); + return string(buf.get()); } #if __has_attribute(require_constant_initialization) From 7214d961476ba3284a1b5b367cfde10e6e373b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 7 Jun 2021 16:20:06 +0200 Subject: [PATCH 059/110] Close directory after use in read_all_proc_fds. Visible in `rr record detach_state` with asan enabled rr build. ==831460==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32816 byte(s) in 1 object(s) allocated from: 0x7efd2d90de8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 0x7efd2d2df636 in __alloc_dir ../sysdeps/posix/opendir.c:118 SUMMARY: AddressSanitizer: 32816 byte(s) leaked in 1 allocation(s). --- src/util.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util.cc b/src/util.cc index b88b225216a..338b30ca2ee 100644 --- a/src/util.cc +++ b/src/util.cc @@ -2140,6 +2140,7 @@ std::vector read_all_proc_fds(pid_t tid) while (struct dirent *dir = readdir(fddir)) { ret.push_back(atoi(dir->d_name)); } + closedir(fddir); return ret; } From 8c9223775c74024f0f6e1cf80a560b7b4648f5f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 7 Jun 2021 16:52:15 +0200 Subject: [PATCH 060/110] Close directory after use in ls. Visible in `rr ls` with asan enabled rr build. 
==1006929==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32816 byte(s) in 1 object(s) allocated from: 0x7f2c0534ee8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 0x7f2c04d20636 in __alloc_dir ../sysdeps/posix/opendir.c:118 SUMMARY: AddressSanitizer: 32816 byte(s) leaked in 1 allocation(s). --- src/LsCommand.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/LsCommand.cc b/src/LsCommand.cc index ed2be01c8ca..6c05c287d45 100644 --- a/src/LsCommand.cc +++ b/src/LsCommand.cc @@ -172,6 +172,7 @@ static int ls(const string& traces_dir, const LsFlags& flags, FILE* out) { traces.back().ctime = st.st_ctim; } } + closedir(dir); if (flags.sort_by_time) { auto compare_by_time = [&](const TraceInfo& at, From a781a4735a2fc84d1c2baa5cc274327c876e897c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 7 Jun 2021 16:55:12 +0200 Subject: [PATCH 061/110] Close directory after use in get_folder_size. Visible in `rr ls -l` with asan enabled rr build. ==1014095==ERROR: LeakSanitizer: detected memory leaks Direct leak of 3314416 byte(s) in 101 object(s) allocated from: 0x7fbe6acabe8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 0x7fbe6a67d636 in __alloc_dir ../sysdeps/posix/opendir.c:118 SUMMARY: AddressSanitizer: 3314416 byte(s) leaked in 101 allocation(s). --- src/LsCommand.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/LsCommand.cc b/src/LsCommand.cc index 6c05c287d45..721c73dcf46 100644 --- a/src/LsCommand.cc +++ b/src/LsCommand.cc @@ -105,6 +105,7 @@ static bool get_folder_size(string dir_name, string& size_str) { bytes += st.st_size; } + closedir(dir); static const char suffixes[] = " KMGT"; double size = bytes; From 265cb4907bdb09fdb0387b78f72ad6c13269c068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 7 Jun 2021 16:59:59 +0200 Subject: [PATCH 062/110] Close directory after use in SourcesCommand::run. 
Visible in `rr sources` with asan enabled rr build. ==1021263==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32816 byte(s) in 1 object(s) allocated from: 0x7f379df85e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 0x7f379d957636 in __alloc_dir ../sysdeps/posix/opendir.c:118 SUMMARY: AddressSanitizer: 32816 byte(s) leaked in 1 allocation(s). --- src/SourcesCommand.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index d5a5331a828..6f56a31e0fa 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -907,6 +907,7 @@ int SourcesCommand::run(vector& args) { if (!files) { FATAL() << "Can't open trace dir"; } + closedir(files); map binary_file_names; while (true) { From 7228160900c5bb6594e16ad33ae4f2ed71af91d5 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 8 Jun 2021 11:42:18 +1200 Subject: [PATCH 063/110] Syscall-buffer F_SETLK/F_SETLKW on 64-bit systems --- src/preload/syscallbuf.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/preload/syscallbuf.c b/src/preload/syscallbuf.c index 0ab8e0fd6b8..7a2d2384b06 100644 --- a/src/preload/syscallbuf.c +++ b/src/preload/syscallbuf.c @@ -1470,8 +1470,10 @@ static int sys_fcntl64_setlkw64(const struct syscall_info* call) { } #if defined(SYS_fcntl64) +/* 32-bit system */ static long sys_fcntl64(const struct syscall_info* call) #else +/* 64-bit system */ static long sys_fcntl(const struct syscall_info* call) #endif { @@ -1490,16 +1492,16 @@ static long sys_fcntl(const struct syscall_info* call) case F_SETOWN_EX: return sys_fcntl64_own_ex(call); -#if F_SETLK != F_SETLK64 case F_SETLK64: -#else +#if !defined(SYS_fcntl64) + /* Also uses 64-bit flock format */ case F_SETLK: #endif return sys_fcntl64_setlk64(call); -#if F_SETLKW != F_SETLKW64 case F_SETLKW64: -#else +#if !defined(SYS_fcntl64) + /* Also uses 64-bit flock format */ case F_SETLKW: #endif return 
sys_fcntl64_setlkw64(call); From 0f60b491118ba4d2e271101a264b6d6a005faad9 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Thu, 10 Jun 2021 21:17:27 +1200 Subject: [PATCH 064/110] We don't need to worry about the value of perf_event_paranoid if we're running with CAP_SYS_ADMIN or CAP_PERFMON --- src/RecordSession.cc | 37 +++++++++++++++++++++---------------- src/kernel_supplement.h | 5 +++++ 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/RecordSession.cc b/src/RecordSession.cc index d861eda250e..166d1afdd52 100644 --- a/src/RecordSession.cc +++ b/src/RecordSession.cc @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include "core.h" #include "ftrace.h" #include "kernel_metadata.h" +#include "kernel_supplement.h" #include "log.h" #include "record_signal.h" #include "record_syscall.h" @@ -2135,22 +2137,25 @@ static string lookup_by_path(const string& name) { } else { setenv(SYSCALLBUF_ENABLED_ENV_VAR, "1", 1); - ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); - if (fd.is_open()) { - char buf[100]; - ssize_t size = read(fd, buf, sizeof(buf) - 1); - if (size >= 0) { - buf[size] = 0; - int val = atoi(buf); - if (val > 1) { - fprintf(stderr, - "rr needs /proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" - "Change it to 1, or use 'rr record -n' (slow).\n" - "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.d/10-rr.conf.\n" - "See 'man 8 sysctl', 'man 5 sysctl.d' (systemd systems)\n" - "and 'man 5 sysctl.conf' (non-systemd systems) for more details.\n", - val); - exit(1); + if (!has_effective_caps(uint64_t(1) << CAP_SYS_ADMIN) && + !has_effective_caps(uint64_t(1) << CAP_PERFMON)) { + ScopedFd fd("/proc/sys/kernel/perf_event_paranoid", O_RDONLY); + if (fd.is_open()) { + char buf[100]; + ssize_t size = read(fd, buf, sizeof(buf) - 1); + if (size >= 0) { + buf[size] = 0; + int val = atoi(buf); + if (val > 1) { + fprintf(stderr, + "rr needs 
/proc/sys/kernel/perf_event_paranoid <= 1, but it is %d.\n" + "Change it to 1, or use 'rr record -n' (slow).\n" + "Consider putting 'kernel.perf_event_paranoid = 1' in /etc/sysctl.d/10-rr.conf.\n" + "See 'man 8 sysctl', 'man 5 sysctl.d' (systemd systems)\n" + "and 'man 5 sysctl.conf' (non-systemd systems) for more details.\n", + val); + exit(1); + } } } } diff --git a/src/kernel_supplement.h b/src/kernel_supplement.h index d4bac404496..6705ea14a78 100644 --- a/src/kernel_supplement.h +++ b/src/kernel_supplement.h @@ -5,6 +5,7 @@ #define _GNU_SOURCE 1 +#include #include #include #include @@ -410,6 +411,10 @@ enum { #define RLIMIT_RTTIME 15 #endif +#ifndef CAP_PERFMON +#define CAP_PERFMON 38 +#endif + } // namespace rr #endif /* RR_KERNEL_SUPPLEMENT_H_ */ From 3e99470982a420f95e19af71b12c2b165bb30c74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Wed, 9 Jun 2021 18:16:53 +0200 Subject: [PATCH 065/110] Pass TEST_MONITOR_DEFAULT_TIMEOUT also to 32 bit tests. --- CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b24d83d7ef3..a69bd358882 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1658,20 +1658,20 @@ if(BUILD_TESTS) foreach(test ${BASIC_TESTS} ${BASIC_CPP_TESTS} ${OTHER_TESTS}) get_filename_component(testname ${test} NAME) add_test(${test}-32 - bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir) + bash source_dir/src/test/basic_test.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf - bash source_dir/src/test/basic_test.run ${testname}_32 -n bin_dir) + bash source_dir/src/test/basic_test.run ${testname}_32 -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) foreach(test ${TESTS_WITH_PROGRAM} ${TESTS_WITHOUT_PROGRAM}) get_filename_component(testname ${test} NAME) add_test(${test}-32 - bash source_dir/src/test/${test}.run 
${testname}_32 "" bin_dir) + bash source_dir/src/test/${test}.run ${testname}_32 "" bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32) add_test(${test}-32-no-syscallbuf - bash source_dir/src/test/${test}.run ${testname}_32 -n bin_dir) + bash source_dir/src/test/${test}.run ${testname}_32 -n bin_dir ${TEST_MONITOR_DEFAULT_TIMEOUT}) configure_test(${test}-32-no-syscallbuf) endforeach(test) endif() From 49dc4a7c3761ac59ff286cb34a6d2e00e3582ff7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sat, 12 Jun 2021 01:13:33 +0200 Subject: [PATCH 066/110] Do not wait for force_close_record_session. --- src/test-monitor/test-monitor.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/test-monitor/test-monitor.cc b/src/test-monitor/test-monitor.cc index 6aba5d39375..ff91784acf1 100644 --- a/src/test-monitor/test-monitor.cc +++ b/src/test-monitor/test-monitor.cc @@ -145,10 +145,16 @@ static void dump_gdb_stacktrace(pid_t child, FILE* out) { static void force_trace_closure(pid_t child, FILE* out) { char cmdline[1024 * 10]; - sprintf(cmdline, "gdb -p %d -ex 'set confirm off' -ex 'set height 0' -ex " - "'p rr::force_close_record_session()' -ex q &1", + sprintf(cmdline, "gdb -p %d " + "-ex 'set confirm off' " + "-ex 'set height 0' " + "-ex 'b rr::force_close_record_session' " + "-ex 'p rr::force_close_record_session()' " + "-ex detach " + "-ex q &1", child); dump_popen_cmdline(cmdline, out); + sleep(2); /* give the force_close_record_session time to take effect */ } static void dump_emergency_debugger(char* gdb_cmd, FILE* out) { From 82ab0f9f5bc13fb27acaccb9a85397bcaa7a7b05 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sun, 13 Jun 2021 18:26:53 +1200 Subject: [PATCH 067/110] Add '--tty' option to 'rr replay' to redirect echoed tracee output to a specific tty or file Resolves #1988 --- CMakeLists.txt | 1 + src/ReplayCommand.cc | 10 +++++++++- src/ReplaySession.cc | 8 ++++++++ src/ReplaySession.h | 6 
++++++ src/Session.h | 4 ++++ src/Task.cc | 4 ++-- src/replay_syscall.cc | 2 +- src/test/tty.run | 13 +++++++++++++ 8 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 src/test/tty.run diff --git a/CMakeLists.txt b/CMakeLists.txt index a69bd358882..620f4d01051 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1424,6 +1424,7 @@ set(TESTS_WITHOUT_PROGRAM syscallbuf_timeslice_250 trace_version term_trace_cpu + tty unmap_vdso unwind_on_signal vfork_exec diff --git a/src/ReplayCommand.cc b/src/ReplayCommand.cc index 9f40e033913..c11328f3dd4 100644 --- a/src/ReplayCommand.cc +++ b/src/ReplayCommand.cc @@ -69,7 +69,8 @@ ReplayCommand ReplayCommand::singleton( " --stats= display brief stats every N steps (eg 10000).\n" " --serve-files Serve all files from the trace rather than\n" " assuming they exist on disk. Debugging will\n" - " be slower, but be able to tolerate missing files\n"); + " be slower, but be able to tolerate missing files\n" + " --tty Redirect tracee replay output to \n"); struct ReplayFlags { // Start a debug server for the task scheduled at the first @@ -128,6 +129,8 @@ struct ReplayFlags { // to get them from the filesystem bool serve_files; + string tty; + ReplayFlags() : goto_event(0), singlestep_to_event(0), @@ -167,6 +170,7 @@ static bool parse_replay_arg(vector& args, ReplayFlags& flags) { { 1, "fullname", NO_PARAMETER }, { 2, "stats", HAS_PARAMETER }, { 3, "serve-files", NO_PARAMETER }, + { 4, "tty", HAS_PARAMETER }, { 'u', "cpu-unbound", NO_PARAMETER }, { 'i', "interpreter", HAS_PARAMETER } }; @@ -252,6 +256,9 @@ static bool parse_replay_arg(vector& args, ReplayFlags& flags) { case 3: flags.serve_files = true; break; + case 4: + flags.tty = opt.value; + break; case 'u': flags.cpu_unbound = true; break; @@ -326,6 +333,7 @@ static pid_t waiting_for_child; static ReplaySession::Flags session_flags(const ReplayFlags& flags) { ReplaySession::Flags result; result.redirect_stdio = flags.redirect; + result.redirect_stdio_file = 
flags.tty; result.share_private_mappings = flags.share_private_mappings; result.cpu_unbound = flags.cpu_unbound; return result; diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index 8b07582c455..7a5fc184637 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -171,6 +171,13 @@ ReplaySession::ReplaySession(const std::string& dir, const Flags& flags) ticks_semantics_ = trace_in.ticks_semantics(); rrcall_base_ = trace_in.rrcall_base(); + if (!flags.redirect_stdio_file.empty()) { + tracee_output_fd_ = make_shared(flags.redirect_stdio_file.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0600); + if (!tracee_output_fd_->is_open()) { + FATAL() << "Can't open/create tracee output file " << flags.redirect_stdio_file; + } + } + memset(&last_siginfo_, 0, sizeof(last_siginfo_)); advance_to_next_trace_frame(); @@ -201,6 +208,7 @@ ReplaySession::ReplaySession(const std::string& dir, const Flags& flags) ReplaySession::ReplaySession(const ReplaySession& other) : Session(other), emu_fs(EmuFs::create()), + tracee_output_fd_(other.tracee_output_fd_), trace_in(other.trace_in), trace_frame(other.trace_frame), current_step(other.current_step), diff --git a/src/ReplaySession.h b/src/ReplaySession.h index ea89245d682..ae30a3923f4 100644 --- a/src/ReplaySession.h +++ b/src/ReplaySession.h @@ -239,6 +239,7 @@ class ReplaySession : public Session { , cpu_unbound(false) {} Flags(const Flags& other) = default; bool redirect_stdio; + std::string redirect_stdio_file; bool share_private_mappings; bool cpu_unbound; }; @@ -317,6 +318,10 @@ class ReplaySession : public Session { bool has_trace_quirk(TraceReader::TraceQuirks quirk) { return trace_in.quirks() & quirk; } + virtual int tracee_output_fd(int dflt) override { + return tracee_output_fd_->is_open() ? 
tracee_output_fd_->get() : dflt; + } + private: ReplaySession(const std::string& dir, const Flags& flags); ReplaySession(const ReplaySession& other); @@ -360,6 +365,7 @@ class ReplaySession : public Session { void clear_syscall_bp(); std::shared_ptr emu_fs; + std::shared_ptr tracee_output_fd_; TraceReader trace_in; TraceFrame trace_frame; ReplayTraceStep current_step; diff --git a/src/Session.h b/src/Session.h index 4b2dcb7a082..6446d4f656b 100644 --- a/src/Session.h +++ b/src/Session.h @@ -395,6 +395,10 @@ class Session { const ThreadGroupMap& thread_group_map() const { return thread_group_map_; } + virtual int tracee_output_fd(int dflt) { + return dflt; + } + protected: Session(); virtual ~Session(); diff --git a/src/Task.cc b/src/Task.cc index ea8837a9045..9812dab99c0 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -3159,8 +3159,8 @@ static long perform_remote_clone(AutoRemoteSyscalls& remote, } static void setup_fd_table(Task* t, FdTable& fds, int tracee_socket_fd_number) { - fds.add_monitor(t, STDOUT_FILENO, new StdioMonitor(STDOUT_FILENO)); - fds.add_monitor(t, STDERR_FILENO, new StdioMonitor(STDERR_FILENO)); + fds.add_monitor(t, STDOUT_FILENO, new StdioMonitor(t->session().tracee_output_fd(STDOUT_FILENO))); + fds.add_monitor(t, STDERR_FILENO, new StdioMonitor(t->session().tracee_output_fd(STDERR_FILENO))); fds.add_monitor(t, RR_MAGIC_SAVE_DATA_FD, new MagicSaveDataMonitor()); fds.add_monitor(t, tracee_socket_fd_number, new PreserveFileMonitor()); } diff --git a/src/replay_syscall.cc b/src/replay_syscall.cc index d508e1188c5..17da2f78857 100644 --- a/src/replay_syscall.cc +++ b/src/replay_syscall.cc @@ -1062,7 +1062,7 @@ static void handle_opened_files(ReplayTask* t, int flags) { if (emu_file) { file_monitor = new MmappedFileMonitor(t, emu_file); } else if (o.path == "terminal") { - file_monitor = new StdioMonitor(STDERR_FILENO); + file_monitor = new StdioMonitor(t->session().tracee_output_fd(STDERR_FILENO)); } else if (is_proc_mem_file(o.path.c_str())) { 
file_monitor = new ProcMemMonitor(t, o.path); } else if (is_proc_fd_dir(o.path.c_str())) { diff --git a/src/test/tty.run b/src/test/tty.run new file mode 100644 index 00000000000..3a3310d4792 --- /dev/null +++ b/src/test/tty.run @@ -0,0 +1,13 @@ +source `dirname $0`/util.sh +record simple$bitness +replay "--tty tty-output" + +token=EXIT-SUCCESS +if [[ "tty-output" != $(grep -l $token tty-output) ]]; then + failed ": token '$token' not in tty-output:" + echo "--------------------------------------------------" + cat tty-output + echo "--------------------------------------------------" +else + passed +fi From da2cb409e3136e7cf102500f902dac280ce1f4c6 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sun, 13 Jun 2021 18:31:09 +1200 Subject: [PATCH 068/110] Don't crash when not using --tty --- src/ReplaySession.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ReplaySession.h b/src/ReplaySession.h index ae30a3923f4..f025fdacea6 100644 --- a/src/ReplaySession.h +++ b/src/ReplaySession.h @@ -319,7 +319,7 @@ class ReplaySession : public Session { bool has_trace_quirk(TraceReader::TraceQuirks quirk) { return trace_in.quirks() & quirk; } virtual int tracee_output_fd(int dflt) override { - return tracee_output_fd_->is_open() ? tracee_output_fd_->get() : dflt; + return tracee_output_fd_.get() ? tracee_output_fd_->get() : dflt; } private: From a386a8687db2125d17e852d7e07a626f945a36ff Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 07:33:45 -0700 Subject: [PATCH 069/110] Move pkeys test to x86 dir. 
--- CMakeLists.txt | 2 +- src/test/{ => x86}/pkeys.c | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/test/{ => x86}/pkeys.c (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 620f4d01051..482ca728b4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -951,7 +951,7 @@ set(BASIC_TESTS pid_ns_reap pid_ns_segv pidfd - pkeys + x86/pkeys poll_sig_race ppoll prctl diff --git a/src/test/pkeys.c b/src/test/x86/pkeys.c similarity index 100% rename from src/test/pkeys.c rename to src/test/x86/pkeys.c From 37dcab88775db878d6a925eda24220b6b01ea672 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 07:40:26 -0700 Subject: [PATCH 070/110] Move ioperm test to x86 dir. --- CMakeLists.txt | 2 +- src/test/{ => x86}/ioperm.c | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/test/{ => x86}/ioperm.c (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 482ca728b4c..f68b05efe94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -872,7 +872,7 @@ set(BASIC_TESTS ioctl_sg ioctl_tty ioctl_vt - ioperm + x86/ioperm iopl join_threads joystick diff --git a/src/test/ioperm.c b/src/test/x86/ioperm.c similarity index 100% rename from src/test/ioperm.c rename to src/test/x86/ioperm.c From 3a7609014ee7886639558f3950d7b5fb9184568f Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 08:37:08 -0700 Subject: [PATCH 071/110] Implement _raw_syscall for aarch64. --- src/preload/raw_syscall.S | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/preload/raw_syscall.S b/src/preload/raw_syscall.S index 717109198b3..1c42d86c7cd 100644 --- a/src/preload/raw_syscall.S +++ b/src/preload/raw_syscall.S @@ -136,6 +136,24 @@ _raw_syscall: .cfi_endproc .size _raw_syscall, . 
- _raw_syscall +#elif defined(__aarch64__) + .text + .globl _raw_syscall + .hidden _raw_syscall + .type _raw_syscall, @function +_raw_syscall: + .cfi_startproc + mov x8,x0 + mov x0,x1 + mov x1,x2 + mov x2,x3 + mov x3,x4 + mov x4,x5 + mov x5,x6 + svc #0 + ret + .cfi_endproc + .size _raw_syscall, . - _raw_syscall #else #error unknown CPU architecture #endif /* __i386__/__x86_64__ */ From 6ac98b319d6999ec18b9080e9c078a3b6b14e578 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 08:38:22 -0700 Subject: [PATCH 072/110] Suppress warnings about unused variables on AArch64. --- src/test/unexpected_exit.c | 4 +++- src/test/unexpected_exit_execve.c | 4 +++- src/test/unexpected_exit_execve_twice.c | 4 +++- src/test/unexpected_exit_pid_ns.c | 4 +++- src/util.cc | 3 +++ src/util.h | 7 +++++++ 6 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/test/unexpected_exit.c b/src/test/unexpected_exit.c index fd7d262bb76..6a5e777e44d 100644 --- a/src/test/unexpected_exit.c +++ b/src/test/unexpected_exit.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? 
*/ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_execve.c b/src/test/unexpected_exit_execve.c index 93db859d52b..98d99388a7a 100644 --- a/src/test/unexpected_exit_execve.c +++ b/src/test/unexpected_exit_execve.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? */ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_execve_twice.c b/src/test/unexpected_exit_execve_twice.c index e784130aec9..2e4f93795e0 100644 --- a/src/test/unexpected_exit_execve_twice.c +++ b/src/test/unexpected_exit_execve_twice.c @@ -22,11 +22,12 @@ static int do_child(void) { /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx"); + return 0; #else /* Does this actually change registers on ARM??? 
*/ int i; @@ -34,6 +35,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/test/unexpected_exit_pid_ns.c b/src/test/unexpected_exit_pid_ns.c index 03dfaf655be..80cf65fa218 100644 --- a/src/test/unexpected_exit_pid_ns.c +++ b/src/test/unexpected_exit_pid_ns.c @@ -7,11 +7,12 @@ static int child_to_parent[2]; /* Do a busy delay loop that changes registers so won't trigger rr's spinlock-detection heuristic */ -static void delay(void) { +static char delay(void) { #if defined(__x86_64__) || defined(__i386__) asm("mov $10000000,%%ecx\n\t" "1: loop 1b\n\t" : : : "ecx", "memory"); + return 0; #else /* Does this actually change registers on ARM??? */ int i; @@ -19,6 +20,7 @@ static void delay(void) { for (i = 0; i < 10000000; ++i) { ch = i % 3; } + return ch; #endif } diff --git a/src/util.cc b/src/util.cc index 338b30ca2ee..3007a4593b5 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1762,6 +1762,9 @@ bool is_advanced_pc_and_signaled_instruction(Task* t, remote_code_ptr ip) { !memcmp(insn, int3_insn, sizeof(int3_insn))) { return true; } +#else + UNUSED(t); + UNUSED(ip); #endif return false; } diff --git a/src/util.h b/src/util.h index bbc63c727d1..902d1c47baf 100644 --- a/src/util.h +++ b/src/util.h @@ -28,6 +28,13 @@ #define SOL_NETLINK 270 #endif +#define UNUSED(expr) \ + do { \ + if (expr) { \ + (void)0; \ + } \ + } while (0) + namespace rr { /* From 3e22528d9319fc88fdbfbfba49dd958f16270809 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 08:39:36 -0700 Subject: [PATCH 073/110] Modify alignment directed for AArch64 which uses powers of 2 instead of numbers of bytes as arguments. 
--- src/preload/rr_page.S | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/preload/rr_page.S b/src/preload/rr_page.S index b8136f2f093..aa1e7ae5e62 100644 --- a/src/preload/rr_page.S +++ b/src/preload/rr_page.S @@ -33,6 +33,8 @@ #define TRAP \ int $3; \ ret +#define PAGE_ALIGN \ + .align 0x1000 #elif defined(__x86_64__) #define CALL \ syscall; \ @@ -43,6 +45,8 @@ #define TRAP \ nop; int $3; \ ret +#define PAGE_ALIGN \ + .align 0x1000 #elif defined(__aarch64__) #define CALL \ svc #0; \ @@ -53,19 +57,21 @@ #define TRAP \ brk #0; \ ret +#define PAGE_ALIGN \ + .align 12 #endif .section .sh_placeholder, "a" -.align 0x1000 +PAGE_ALIGN .fill 0x1000, 1, 0xff .section .vdso.text, "a", @progbits -.align 0x1000 +PAGE_ALIGN #include "rr_vdso.S" .section .record.text, "a", @progbits -.align 0x1000 +PAGE_ALIGN .global rr_page_start rr_page_start: @@ -76,7 +82,7 @@ rr_page_start: #include "rr_page_instructions.S" .section .replay.text, "", @progbits -.align 0x1000 +PAGE_ALIGN replay_page: // No CFI instructions or symbols for the replay page - we'll implicitly share // those of the record copy From e717735b47e6ca3248fc4d901ff26a60d9f79379 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 09:07:01 -0700 Subject: [PATCH 074/110] Move iopl test to x86 dir. 
--- CMakeLists.txt | 2 +- src/test/{ => x86}/iopl.c | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/test/{ => x86}/iopl.c (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index f68b05efe94..d9f60c910bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -873,7 +873,7 @@ set(BASIC_TESTS ioctl_tty ioctl_vt x86/ioperm - iopl + x86/iopl join_threads joystick kcmp diff --git a/src/test/iopl.c b/src/test/x86/iopl.c similarity index 100% rename from src/test/iopl.c rename to src/test/x86/iopl.c From cd611e49147a1075d91b725227d71d28a09eacd3 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 09:24:18 -0700 Subject: [PATCH 075/110] sys/io.h is an x86 specific header. --- src/test/util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/util.h b/src/test/util.h index c53135656e6..dc654176e17 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -71,7 +71,6 @@ #include #include #include -#include #include #include #include @@ -111,6 +110,7 @@ // X86 specific headers #if defined(__i386__) || defined(__x86_64__) #include +#include #include #endif From 0d857d3a97727fce890516cb394b6e6dff4fc5fa Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 22:35:33 -0700 Subject: [PATCH 076/110] Implement my_read on AArch64. 
--- src/test/reverse_step_threads_break.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/test/reverse_step_threads_break.c b/src/test/reverse_step_threads_break.c index 5d2351b8dc7..df53402e016 100644 --- a/src/test/reverse_step_threads_break.c +++ b/src/test/reverse_step_threads_break.c @@ -14,13 +14,25 @@ static size_t my_read(int fd, void* buf, size_t size) { #ifdef __x86_64__ __asm__("syscall\n\t" : "=a"(ret) - : "a"(SYS_read), "D"(fd), "S"(buf), "d"(size)); + : "a"(SYS_read), "D"(fd), "S"(buf), "d"(size) + : "memory"); #elif defined(__i386__) __asm__("xchg %%ebx,%%edi\n\t" "int $0x80\n\t" "xchg %%ebx,%%edi\n\t" : "=a"(ret) - : "a"(SYS_read), "c"(buf), "d"(size), "D"(fd)); + : "a"(SYS_read), "c"(buf), "d"(size), "D"(fd) + : "memory"); +#elif defined(__aarch64__) + register uint64_t x0 __asm__ ("x0") = fd; + register void *x1 __asm__ ("x1") = buf; + register uint64_t x2 __asm__ ("x2") = size; + register uint64_t x8 __asm__ ("x8") = SYS_read; + __asm__("svc #0\n\t" + : "+r"(x0) + : "r"(x1), "r"(x2), "r"(x8) + : "memory"); + ret = x0; #else #error define syscall here #endif From 29f8a00fcbd16cf69bcb92f414bdfd212562b753 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Fri, 11 Jun 2021 22:36:24 -0700 Subject: [PATCH 077/110] Move ptrace_sysemu to the x86 dir because it's very x86 specific. 
--- CMakeLists.txt | 2 +- src/test/{ => x86}/ptrace_sysemu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename src/test/{ => x86}/ptrace_sysemu.c (99%) diff --git a/CMakeLists.txt b/CMakeLists.txt index d9f60c910bf..6d176d0e5c8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -989,7 +989,7 @@ set(BASIC_TESTS ptrace_singlestep ptrace_syscall ptrace_syscall_clone_untraced - ptrace_sysemu + x86/ptrace_sysemu ptrace_sysemu_syscall ptrace_trace_clone ptrace_trace_exit diff --git a/src/test/ptrace_sysemu.c b/src/test/x86/ptrace_sysemu.c similarity index 99% rename from src/test/ptrace_sysemu.c rename to src/test/x86/ptrace_sysemu.c index eca2ca4cf67..04083026d11 100644 --- a/src/test/ptrace_sysemu.c +++ b/src/test/x86/ptrace_sysemu.c @@ -1,7 +1,7 @@ /* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ #include "util.h" -#include "ptrace_util.h" +#include "../ptrace_util.h" /* This tests PTRACE_SYSEMU, PTRACE_SINGLESTEP and PTRACE_SYSEMU_SINGLESTEP */ From a940db0c03279e1f89bcacff29b2df4f5fe3fd54 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sat, 12 Jun 2021 21:21:21 -0700 Subject: [PATCH 078/110] Fix typos. 
--- src/Task.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Task.cc b/src/Task.cc index 9812dab99c0..3a7877a2c73 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -1501,7 +1501,7 @@ void Task::flush_regs() { registers_dirty = false; } #else - #error "Unknown archietcture" + #error "Unknown architecture" #endif } #if defined(__i386__) || defined(__x86_64__) @@ -1513,7 +1513,7 @@ void Task::flush_regs() { uintptr_t syscall = registers.original_syscallno(); struct iovec vec = { &syscall, sizeof(syscall) }; - LOG(debug) << "Chaning syscall to " << syscall; + LOG(debug) << "Changing syscall to " << syscall; if (ptrace_if_alive(PTRACE_SETREGSET, NT_ARM_SYSTEM_CALL, &vec)) { orig_syscallno_dirty = false; } From 339c53cf0119f6e20c8cea5eb8af9e75b7684201 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sun, 13 Jun 2021 06:52:34 -0700 Subject: [PATCH 079/110] Temporarily skip building librrpreload.so on AArch64. --- CMakeLists.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d176d0e5c8..236ee16c89b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -364,6 +364,7 @@ set(PRELOAD_SOURCE_FILES rrcalls.h syscallbuf.h ) +if (x86ish) add_library(rrpreload) foreach(file ${PRELOAD_FILES}) target_sources(rrpreload PUBLIC "${CMAKE_SOURCE_DIR}/src/preload/${file}") @@ -372,6 +373,7 @@ foreach(file ${PRELOAD_FILES}) endforeach(file) set_target_properties(rrpreload PROPERTIES LINK_FLAGS "-nostartfiles ${LINKER_FLAGS}") set_target_properties(rrpreload PROPERTIES INSTALL_RPATH "\$ORIGIN") +endif() set(AUDIT_FILES rtld-audit.c @@ -575,9 +577,11 @@ endif() set_target_properties(rr PROPERTIES LINK_FLAGS "${RR_MAIN_LINKER_FLAGS}") +if (x86ish) target_link_libraries(rrpreload ${CMAKE_DL_LIBS} ) +endif() add_executable(rr_exec_stub src/exec_stub.c) post_build_executable(rr_exec_stub) @@ -639,7 +643,11 @@ install(PROGRAMS scripts/signal-rr-recording.sh install(PROGRAMS scripts/rr_completion DESTINATION 
${CMAKE_INSTALL_DATADIR}/bash-completion/completions RENAME rr) -install(TARGETS ${RR_BIN} rrpreload rrpage rraudit rr_exec_stub +set(RR_INSTALL_LIBS rrpage rraudit rr_exec_stub) +if (x86ish) + set(RR_INSTALL_LIBS rrpreload ${RR_INSTALL_LIBS}) +endif() +install(TARGETS ${RR_BIN} ${RR_INSTALL_LIBS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}/rr) From 09e56d730275ceeccb74b86152972bfbb192150a Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Sun, 13 Jun 2021 07:41:01 -0700 Subject: [PATCH 080/110] Don't make an iovec of an iovec! --- src/Task.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Task.cc b/src/Task.cc index 3a7877a2c73..cc65b490e77 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -1495,9 +1495,7 @@ void Task::flush_regs() { orig_syscallno_dirty = false; } #elif defined(__aarch64__) - struct iovec vec = { &ptrace_regs, - sizeof(ptrace_regs) }; - if (ptrace_if_alive(PTRACE_SETREGSET, NT_PRSTATUS, &vec)) { + if (ptrace_if_alive(PTRACE_SETREGSET, NT_PRSTATUS, &ptrace_regs)) { registers_dirty = false; } #else From cadfb062d3fdf63a8600d4522facb603de00f639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Wed, 9 Jun 2021 17:36:07 +0200 Subject: [PATCH 081/110] Use external timeout if that is greater. 
--- src/test/conditional_breakpoint_offload.run | 2 +- src/test/condvar_stress.run | 2 +- src/test/fork_stress.run | 2 +- src/test/mutex_pi_stress.run | 2 +- src/test/record_replay.run | 2 +- src/test/thread_open_race.run | 2 +- src/test/thread_stress.run | 2 +- src/test/vsyscall_reverse_next.run | 2 +- src/test/x86/string_instructions.run | 2 +- src/test/x86/string_instructions_async_signals.run | 2 +- src/test/x86/string_instructions_async_signals_shared.run | 2 +- src/test/x86/string_instructions_break.run | 2 +- src/test/x86/string_instructions_multiwatch.run | 2 +- src/test/x86/string_instructions_replay.run | 2 +- src/test/x86/string_instructions_replay_quirk.run | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/test/conditional_breakpoint_offload.run b/src/test/conditional_breakpoint_offload.run index 5641c9ed3fb..d3dc1eae0a0 100644 --- a/src/test/conditional_breakpoint_offload.run +++ b/src/test/conditional_breakpoint_offload.run @@ -1,3 +1,3 @@ source `dirname $0`/util.sh -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi debug_test diff --git a/src/test/condvar_stress.run b/src/test/condvar_stress.run index d3661713dae..68e616cf1f5 100644 --- a/src/test/condvar_stress.run +++ b/src/test/condvar_stress.run @@ -2,5 +2,5 @@ source `dirname $0`/util.sh # Switch threads very eagerly on recorded events. 
RECORD_ARGS="-s" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/fork_stress.run b/src/test/fork_stress.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/fork_stress.run +++ b/src/test/fork_stress.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/mutex_pi_stress.run b/src/test/mutex_pi_stress.run index d3661713dae..68e616cf1f5 100644 --- a/src/test/mutex_pi_stress.run +++ b/src/test/mutex_pi_stress.run @@ -2,5 +2,5 @@ source `dirname $0`/util.sh # Switch threads very eagerly on recorded events. RECORD_ARGS="-s" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/record_replay.run b/src/test/record_replay.run index 885812abd22..b35c3c88cd0 100644 --- a/src/test/record_replay.run +++ b/src/test/record_replay.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi record record_replay_subject$bitness just_record rr "--suppress-environment-warnings replay -a $workdir/*-0" replay diff --git a/src/test/thread_open_race.run b/src/test/thread_open_race.run index 908f7827d6b..a8840d0efbe 100644 --- a/src/test/thread_open_race.run +++ b/src/test/thread_open_race.run @@ -3,6 +3,6 @@ source `dirname $0`/util.sh # This test requires syscallbuf syscall patching skip_if_no_syscall_buf -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi compare_test EXIT-SUCCESS diff --git a/src/test/thread_stress.run b/src/test/thread_stress.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/thread_stress.run +++ b/src/test/thread_stress.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/vsyscall_reverse_next.run b/src/test/vsyscall_reverse_next.run index 5641c9ed3fb..d3dc1eae0a0 100644 --- 
a/src/test/vsyscall_reverse_next.run +++ b/src/test/vsyscall_reverse_next.run @@ -1,3 +1,3 @@ source `dirname $0`/util.sh -TIMEOUT=300 +if [ $TIMEOUT -lt 300 ]; then TIMEOUT=300; fi debug_test diff --git a/src/test/x86/string_instructions.run b/src/test/x86/string_instructions.run index 9802f031d0d..08226ab6b4a 100644 --- a/src/test/x86/string_instructions.run +++ b/src/test/x86/string_instructions.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_async_signals.run b/src/test/x86/string_instructions_async_signals.run index ab81f7f6835..7a5599b14f0 100644 --- a/src/test/x86/string_instructions_async_signals.run +++ b/src/test/x86/string_instructions_async_signals.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh RECORD_ARGS="-c1000" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_async_signals_shared.run b/src/test/x86/string_instructions_async_signals_shared.run index ab81f7f6835..7a5599b14f0 100644 --- a/src/test/x86/string_instructions_async_signals_shared.run +++ b/src/test/x86/string_instructions_async_signals_shared.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh RECORD_ARGS="-c1000" -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi compare_test EXIT-SUCCESS diff --git a/src/test/x86/string_instructions_break.run b/src/test/x86/string_instructions_break.run index cc9681a1651..14de8ac41e3 100644 --- a/src/test/x86/string_instructions_break.run +++ b/src/test/x86/string_instructions_break.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record string_instructions_replay$bitness debug x86/string_instructions_break diff --git a/src/test/x86/string_instructions_multiwatch.run b/src/test/x86/string_instructions_multiwatch.run index 080818d751c..0463cb6902c 100644 --- 
a/src/test/x86/string_instructions_multiwatch.run +++ b/src/test/x86/string_instructions_multiwatch.run @@ -1,4 +1,4 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi debug_test diff --git a/src/test/x86/string_instructions_replay.run b/src/test/x86/string_instructions_replay.run index 1b0e1dda0dd..b3e39b47d33 100644 --- a/src/test/x86/string_instructions_replay.run +++ b/src/test/x86/string_instructions_replay.run @@ -1,6 +1,6 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record $TESTNAME & diff --git a/src/test/x86/string_instructions_replay_quirk.run b/src/test/x86/string_instructions_replay_quirk.run index 18b73177f50..ad709863d88 100644 --- a/src/test/x86/string_instructions_replay_quirk.run +++ b/src/test/x86/string_instructions_replay_quirk.run @@ -1,5 +1,5 @@ source `dirname $0`/util.sh -TIMEOUT=600 +if [ $TIMEOUT -lt 600 ]; then TIMEOUT=600; fi record string_instructions_replay$bitness debug x86/string_instructions_replay_quirk From 8345ba685e15ba114c0c6fd979d967d61e19f0ee Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Mon, 14 Jun 2021 16:34:57 +1200 Subject: [PATCH 082/110] Support 'end' event spec in 'rr dump' to select the last event in the trace --- src/DumpCommand.cc | 58 ++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/src/DumpCommand.cc b/src/DumpCommand.cc index bd941e428fd..b78e07380b8 100644 --- a/src/DumpCommand.cc +++ b/src/DumpCommand.cc @@ -36,7 +36,8 @@ DumpCommand DumpCommand::singleton( "dump", " rr dump [OPTIONS] [] [...]\n" " Event specs can be either an event number like `127', or a range\n" - " like `1000-5000'. 
By default, all events are dumped.\n" + " like `1000-5000', or `end' for the last record in the trace.\n" + " By default, all events are dumped.\n" " -b, --syscallbuf dump syscallbuf contents\n" " -e, --task-events dump task events\n" " -m, --recorded-metadata dump recorded data metadata\n" @@ -165,30 +166,21 @@ static void dump_task_event(FILE* out, const TraceTaskEvent& event) { * event sets. No attempt is made to enforce this or normalize specs. */ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, - FILE* out, const string* spec) { + FILE* out, const string* spec, + const unordered_map<FrameTime, TraceTaskEvent>& task_events) { uint32_t start = 0, end = numeric_limits<uint32_t>::max(); + bool only_end = false; - // Try to parse the "range" syntax '[start]-[end]'. - if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { - // Fall back on assuming the spec is a single event - // number, however it parses out with atoi(). - start = end = atoi(spec->c_str()); - } - - unordered_map<FrameTime, TraceTaskEvent> task_events; - FrameTime last_time = 0; - while (true) { - FrameTime time; - TraceTaskEvent r = trace.read_task_event(&time); - if (time < last_time) { - FATAL() << "TraceTaskEvent times non-monotonic"; - } - if (r.type() == TraceTaskEvent::NONE) { - break; + if (spec && *spec == "end") { + only_end = true; + } else { + // Try to parse the "range" syntax '[start]-[end]'. + if (spec && 2 > sscanf(spec->c_str(), "%u-%u", &start, &end)) { + // Fall back on assuming the spec is a single event + // number, however it parses out with atoi(). + start = end = atoi(spec->c_str()); } - task_events.insert(make_pair(time, r)); - last_time = time; } bool process_raw_data = @@ -198,8 +190,9 @@ static void dump_events_matching(TraceReader& trace, const DumpFlags& flags, if (end < frame.time()) { return; } - if (start <= frame.time() && frame.time() <= end && - (!flags.only_tid || flags.only_tid == frame.tid())) { + if (only_end ? 
trace.at_end() : + (start <= frame.time() && frame.time() <= end && + (!flags.only_tid || flags.only_tid == frame.tid()))) { if (flags.raw_dump) { frame.dump_raw(out); } else { @@ -311,13 +304,28 @@ void dump(const string& trace_dir, const DumpFlags& flags, "eax ebx ecx edx esi edi ebp orig_eax esp eip eflags\n"); } + unordered_map<FrameTime, TraceTaskEvent> task_events; + FrameTime last_time = 0; + while (true) { + FrameTime time; + TraceTaskEvent r = trace.read_task_event(&time); + if (time < last_time) { + FATAL() << "TraceTaskEvent times non-monotonic"; + } + if (r.type() == TraceTaskEvent::NONE) { + break; + } + task_events.insert(make_pair(time, r)); + last_time = time; + } + if (specs.size() > 0) { for (size_t i = 0; i < specs.size(); ++i) { - dump_events_matching(trace, flags, out, &specs[i]); + dump_events_matching(trace, flags, out, &specs[i], task_events); } } else { // No specs => dump all events. - dump_events_matching(trace, flags, out, nullptr /*all events*/); + dump_events_matching(trace, flags, out, nullptr /*all events*/, task_events); } if (flags.dump_statistics) { From 7854be5362baadc0143b956279e96f3c4f511dfa Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 15 Jun 2021 10:29:34 +1200 Subject: [PATCH 083/110] Make io_uring return ENOSYS for now --- CMakeLists.txt | 1 + src/record_syscall.cc | 7 ++++--- src/syscalls.py | 2 +- src/test/io_uring.c | 44 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) create mode 100644 src/test/io_uring.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 236ee16c89b..8cdc1a31321 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -873,6 +873,7 @@ set(BASIC_TESTS invalid_fcntl invalid_ioctl io + io_uring ioctl ioctl_fb ioctl_fs diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 3946a57e416..9e08eccebeb 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -3967,10 +3967,10 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, (size_t)regs.arg4())); return 
PREVENT_SWITCH; + case Arch::io_uring_setup: case Arch::io_setup: { - // Prevent the io_setup from running and fake an ENOSYS return. We want - // to discourage applications from using this API because the async - // reads are writes by the kernel that can race with userspace execution. + // Prevent the io_setup/io_uring from running and fake an ENOSYS return. We want + // to stop applications from using these APIs because we don't support them currently. Registers r = regs; r.set_arg2(0); t->set_regs(r); @@ -6227,6 +6227,7 @@ static void rec_process_syscall_arch(RecordTask* t, case Arch::futex: case Arch::ioctl: case Arch::io_setup: + case Arch::io_uring_setup: case Arch::madvise: case Arch::memfd_create: case Arch::mprotect: diff --git a/src/syscalls.py b/src/syscalls.py index 188359ec3f4..929d0a0a76b 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -1707,7 +1707,7 @@ def __init__(self, **kwargs): # x86-64 decided to skip ahead here to catchup pidfd_send_signal = UnsupportedSyscall(x86=424, x64=424, generic=424) -io_uring_setup = UnsupportedSyscall(x86=425, x64=425, generic=425) +io_uring_setup = IrregularEmulatedSyscall(x86=425, x64=425, generic=425) io_uring_enter = UnsupportedSyscall(x86=426, x64=426, generic=426) io_uring_register = UnsupportedSyscall(x86=427, x64=427, generic=427) open_tree = UnsupportedSyscall(x86=428, x64=428, generic=428) diff --git a/src/test/io_uring.c b/src/test/io_uring.c new file mode 100644 index 00000000000..a2bf6e1c6e4 --- /dev/null +++ b/src/test/io_uring.c @@ -0,0 +1,44 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +struct io_sqring_offsets { + uint32_t head; + uint32_t tail; + uint32_t ring_mask; + uint32_t ring_entries; + uint32_t flags; + uint32_t dropped; + uint32_t array; + uint32_t resv[3]; +}; + +struct io_cqring_offsets { + uint32_t head; + uint32_t tail; + uint32_t ring_mask; + uint32_t ring_entries; + uint32_t overflow; + uint32_t cqes; + uint32_t flags; 
+ uint32_t resv[3]; +}; + +struct io_uring_params { + uint32_t sq_entries; + uint32_t cq_entries; + uint32_t flags; + uint32_t sq_thread_idle; + uint32_t features; + uint32_t resv[4]; + struct io_sqring_offsets sq_off; + struct io_cqring_offsets cq_off; +}; + +int main(void) { + struct io_uring_params params; + int ret = syscall(RR_io_uring_setup, 32, &params); + test_assert(ret == -1 && errno == ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From 38571520bee9360aeb454a14be1de43e7490f324 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 16 Jun 2021 16:38:46 +1200 Subject: [PATCH 084/110] Ensure our VDSO has DT_HASH and DT_VERSYM/DT_VERDEF/DT_VERDEFNUM entries --- CMakeLists.txt | 5 +++-- src/preload/rr_page.ld | 6 ++++++ src/preload/rr_vdso.S | 6 ++++++ src/test/util.h | 1 + src/test/vdso_parts.c | 45 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 src/test/vdso_parts.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cdc1a31321..00cbe02d904 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -342,7 +342,7 @@ foreach(file ${RR_PAGE_FILES}) set_source_files_properties("${CMAKE_SOURCE_DIR}/src/preload/${file}" PROPERTIES COMPILE_FLAGS ${PRELOAD_COMPILE_FLAGS}) endforeach(file) -set_target_properties(rrpage PROPERTIES LINK_FLAGS "-Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -nostartfiles -nostdlib -Wl,-z,max-page-size=0x1000 ${LINKER_FLAGS}") +set_target_properties(rrpage PROPERTIES LINK_FLAGS "-Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib -Wl,-z,max-page-size=0x1000 ${LINKER_FLAGS}") set_target_properties(rrpage PROPERTIES LINK_DEPENDS ${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld) # CMake seems to have trouble generating the link line without this set_target_properties(rrpage PROPERTIES LINKER_LANGUAGE C) @@ -1141,6 +1141,7 @@ set(BASIC_TESTS unshare userfaultfd utimes + vdso_parts vfork_flush vfork_shared video_capture @@ 
-1319,8 +1320,8 @@ set(TESTS_WITH_PROGRAM ttyname unexpected_stack_growth user_ignore_sig - vdso_gettimeofday_stack vdso_clock_gettime_stack + vdso_gettimeofday_stack vdso_time_stack vfork vfork_read_clone_stress diff --git a/src/preload/rr_page.ld b/src/preload/rr_page.ld index 552ef883c2b..f0e5ca909f5 100644 --- a/src/preload/rr_page.ld +++ b/src/preload/rr_page.ld @@ -37,3 +37,9 @@ SECTIONS .replay.text : { *(.replay.text) } :replay /DISCARD/ : { *(.debug_* ) } } + +VERSION { + VER_1 { + global:__vdso_time; + }; +} \ No newline at end of file diff --git a/src/preload/rr_vdso.S b/src/preload/rr_vdso.S index 82dd43870e5..5953c616e08 100644 --- a/src/preload/rr_vdso.S +++ b/src/preload/rr_vdso.S @@ -38,6 +38,9 @@ WEAK_ALIAS(time, __vdso_time) WEAK_ALIAS(clock_gettime, __vdso_clock_gettime) WEAK_ALIAS(gettimeofday,__vdso_gettimeofday) +// Dummy versioned symbol to trigger presence of DT_VERDEF/DT_VERSYM/DT_VERDEFNUM: +.symver __vdso_time,__vdso_time@VER_1 + #elif defined(__i386__) // __vdso functions use the C calling convention, so @@ -89,6 +92,9 @@ WEAK_ALIAS(clock_gettime, __vdso_clock_gettime) WEAK_ALIAS(clock_gettime64, __vdso_clock_gettime64) WEAK_ALIAS(gettimeofday,__vdso_gettimeofday) +// Dummy versioned symbol to trigger presence of DT_VERDEF/DT_VERSYM/DT_VERDEFNUM: +.symver __vdso_time,__vdso_time@VER_1 + #else #error "VDSO Hooks not defined for this platform" diff --git a/src/test/util.h b/src/test/util.h index dc654176e17..39c80aa5496 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include diff --git a/src/test/vdso_parts.c b/src/test/vdso_parts.c new file mode 100644 index 00000000000..9e067a9d914 --- /dev/null +++ b/src/test/vdso_parts.c @@ -0,0 +1,45 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +#ifdef __x86_64__ +static int found_dyn(Elf64_Dyn* dyn, Elf64_Sxword tag) { + while (dyn->d_tag != DT_NULL) { + if 
(dyn->d_tag == tag) { + return 1; + } + ++dyn; + } + return 0; +} +#endif + +int main(void) { +#ifdef __x86_64__ + char* vdso = (char*)getauxval(AT_SYSINFO_EHDR); + Elf64_Ehdr* ehdr = (Elf64_Ehdr*)vdso; + Elf64_Phdr* dynamic = NULL; + for (int i = 0; i < ehdr->e_phnum; ++i) { + Elf64_Phdr* phdr = (Elf64_Phdr*)(vdso + ehdr->e_phoff + i*ehdr->e_phentsize); + if (phdr->p_type == PT_DYNAMIC) { + dynamic = phdr; + break; + } + } + if (!dynamic) { + atomic_puts("PT_DYNAMIC not found in VDSO"); + return 1; + } + Elf64_Dyn* dyn = (Elf64_Dyn*)(vdso + dynamic->p_offset); + test_assert(found_dyn(dyn, DT_HASH)); + test_assert(found_dyn(dyn, DT_SYMTAB)); + test_assert(found_dyn(dyn, DT_STRTAB)); + test_assert(found_dyn(dyn, DT_VERSYM)); + test_assert(found_dyn(dyn, DT_VERDEF)); + test_assert(found_dyn(dyn, DT_VERDEFNUM)); + test_assert(found_dyn(dyn, DT_STRSZ)); +#endif + + atomic_puts("EXIT-SUCCESS"); + return 0; +} From 24df90a75d8834c086cd524f63426749f561db6b Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Thu, 17 Jun 2021 09:23:33 +1200 Subject: [PATCH 085/110] FS_IOC_FIEMAP can return EOPNOTSUPP --- src/test/ioctl_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/ioctl_fs.c b/src/test/ioctl_fs.c index a371c30e53e..b81d623e438 100644 --- a/src/test/ioctl_fs.c +++ b/src/test/ioctl_fs.c @@ -14,13 +14,13 @@ int main(void) { test_assert(fd >= 0); ret = ioctl(fd, FS_IOC_GETVERSION, &version); if (ret < 0) { - test_assert(errno == ENOTTY); + test_assert(errno == ENOTTY || errno == EOPNOTSUPP); } else { atomic_printf("version=%ld\n", version); } ret = ioctl(fd, FS_IOC_GETFLAGS, &flags); if (ret < 0) { - test_assert(errno == ENOTTY); + test_assert(errno == ENOTTY || errno == EOPNOTSUPP); } else { atomic_printf("flags=%lx\n", flags); } @@ -33,7 +33,7 @@ int main(void) { fm->fm_length = FIEMAP_MAX_OFFSET - fm->fm_start; ret = ioctl(fd, FS_IOC_FIEMAP, fm); if (ret < 0) { - test_assert(errno == ENOTTY); + test_assert(errno == ENOTTY || errno 
== EOPNOTSUPP); } else { atomic_printf("fm->fm_mapped_extents=%d\n", fm->fm_mapped_extents); for (unsigned int i=0; i < fm->fm_mapped_extents; i++) { From d1b5b788c16bd49e3be0119954d28943ae0bc503 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Fri, 18 Jun 2021 16:27:35 +1200 Subject: [PATCH 086/110] Fix some missing error handling exits --- src/Dwarf.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Dwarf.cc b/src/Dwarf.cc index dd859e19ecd..c86f1dec94c 100644 --- a/src/Dwarf.cc +++ b/src/Dwarf.cc @@ -453,7 +453,7 @@ template void DwarfCompilationUnit::init_size(DwarfSpan* debug_info template void DwarfCompilationUnit::init(DwarfSpan* debug_info, DwarfAbbrevs& abbrevs, bool* ok) { DwarfSpan span(*debug_info); auto h = span.read(ok); - if (!ok) { + if (!*ok) { return; } uint64_t length = h->preamble.unit_length; @@ -465,6 +465,9 @@ template void DwarfCompilationUnit::init(DwarfSpan* debug_info, Dwa debug_info->consume(length + sizeof(h->preamble)); DwarfAbbrevSet& abbrev_set = abbrevs.lookup(h->debug_abbrev_offset); die_ = make_unique(span, abbrev_set, sizeof(typename H::Size::Offset), h->address_size, ok); + if (!*ok) { + return; + } if (die_->tag() != DW_TAG_compile_unit && die_->tag() != DW_TAG_partial_unit && die_->tag() != DW_TAG_skeleton_unit) { From 5cd6e25788dd17e292f155db0e758cc81bc10e38 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 19 Jun 2021 00:10:26 +1200 Subject: [PATCH 087/110] Use the correct size for DWARF5 compilation units --- src/Dwarf.cc | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Dwarf.cc b/src/Dwarf.cc index c86f1dec94c..e297f35ed5b 100644 --- a/src/Dwarf.cc +++ b/src/Dwarf.cc @@ -45,14 +45,23 @@ template struct __attribute__((packed)) Dwarf5CompilationUnitHeade uint8_t unit_type; uint8_t address_size; typename D::Offset debug_abbrev_offset; + + void install_dwo_id(DwarfCompilationUnit* unit) const { + unit->set_dwo_id(0); + } +}; + 
+template struct __attribute__((packed)) Dwarf5SkeletonSplitCompilationUnitHeader { + typedef D Size; + typename D::CompilationUnitPreamble preamble; + uint16_t version; + uint8_t unit_type; + uint8_t address_size; + typename D::Offset debug_abbrev_offset; uint64_t dwo_id; void install_dwo_id(DwarfCompilationUnit* unit) const { - if (version == 5 && (unit_type == DW_UT_skeleton || unit_type == DW_UT_split_compile)) { - unit->set_dwo_id(dwo_id); - } else { - unit->set_dwo_id(0); - } + unit->set_dwo_id(dwo_id); } }; @@ -443,7 +452,15 @@ template void DwarfCompilationUnit::init_size(DwarfSpan* debug_info if (2 <= h->version && h->version <= 4) { init>(debug_info, abbrevs, ok); } else if (h->version == 5) { - init>(debug_info, abbrevs, ok); + auto hh = DwarfSpan(*debug_info).read>(ok); + if (!ok) { + return; + } + if (hh->unit_type == DW_UT_skeleton || hh->unit_type == DW_UT_split_compile) { + init>(debug_info, abbrevs, ok); + } else { + init>(debug_info, abbrevs, ok); + } } else { LOG(warn) << "Unknown compilation unit version " << h->version; *ok = false; From 5d9c89422c3ffb49a9a3b657f7a3af78b49fedaa Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 19 Jun 2021 00:15:49 +1200 Subject: [PATCH 088/110] Downgrade warnings to infos --- src/SourcesCommand.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 6f56a31e0fa..4bb42866e76 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -340,7 +340,7 @@ find_auxiliary_file(const string& original_file_name, goto found; } } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Next try in a subdirectory called .debug full_file_name = original_file_dir + "/.debug/" + aux_file_name; @@ -349,7 +349,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " 
<< full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Then try in /usr/lib/debug full_file_name = "/usr/lib/debug/" + aux_file_name; @@ -358,7 +358,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // Try in an appropriate subdirectory of /usr/lib/debug full_file_name = "/usr/lib/debug" + original_file_dir + "/" + aux_file_name; @@ -367,7 +367,7 @@ find_auxiliary_file(const string& original_file_name, if (fd.is_open()) { goto found; } - LOG(warn) << "Can't find external debuginfo file " << full_file_name; + LOG(info) << "Can't find external debuginfo file " << full_file_name; // If none of those worked, give up. LOG(warn) << "Exhausted auxilliary debuginfo search locations for " << aux_file_name; From 5126dacde40adc3493fd48225260c241a5edc3b1 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 19 Jun 2021 01:15:50 +1200 Subject: [PATCH 089/110] Support DW_FORM_string --- src/Dwarf.cc | 11 +++++++++++ src/Dwarf.h | 1 + 2 files changed, 12 insertions(+) diff --git a/src/Dwarf.cc b/src/Dwarf.cc index e297f35ed5b..551e6277a64 100644 --- a/src/Dwarf.cc +++ b/src/Dwarf.cc @@ -272,6 +272,15 @@ static size_t form_size(DWForm form, size_t address_size, size_t dwarf_size, Dwa case DW_FORM_strx2: return 2; case DW_FORM_strx3: return 3; case DW_FORM_strx4: return 4; + case DW_FORM_string: { + auto before = span->size(); + DwarfSpan a_span(*span); + a_span.read_null_terminated_string(ok); + if (!ok) { + return 0; + } + return before - a_span.size(); + } case DW_FORM_sec_offset: return dwarf_size; case DW_FORM_flag_present: return 0; case DW_FORM_implicit_const: return 0; @@ -394,6 +403,8 @@ static const char* decode_string(const DwarfCompilationUnit& cu, DwarfSpan span, } return debug_strs.debug_str.subspan(offset).read_null_terminated_string(ok); 
} + case DW_FORM_string: + return span.read_null_terminated_string(ok); default: LOG(warn) << "Unknown string form " << form; *ok = false; diff --git a/src/Dwarf.h b/src/Dwarf.h index a7796e0550c..eb2d3dba844 100644 --- a/src/Dwarf.h +++ b/src/Dwarf.h @@ -40,6 +40,7 @@ enum DWForm { DW_FORM_data2 = 0x05, DW_FORM_data4 = 0x06, DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, DW_FORM_data1 = 0x0b, DW_FORM_flag = 0x0c, DW_FORM_strp = 0x0e, From 99d3d4173badf63071eb05259e113dd3a398c5cb Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 19 Jun 2021 12:57:16 +1200 Subject: [PATCH 090/110] Cache access() calls to avoid syscalls Before: [roc@localhost code]$ time rr sources ~/pernosco/main/test-tmp/basics-demo >& ~/tmp/output2 real 3m19.648s user 1m9.157s sys 2m9.416s After: [roc@localhost code]$ time rr sources ~/pernosco/main/test-tmp/basics-demo >& ~/tmp/output2 real 0m36.160s user 0m36.009s sys 0m0.053s --- src/SourcesCommand.cc | 51 ++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 4bb42866e76..8e4cbea597a 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -122,6 +122,19 @@ static void prepend_path(const char* prefix, string& s) { } } +struct DirExistsCache { + unordered_map cache; + bool dir_exists(const string& dir) { + auto it = cache.find(dir); + if (it != cache.end()) { + return it->second; + } + bool exists = access(dir.c_str(), F_OK) == 0; + cache.insert(make_pair(dir, exists)); + return exists; + } +}; + // Resolve a file name relative to a compilation directory and relative directory. // file_name cannot be null, but the others can be. // Takes into accout the original file name as follows: @@ -134,7 +147,8 @@ static void prepend_path(const char* prefix, string& s) { // of original_file_name, and if we find a file there, we return that name. 
static string resolve_file_name(const char* original_file_name, const char* comp_dir, const char* rel_dir, - const char* file_name) { + const char* file_name, + DirExistsCache& dir_exists_cache) { string path = file_name; if (is_absolute(path)) { return path; @@ -161,8 +175,7 @@ static string resolve_file_name(const char* original_file_name, return path; } string candidate = original + "/" + path; - int ret = access(candidate.c_str(), F_OK); - if (!ret) { + if (dir_exists_cache.dir_exists(candidate)) { return candidate; } } @@ -182,7 +195,8 @@ static bool process_compilation_units(ElfFileReader& reader, const string& trace_relative_name, const string& original_file_name, const string& comp_dir_substitution, - set* file_names, vector* dwos) { + set* file_names, vector* dwos, + DirExistsCache& dir_exists_cache) { DwarfSpan debug_info = reader.dwarf_section(".debug_info"); DwarfSpan debug_abbrev = reader.dwarf_section(".debug_abbrev"); DwarfSpan debug_str = reader.dwarf_section(".debug_str"); @@ -248,7 +262,7 @@ static bool process_compilation_units(ElfFileReader& reader, } } if (has_dwo_id) { - string full_name = resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, dwo_name); + string full_name = resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, dwo_name, dir_exists_cache); string c; if (comp_dir) { c = comp_dir; @@ -263,7 +277,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } if (source_file_name) { - file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, source_file_name)); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, nullptr, source_file_name, dir_exists_cache)); } intptr_t stmt_list = cu.die().section_ptr_attr(DW_AT_stmt_list, &ok); if (stmt_list < 0 || !ok) { @@ -279,7 +293,7 @@ static bool process_compilation_units(ElfFileReader& reader, continue; } const char* dir = lines.directories()[f.directory_index]; - 
file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, dir, f.file_name)); + file_names->insert(resolve_file_name(original_file_name.c_str(), comp_dir, dir, f.file_name, dir_exists_cache)); } } while (!debug_info.empty()); @@ -425,7 +439,8 @@ static bool process_auxiliary_file(ElfFileReader& trace_file_reader, const map& comp_dir_substitutions, vector* dwos, set* external_debug_info, - bool already_used_file) { + bool already_used_file, + DirExistsCache& dir_exists_cache) { string build_id = trace_file_reader.read_buildid(); if (build_id.empty()) { LOG(warn) << "Main ELF binary has no build ID!"; @@ -440,12 +455,12 @@ static bool process_auxiliary_file(ElfFileReader& trace_file_reader, LOG(debug) << "\tFound comp_dir substitution " << it->second; did_work = process_compilation_units(aux_file_reader, alt_file_reader, trace_relative_name, original_file_name, - it->second, file_names, dwos); + it->second, file_names, dwos, dir_exists_cache); } else { LOG(debug) << "\tNone found"; did_work = process_compilation_units(aux_file_reader, alt_file_reader, trace_relative_name, original_file_name, - {}, file_names, dwos); + {}, file_names, dwos, dir_exists_cache); } if (!did_work) { @@ -467,7 +482,8 @@ static bool try_debuglink_file(ElfFileReader& trace_file_reader, set* file_names, const string& aux_file_name, const map& comp_dir_substitutions, vector* dwos, - set* external_debug_info) { + set* external_debug_info, + DirExistsCache& dir_exists_cache) { string full_file_name; auto reader = find_auxiliary_file(original_file_name, aux_file_name, full_file_name); @@ -488,14 +504,14 @@ static bool try_debuglink_file(ElfFileReader& trace_file_reader, trace_relative_name, original_file_name, file_names, full_file_name, DEBUGLINK, comp_dir_substitutions, - dwos, external_debug_info, false); + dwos, external_debug_info, false, dir_exists_cache); if (altlink_reader) { has_source_files |= process_auxiliary_file(trace_file_reader, *altlink_reader, nullptr, 
trace_relative_name, original_file_name, file_names, full_altfile_name, DEBUGALTLINK, comp_dir_substitutions, - dwos, external_debug_info, has_source_files); + dwos, external_debug_info, has_source_files, dir_exists_cache); } return has_source_files; } @@ -680,6 +696,7 @@ static int sources(const map& binary_file_names, const map external_debug_info; vector dwos; vector output_comp_dir_substitutions; + DirExistsCache dir_exists_cache; for (auto& pair : binary_file_names) { string trace_relative_name = pair.first; string original_name = pair.second; @@ -712,12 +729,12 @@ static int sources(const map& binary_file_names, const mapsecond }); has_source_files = process_compilation_units(reader, altlink_reader.get(), trace_relative_name, pair.second, - it->second, &file_names, &dwos); + it->second, &file_names, &dwos, dir_exists_cache); } else { LOG(debug) << "\tNone found"; has_source_files = process_compilation_units(reader, altlink_reader.get(), trace_relative_name, pair.second, - {}, &file_names, &dwos); + {}, &file_names, &dwos, dir_exists_cache); } /* If the original binary had source files, force the inclusion of any debugaltlink * file, even if it does not itself have compilation units (it may have relevant strings) @@ -728,7 +745,7 @@ static int sources(const map& binary_file_names, const map& binary_file_names, const map Date: Sun, 20 Jun 2021 00:09:11 +1200 Subject: [PATCH 091/110] Fix truly, mind-bogglingly broken ExternalDebugInfo::operator< --- src/SourcesCommand.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/SourcesCommand.cc b/src/SourcesCommand.cc index 8e4cbea597a..93c4a589544 100644 --- a/src/SourcesCommand.cc +++ b/src/SourcesCommand.cc @@ -308,13 +308,13 @@ struct ExternalDebugInfo { if (path < other.path) { return true; } - if (path == other.path) { + if (path > other.path) { return false; } if (build_id < other.build_id) { return true; } - if (build_id == other.build_id) { + if (build_id > other.build_id) { return 
false; } return type < other.type; @@ -457,7 +457,7 @@ static bool process_auxiliary_file(ElfFileReader& trace_file_reader, trace_relative_name, original_file_name, it->second, file_names, dwos, dir_exists_cache); } else { - LOG(debug) << "\tNone found"; + LOG(debug) << "\tNo comp_dir substitution found"; did_work = process_compilation_units(aux_file_reader, alt_file_reader, trace_relative_name, original_file_name, {}, file_names, dwos, dir_exists_cache); @@ -731,7 +731,7 @@ static int sources(const map& binary_file_names, const mapsecond, &file_names, &dwos, dir_exists_cache); } else { - LOG(debug) << "\tNone found"; + LOG(debug) << "\tNo comp_dir substitution found"; has_source_files = process_compilation_units(reader, altlink_reader.get(), trace_relative_name, pair.second, {}, &file_names, &dwos, dir_exists_cache); From 18d9e2d3100ceb4e9503abae8817d87bb2492767 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 22 Jun 2021 11:21:46 +1200 Subject: [PATCH 092/110] Fix faccess mode argument --- src/test/chmod.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/chmod.c b/src/test/chmod.c index b0815d63d68..4332c6f2847 100644 --- a/src/test/chmod.c +++ b/src/test/chmod.c @@ -13,9 +13,9 @@ int main(void) { test_assert(0 == access(file_path, W_OK)); test_assert(0 == fchmodat(AT_FDCWD, file_path, 0400, 0)); test_assert(0 == access(file_path, R_OK)); - test_assert(0 == faccessat(AT_FDCWD, file_path, 0400, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); + test_assert(0 == faccessat(AT_FDCWD, file_path, R_OK, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); #ifdef SYS_faccessat2 - test_assert(0 == syscall(SYS_faccessat2, AT_FDCWD, file_path, 0400, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); + test_assert(0 == syscall(SYS_faccessat2, AT_FDCWD, file_path, R_OK, AT_SYMLINK_NOFOLLOW) || errno == ENOSYS); #endif atomic_puts("EXIT-SUCCESS"); From b774becea808bdc4fe558390429846a66e3a5324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= 
Date: Tue, 22 Jun 2021 00:32:23 +0200 Subject: [PATCH 093/110] Change tweak_librrpage.py to relocate the last three sections. By the commit 38571520 two new sections get now added to librrpage.so. Therefore the assumption to relocate sections 12 to 14 did not work anymore. Also, librrpage.so had one section more than librrpage_32.so. This patch assumes now, not much better but a little, that the last three sections are .symtab, .strtab and .shstrtab, and have to be relocated. --- src/preload/tweak_librrpage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preload/tweak_librrpage.py b/src/preload/tweak_librrpage.py index 41139fda79d..ea62a9735fe 100755 --- a/src/preload/tweak_librrpage.py +++ b/src/preload/tweak_librrpage.py @@ -97,7 +97,7 @@ def write_uptr(is64, f, v): write_uptr(is64, f, new_table_offset) alloc_offset = new_table_offset + size - for n in range(12, 15): + for n in range(e_shnum-3, e_shnum): seek_nth_section_sh_offset(f, new_table_offset, e_shentsize, n, sh_offset_offset) sh_offs = read_uptr(is64, f) sh_size = read_uptr(is64, f) From 7b5db50c2c86566af9bc018ef72b7bbc624b5119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Mon, 21 Jun 2021 00:58:28 +0200 Subject: [PATCH 094/110] Ensure our 32-bit VDSO has the same additional sections like 64-bit VDSO. 
Related commit: 38571520bee9360aeb454a14be1de43e7490f324 --- CMakeLists.txt | 2 +- src/test/vdso_parts.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 00cbe02d904..8cc78ce9ad4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -672,7 +672,7 @@ if(rr_32BIT AND rr_64BIT) PROPERTIES COMPILE_FLAGS "-m32 ${PRELOAD_COMPILE_FLAGS}") endforeach(file) - set_target_properties(rrpage_32 PROPERTIES LINK_FLAGS "-m32 -Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -nostartfiles -nostdlib ${LINKER_FLAGS}") + set_target_properties(rrpage_32 PROPERTIES LINK_FLAGS "-m32 -Wl,-T -Wl,${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld -Wl,--hash-style=both -nostartfiles -nostdlib ${LINKER_FLAGS}") set_target_properties(rrpage_32 PROPERTIES LINK_DEPENDS ${CMAKE_SOURCE_DIR}/src/preload/rr_page.ld) set_target_properties(rrpage_32 PROPERTIES LINKER_LANGUAGE C) diff --git a/src/test/vdso_parts.c b/src/test/vdso_parts.c index 9e067a9d914..d75fb6646c7 100644 --- a/src/test/vdso_parts.c +++ b/src/test/vdso_parts.c @@ -12,6 +12,16 @@ static int found_dyn(Elf64_Dyn* dyn, Elf64_Sxword tag) { } return 0; } +#elif defined(__i386__) +static int found_dyn(Elf32_Dyn* dyn, Elf32_Sxword tag) { + while (dyn->d_tag != DT_NULL) { + if (dyn->d_tag == tag) { + return 1; + } + ++dyn; + } + return 0; +} #endif int main(void) { @@ -38,6 +48,29 @@ int main(void) { test_assert(found_dyn(dyn, DT_VERDEF)); test_assert(found_dyn(dyn, DT_VERDEFNUM)); test_assert(found_dyn(dyn, DT_STRSZ)); +#elif defined(__i386__) + char* vdso = (char*)getauxval(AT_SYSINFO_EHDR); + Elf32_Ehdr* ehdr = (Elf32_Ehdr*)vdso; + Elf32_Phdr* dynamic = NULL; + for (int i = 0; i < ehdr->e_phnum; ++i) { + Elf32_Phdr* phdr = (Elf32_Phdr*)(vdso + ehdr->e_phoff + i*ehdr->e_phentsize); + if (phdr->p_type == PT_DYNAMIC) { + dynamic = phdr; + break; + } + } + if (!dynamic) { + atomic_puts("PT_DYNAMIC not found in VDSO"); + return 1; + } + Elf32_Dyn* dyn = 
(Elf32_Dyn*)(vdso + dynamic->p_offset); + test_assert(found_dyn(dyn, DT_HASH)); + test_assert(found_dyn(dyn, DT_SYMTAB)); + test_assert(found_dyn(dyn, DT_STRTAB)); + test_assert(found_dyn(dyn, DT_VERSYM)); + test_assert(found_dyn(dyn, DT_VERDEF)); + test_assert(found_dyn(dyn, DT_VERDEFNUM)); + test_assert(found_dyn(dyn, DT_STRSZ)); #endif atomic_puts("EXIT-SUCCESS"); From 43f2829971c58e36fc1a357262bc54ca4788e346 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 22 Jun 2021 16:55:32 +1200 Subject: [PATCH 095/110] Print useful info when assertion fails --- src/record_syscall.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 9e08eccebeb..5ccc7689857 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -5516,7 +5516,9 @@ static void process_mmap(RecordTask* t, size_t length, int prot, int flags, if (rt->fd_table()->is_monitoring(f.fd)) { ASSERT(rt, rt->fd_table()->get_monitor(f.fd)->type() == - FileMonitor::Type::Mmapped); + FileMonitor::Type::Mmapped) + << "Expected monitor type Mmapped for fd " << f.fd << ", got monitor type " + << rt->fd_table()->get_monitor(f.fd)->type(); ((MmappedFileMonitor*)rt->fd_table()->get_monitor(f.fd))->revive(); } else { rt->fd_table()->add_monitor(rt, f.fd, new MmappedFileMonitor(rt, f.fd)); From 6599586d1f8f719a4a3d21a6f0992f9347783729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernhard=20=C3=9Cbelacker?= Date: Sun, 20 Jun 2021 02:10:40 +0200 Subject: [PATCH 096/110] Do not wait for SYNC_TOKEN until timeout exceeds, if subshell died already. 
--- src/test/ignored_async_usr1.run | 2 ++ src/test/nested_detach_kill.run | 2 ++ src/test/term_trace_cpu.run | 2 ++ src/test/term_trace_syscall.run | 2 ++ 4 files changed, 8 insertions(+) diff --git a/src/test/ignored_async_usr1.run b/src/test/ignored_async_usr1.run index 27f71c2690e..4c94ca492fd 100644 --- a/src/test/ignored_async_usr1.run +++ b/src/test/ignored_async_usr1.run @@ -3,10 +3,12 @@ source `dirname $0`/util.sh SYNC_TOKEN=disabled record $TESTNAME & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done echo " done. Delivering SIGUSR1 ..." diff --git a/src/test/nested_detach_kill.run b/src/test/nested_detach_kill.run index eacb0cfe3ea..f7b59dca697 100644 --- a/src/test/nested_detach_kill.run +++ b/src/test/nested_detach_kill.run @@ -11,10 +11,12 @@ save_exe "$NEST_EXE" save_exe "$SLEEP_EXE" touch record.out just_record $NEST_EXE-$nonce "$(which rr) record --nested=detach $PWD/$SLEEP_EXE-$nonce" & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $NEST_EXE-$nonce)) diff --git a/src/test/term_trace_cpu.run b/src/test/term_trace_cpu.run index 939b8cd42e4..d1d6652f67d 100644 --- a/src/test/term_trace_cpu.run +++ b/src/test/term_trace_cpu.run @@ -15,10 +15,12 @@ SYNC_TOKEN=spinning WAIT_SECS=1 record $EXE & +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! 
kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $EXE-$nonce)) diff --git a/src/test/term_trace_syscall.run b/src/test/term_trace_syscall.run index 0e67c56c923..03e0c3b5592 100644 --- a/src/test/term_trace_syscall.run +++ b/src/test/term_trace_syscall.run @@ -3,10 +3,12 @@ source `dirname $0`/util.sh SYNC_TOKEN=sleeping record $TESTNAME & # sleep "forever" +SUB_ID=$! echo "Waiting for token '$SYNC_TOKEN' from tracee ..." until grep -q $SYNC_TOKEN record.out; do sleep 0 + if ! kill -0 "$SUB_ID" >/dev/null 2>&1; then failed "subshell died, no need to longer wait for '$SYNC_TOKEN'"; exit; fi done rrpid=$(parent_pid_of $(pidof $TESTNAME-$nonce)) From 89ae3264b9b7c93a1c1bc4f26c39a2e910577922 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 22 Jun 2021 11:37:17 -0700 Subject: [PATCH 097/110] Recognize the latest syscalls. --- src/syscalls.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/syscalls.py b/src/syscalls.py index 929d0a0a76b..6831f24af39 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -1723,6 +1723,10 @@ def __init__(self, **kwargs): process_madvise = UnsupportedSyscall(x86=440, x64=440, generic=440) epoll_pwait2 = UnsupportedSyscall(x86=441, x64=441, generic=441) mount_setattr = UnsupportedSyscall(x86=442, x64=442, generic=442) +# 443 reserved for quotactl_path +landlock_create_ruleset = UnsupportedSyscall(x86=444, x64=444, generic=444) +landlock_add_rule = UnsupportedSyscall(x86=445, x64=445, generic=445) +landlock_restrict_self = UnsupportedSyscall(x86=446, x64=446, generic=446) # restart_syscall is a little special. restart_syscall = RestartSyscall(x86=0, x64=219, generic=128) From 81decaeffe2f786624eec2d039bdb26eab52028a Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 22 Jun 2021 12:04:01 -0700 Subject: [PATCH 098/110] Make rseq return ENOSYS for now. 
--- CMakeLists.txt | 1 + src/record_syscall.cc | 6 ++++-- src/syscalls.py | 2 +- src/test/rseq.c | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 src/test/rseq.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cc78ce9ad4..1554c112a0a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1022,6 +1022,7 @@ set(BASIC_TESTS rename rlimit robust_futex + rseq rusage samask save_data_fd diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 5ccc7689857..1f827494559 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -3968,8 +3968,9 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, return PREVENT_SWITCH; case Arch::io_uring_setup: - case Arch::io_setup: { - // Prevent the io_setup/io_uring from running and fake an ENOSYS return. We want + case Arch::io_setup: + case Arch::rseq: { + // Prevent the io_setup/io_uring_setup/rseq from running and fake an ENOSYS return. We want // to stop applications from using these APIs because we don't support them currently. Registers r = regs; r.set_arg2(0); @@ -6239,6 +6240,7 @@ static void rec_process_syscall_arch(RecordTask* t, case Arch::ptrace: case Arch::read: case Arch::readv: + case Arch::rseq: case Arch::sched_setaffinity: case Arch::userfaultfd: { // Restore the registers that we may have altered. 
diff --git a/src/syscalls.py b/src/syscalls.py index 6831f24af39..a4397029ea2 100644 --- a/src/syscalls.py +++ b/src/syscalls.py @@ -1682,7 +1682,7 @@ def __init__(self, **kwargs): pkey_free = EmulatedSyscall(x86=382, x64=331, generic=290) statx = EmulatedSyscall(x86=383, x64=332, generic=291, arg5="typename Arch::statx_struct") io_pgetevents = UnsupportedSyscall(x86=385, x64=333, generic=292) -rseq = UnsupportedSyscall(x86=386, x64=334, generic=293) +rseq = IrregularEmulatedSyscall(x86=386, x64=334, generic=293) clock_gettime64 = EmulatedSyscall(x86=403, arg2="typename Arch::Arch64::timespec") clock_settime64 = UnsupportedSyscall(x86=404) diff --git a/src/test/rseq.c b/src/test/rseq.c new file mode 100644 index 00000000000..86d76f5763e --- /dev/null +++ b/src/test/rseq.c @@ -0,0 +1,18 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +struct rseq { + uint32_t cpu_id_start; + uint32_t cpu_id; + uint64_t rseq_cs; + uint32_t flags; +}; + +int main(void) { + struct rseq rs; + int ret = syscall(RR_rseq, &rs, sizeof(rs), 0, 0); + test_assert(ret == -1 && errno == ENOSYS); + atomic_puts("EXIT-SUCCESS"); + return 0; +} From dfab4e7f452f738e0fbd22e81eda36841223b7db Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Thu, 24 Jun 2021 20:10:46 -0700 Subject: [PATCH 099/110] Skip call_exit on gdb 9.2 --- src/test/call_exit.py | 12 ++++++++++++ src/test/util.py | 10 +++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/test/call_exit.py b/src/test/call_exit.py index c03dab80903..1c8b262d8f4 100644 --- a/src/test/call_exit.py +++ b/src/test/call_exit.py @@ -1,5 +1,17 @@ +import sys from util import * +gdb_version = get_gdb_version() +if gdb_version < 10: + # On gdb 9.2 after calling exit(0) + # gdb's internal state is confused + # about which thread we're on, and + # the 'finish' command fails. 
+ send_gdb('c') + expect_gdb('EXIT-SUCCESS') + ok() + sys.exit(0) + send_gdb('b main') expect_gdb('Breakpoint 1') diff --git a/src/test/util.py b/src/test/util.py index d7891265f6f..c8c7bf1f4fd 100644 --- a/src/test/util.py +++ b/src/test/util.py @@ -2,7 +2,8 @@ __all__ = [ 'expect_gdb', 'send_gdb','expect_rr', 'expect_list', 'restart_replay', 'interrupt_gdb', 'ok', - 'failed', 'iterlines_both', 'last_match', 'get_exe_arch' ] + 'failed', 'iterlines_both', 'last_match', 'get_exe_arch', + 'get_gdb_version' ] # Public API def expect_gdb(what): @@ -99,6 +100,13 @@ def get_rr_cmd(): rrargs = sys.argv[1:] return (rrargs[0], rrargs[1:]) +def get_gdb_version(): + '''Return the gdb version''' + send_gdb('python print(gdb.VERSION)') + expect_gdb(r'(\d+.\d+)') + global gdb_rr + return float(gdb_rr.match.group(1)) + def send(prog, what): try: prog.send(what) From 21cad8366c7470ac62e5bbb89a7ec03da0ae3302 Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Fri, 25 Jun 2021 15:58:10 -0400 Subject: [PATCH 100/110] Implement GET_ACTION_AVAIL and GET_NOTIF_SIZES for seccomp Fixes #2903 --- src/kernel_abi.cc | 1 + src/kernel_abi.h | 7 +++++++ src/kernel_supplement.h | 6 ++++++ src/record_syscall.cc | 4 ++++ src/test/seccomp.c | 26 ++++++++++++++++++++++++++ src/test/util.h | 6 ++++++ 6 files changed, 50 insertions(+) diff --git a/src/kernel_abi.cc b/src/kernel_abi.cc index efc8100aa03..9ad4271554a 100644 --- a/src/kernel_abi.cc +++ b/src/kernel_abi.cc @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/src/kernel_abi.h b/src/kernel_abi.h index b597fb8d85b..90d54163f23 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -623,6 +623,13 @@ struct BaseArch : public wordsize, }; RR_VERIFY_TYPE(termio); + struct seccomp_notif_sizes { + uint16_t seccomp_notif; + uint16_t seccomp_notif_resp; + uint16_t seccomp_data; + }; + RR_VERIFY_TYPE(seccomp_notif_sizes); + struct serial_struct { signed_int type; signed_int line; diff --git 
a/src/kernel_supplement.h b/src/kernel_supplement.h index 6705ea14a78..617ed173657 100644 --- a/src/kernel_supplement.h +++ b/src/kernel_supplement.h @@ -76,6 +76,12 @@ namespace rr { #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 +#endif #ifndef SYS_SECCOMP #define SYS_SECCOMP 1 diff --git a/src/record_syscall.cc b/src/record_syscall.cc index 1f827494559..fddab68cea6 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -4633,6 +4633,7 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, case Arch::seccomp: switch ((unsigned int)regs.arg1()) { case SECCOMP_SET_MODE_STRICT: + case SECCOMP_GET_ACTION_AVAIL: break; case SECCOMP_SET_MODE_FILTER: { // Prevent the actual seccomp call. We'll fix this up afterwards. @@ -4641,6 +4642,9 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, t->set_regs(r); break; } + case SECCOMP_GET_NOTIF_SIZES: + syscall_state.reg_parameter(3); + break; default: syscall_state.expect_errno = EINVAL; break; diff --git a/src/test/seccomp.c b/src/test/seccomp.c index 94130b1c4d0..d06809d4e51 100644 --- a/src/test/seccomp.c +++ b/src/test/seccomp.c @@ -131,12 +131,38 @@ static void* run_thread(__attribute__((unused)) void* p) { return NULL; } +static void test_get_action_avail(void) { + // `SECCOMP_RET_ALLOW` is available since the first version of `SECCOMP_GET_ACTION_AVAIL` + uint32_t action = SECCOMP_RET_ALLOW; + int ret = syscall(RR_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action); + test_assert(ret == EINVAL || ret == 0); +} + +static void test_get_notif_sizes(void) { + struct { + uint16_t seccomp_notif; + uint16_t seccomp_notif_resp; + uint16_t seccomp_data; + } sizes; + int ret = syscall(RR_seccomp, SECCOMP_GET_NOTIF_SIZES, 0, &sizes); + test_assert(ret == EINVAL || ret == 0); + if (ret == 0) { + // These were the sizes when 
`SECCOMP_GET_NOTIF_SIZES` was first added. + test_assert(sizes.seccomp_notif >= 80); + test_assert(sizes.seccomp_notif_resp >= 24); + test_assert(sizes.seccomp_data >= 64); + } +} + int main(void) { struct sigaction sa; pthread_t thread; pthread_t w_thread; char ch; + test_get_action_avail(); + test_get_notif_sizes(); + test_assert(0 == pipe(pipe_fds)); sa.sa_sigaction = handler; diff --git a/src/test/util.h b/src/test/util.h index 39c80aa5496..895a4089d1f 100644 --- a/src/test/util.h +++ b/src/test/util.h @@ -395,6 +395,12 @@ inline static SyscallWrapper get_spurious_desched_syscall(void) { #ifndef SECCOMP_FILTER_FLAG_TSYNC #define SECCOMP_FILTER_FLAG_TSYNC 1 #endif +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif +#ifndef SECCOMP_GET_NOTIF_SIZES +#define SECCOMP_GET_NOTIF_SIZES 3 +#endif /* Old systems don't have linux/kcmp.h */ #define RR_KCMP_FILE 0 From 7fd07892496fe743c4de2c41c9729854ed1c3b0c Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Sat, 26 Jun 2021 18:30:51 +1200 Subject: [PATCH 101/110] Make block-cloning syscallbuf optimization use privileged syscalls to evade tracee seccomp filters --- CMakeLists.txt | 1 + src/preload/syscallbuf.c | 104 +++++++++++++++++++++++++++---------- src/test/seccomp_cloning.c | 62 ++++++++++++++++++++++ 3 files changed, 141 insertions(+), 26 deletions(-) create mode 100644 src/test/seccomp_cloning.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 1554c112a0a..9060cff0171 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1034,6 +1034,7 @@ set(BASIC_TESTS scm_rights scratch_read seccomp + seccomp_cloning seccomp_clone_fail seccomp_desched seccomp_kill_exit diff --git a/src/preload/syscallbuf.c b/src/preload/syscallbuf.c index 7a2d2384b06..58919b35a2e 100644 --- a/src/preload/syscallbuf.c +++ b/src/preload/syscallbuf.c @@ -280,6 +280,17 @@ static long traced_raw_syscall(const struct syscall_info* call) { RR_PAGE_SYSCALL_TRACED, 0, 0); } +/** + * Make a raw traced syscall using the 
params in |call|, privileged. + */ +static long privileged_traced_raw_syscall(const struct syscall_info* call) { + /* FIXME: pass |call| to avoid pushing these on the stack + * again. */ + return _raw_syscall(call->no, call->args[0], call->args[1], call->args[2], + call->args[3], call->args[4], call->args[5], + RR_PAGE_SYSCALL_PRIVILEGED_TRACED, 0, 0); +} + #if defined(SYS_fcntl64) #define RR_FCNTL_SYSCALL SYS_fcntl64 #else @@ -443,9 +454,9 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, untraced_replay_assist_syscall1(no, 0) #define privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, a5) \ - _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \ - (uintptr_t)a4, (uintptr_t)a5, \ - RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY, 0, 0) + untraced_syscall_base(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ + (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ + RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY) #define privileged_untraced_syscall5(no, a0, a1, a2, a3, a4) \ privileged_untraced_syscall6(no, a0, a1, a2, a3, a4, 0) #define privileged_untraced_syscall4(no, a0, a1, a2, a3) \ @@ -458,6 +469,22 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, privileged_untraced_syscall2(no, a0, 0) #define privileged_untraced_syscall0(no) privileged_untraced_syscall1(no, 0) +#define privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, a5) \ + _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, \ + (uintptr_t)a3, (uintptr_t)a4, (uintptr_t)a5, \ + RR_PAGE_SYSCALL_PRIVILEGED_UNTRACED_RECORDING_ONLY, 0, 0) +#define privileged_unrecorded_syscall5(no, a0, a1, a2, a3, a4) \ + privileged_unrecorded_syscall6(no, a0, a1, a2, a3, a4, 0) +#define privileged_unrecorded_syscall4(no, a0, a1, a2, a3) \ + privileged_unrecorded_syscall5(no, a0, a1, a2, a3, 0) +#define privileged_unrecorded_syscall3(no, a0, a1, a2) \ + privileged_unrecorded_syscall4(no, a0, a1, a2, 0) +#define 
privileged_unrecorded_syscall2(no, a0, a1) \ + privileged_unrecorded_syscall3(no, a0, a1, 0) +#define privileged_unrecorded_syscall1(no, a0) \ + privileged_unrecorded_syscall2(no, a0, 0) +#define privileged_unrecorded_syscall0(no) privileged_unrecorded_syscall1(no, 0) + #define replay_only_syscall6(no, a0, a1, a2, a3, a4, a5) \ _raw_syscall(no, (uintptr_t)a0, (uintptr_t)a1, (uintptr_t)a2, (uintptr_t)a3, \ (uintptr_t)a4, (uintptr_t)a5, \ @@ -473,7 +500,7 @@ untraced_replay_assist_syscall_base(int syscallno, long a0, long a1, long a2, #define replay_only_syscall0(no) replay_only_syscall1(no, 0) static int privileged_untraced_close(int fd) { - return privileged_untraced_syscall1(SYS_close, fd); + return privileged_unrecorded_syscall1(SYS_close, fd); } static int privileged_untraced_fcntl(int fd, int cmd, ...) { @@ -484,7 +511,7 @@ static int privileged_untraced_fcntl(int fd, int cmd, ...) { arg = va_arg(ap, void*); va_end(ap); - return privileged_untraced_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); + return privileged_unrecorded_syscall3(RR_FCNTL_SYSCALL, fd, cmd, arg); } /** @@ -923,18 +950,18 @@ static void arm_desched_event(void) { * avoid! :) Although we don't allocate extra space for these * ioctl's, we do record that we called them; the replayer * knows how to skip over them. */ - if ((int)privileged_untraced_syscall3(SYS_ioctl, - thread_locals->desched_counter_fd, - PERF_EVENT_IOC_ENABLE, 0)) { + if ((int)privileged_unrecorded_syscall3(SYS_ioctl, + thread_locals->desched_counter_fd, + PERF_EVENT_IOC_ENABLE, 0)) { fatal("Failed to ENABLE counter"); } } static void disarm_desched_event(void) { /* See above. 
*/ - if ((int)privileged_untraced_syscall3(SYS_ioctl, - thread_locals->desched_counter_fd, - PERF_EVENT_IOC_DISABLE, 0)) { + if ((int)privileged_unrecorded_syscall3(SYS_ioctl, + thread_locals->desched_counter_fd, + PERF_EVENT_IOC_DISABLE, 0)) { fatal("Failed to DISABLE counter"); } } @@ -1007,9 +1034,9 @@ static int start_commit_buffered_syscall(int syscallno, void* record_end, pid_t tid = 0; uid_t uid = 0; if (impose_spurious_desched) { - pid = privileged_untraced_syscall0(SYS_getpid); - tid = privileged_untraced_syscall0(SYS_gettid); - uid = privileged_untraced_syscall0(SYS_getuid); + pid = privileged_unrecorded_syscall0(SYS_getpid); + tid = privileged_unrecorded_syscall0(SYS_gettid); + uid = privileged_unrecorded_syscall0(SYS_getuid); } /* NB: the ordering of the next two statements is @@ -1042,9 +1069,9 @@ static int start_commit_buffered_syscall(int syscallno, void* record_end, si.si_fd = thread_locals->desched_counter_fd; si.si_pid = pid; si.si_uid = uid; - privileged_untraced_syscall4(SYS_rt_tgsigqueueinfo, pid, tid, - globals.desched_sig, - &si); + privileged_unrecorded_syscall4(SYS_rt_tgsigqueueinfo, pid, tid, + globals.desched_sig, + &si); } } return 1; @@ -1287,6 +1314,23 @@ static long sys_generic_nonblocking_fd(const struct syscall_info* call) { return commit_raw_syscall(call->no, ptr, ret); } +/** + * Call this for syscalls that have no memory effects, don't block, and + * have an fd as their first parameter, and should run privileged. 
+ */ +static long privileged_sys_generic_nonblocking_fd(const struct syscall_info* call) { + int fd = call->args[0]; + void* ptr = prep_syscall_for_fd(fd); + long ret; + + if (!start_commit_buffered_syscall(call->no, ptr, WONT_BLOCK)) { + return privileged_traced_raw_syscall(call); + } + ret = privileged_untraced_syscall6(call->no, fd, call->args[1], call->args[2], + call->args[3], call->args[4], call->args[5]); + return commit_raw_syscall(call->no, ptr, ret); +} + static long sys_clock_gettime(const struct syscall_info* call) { const int syscallno = SYS_clock_gettime; __kernel_clockid_t clk_id = (__kernel_clockid_t)call->args[0]; @@ -2005,7 +2049,7 @@ static int supported_open(const char* file_name, int flags) { (flags & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); } -static long sys_readlinkat(const struct syscall_info* call); +static long sys_readlinkat(const struct syscall_info* call, int privileged); static int check_file_open_ok(const struct syscall_info* call, int ret, int did_abort) { if (did_abort || ret < 0) { @@ -2016,7 +2060,7 @@ static int check_file_open_ok(const struct syscall_info* call, int ret, int did_ char link[PATH_MAX]; struct syscall_info readlink_call = { SYS_readlinkat, { -1, (long)buf, (long)link, sizeof(link), 0, 0 } }; - long link_ret = sys_readlinkat(&readlink_call); + long link_ret = sys_readlinkat(&readlink_call, 1); if (link_ret >= 0 && link_ret < (ssize_t)sizeof(link)) { link[link_ret] = 0; if (allow_buffered_open(link)) { @@ -2272,7 +2316,7 @@ static long sys_read(const struct syscall_info* call) { sizeof(void*) == 8 && !(count & 4095)) { struct syscall_info lseek_call = { SYS_lseek, { fd, 0, SEEK_CUR, 0, 0, 0 } }; - off_t lseek_ret = sys_generic_nonblocking_fd(&lseek_call); + off_t lseek_ret = privileged_sys_generic_nonblocking_fd(&lseek_call); if (lseek_ret >= 0 && !(lseek_ret & 4095)) { struct btrfs_ioctl_clone_range_args ioctl_args; int ioctl_ret; @@ -2291,11 +2335,11 @@ static long sys_read(const struct syscall_info* call) { { 
thread_locals->cloned_file_data_fd, BTRFS_IOC_CLONE_RANGE, (long)&ioctl_args, 0, 0, 0 } }; - ioctl_ret = traced_raw_syscall(&ioctl_call); + ioctl_ret = privileged_traced_raw_syscall(&ioctl_call); } else { ioctl_ret = - untraced_syscall3(SYS_ioctl, thread_locals->cloned_file_data_fd, - BTRFS_IOC_CLONE_RANGE, &ioctl_args); + privileged_untraced_syscall3(SYS_ioctl, thread_locals->cloned_file_data_fd, + BTRFS_IOC_CLONE_RANGE, &ioctl_args); ioctl_ret = commit_raw_syscall(SYS_ioctl, ioctl_ptr, ioctl_ret); } @@ -2408,7 +2452,7 @@ static long sys_readlink(const struct syscall_info* call) { } #endif -static long sys_readlinkat(const struct syscall_info* call) { +static long sys_readlinkat(const struct syscall_info* call, int privileged) { const int syscallno = SYS_readlinkat; int dirfd = call->args[0]; const char* path = (const char*)call->args[1]; @@ -2426,10 +2470,17 @@ static long sys_readlinkat(const struct syscall_info* call) { ptr += bufsiz; } if (!start_commit_buffered_syscall(syscallno, ptr, WONT_BLOCK)) { + if (privileged) { + return privileged_traced_raw_syscall(call); + } return traced_raw_syscall(call); } - ret = untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + if (privileged) { + ret = privileged_untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + } else { + ret = untraced_syscall4(syscallno, dirfd, path, buf2, bufsiz); + } ptr = copy_output_buffer(ret, ptr, buf, buf2); return commit_raw_syscall(syscallno, ptr, ret); } @@ -3291,7 +3342,8 @@ case SYS_epoll_pwait: #if defined(SYS_readlink) CASE(readlink); #endif - CASE(readlinkat); + case SYS_readlinkat: + return sys_readlinkat(call, 0); #if defined(SYS_recvfrom) CASE(recvfrom); #endif diff --git a/src/test/seccomp_cloning.c b/src/test/seccomp_cloning.c new file mode 100644 index 00000000000..658bb0a1989 --- /dev/null +++ b/src/test/seccomp_cloning.c @@ -0,0 +1,62 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +#define BUF_SIZE 65536 + 
+static void install_filter(void) { + struct sock_filter filter[] = { + /* Load system call number from 'seccomp_data' buffer into + accumulator */ + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr)), + /* Jump forward 1 instruction if system call number + is not SYS_read */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_read, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /* Jump forward 1 instruction if system call number + is not SYS_write */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_write, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /* Jump forward 1 instruction if system call number + is not SYS_exit_group */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, SYS_exit_group, 0, 1), + /* Allow syscall */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + /* Kill process */ + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])), + .filter = filter, + }; + int ret; + + ret = syscall(RR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &prog); + if (ret == -1 && errno == ENOSYS) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + } + test_assert(ret == 0); +} + +int main(void) { + char buf[BUF_SIZE]; + int fd = open("tmp.bin", O_RDWR | O_CREAT | O_EXCL, 0600); + test_assert(fd >= 0); + unlink("tmp.bin"); + + memset(buf, 1, sizeof(buf)); + + test_assert(write(fd, buf, BUF_SIZE) == BUF_SIZE); + test_assert(0 == lseek(fd, 0, SEEK_SET)); + + test_assert(0 == prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + install_filter(); + + test_assert(read(fd, buf, BUF_SIZE) == BUF_SIZE); + + atomic_puts("EXIT-SUCCESS"); + syscall(SYS_exit_group, 0); + return 0; +} From 67520320b5592aed97608f944368963f526c5c09 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 28 Jun 2021 10:21:12 -0700 Subject: [PATCH 102/110] Move explicit_checkpoints to x86/ because it uses rdtsc. 
--- CMakeLists.txt | 2 +- src/test/{ => x86}/explicit_checkpoints.c | 0 src/test/{ => x86}/explicit_checkpoints.py | 0 src/test/{ => x86}/explicit_checkpoints.run | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename src/test/{ => x86}/explicit_checkpoints.c (100%) rename src/test/{ => x86}/explicit_checkpoints.py (100%) rename src/test/{ => x86}/explicit_checkpoints.run (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9060cff0171..10cdd661f2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1228,7 +1228,7 @@ set(TESTS_WITH_PROGRAM exit_group exit_race exit_status - explicit_checkpoints + x86/explicit_checkpoints fork_stress fork_syscalls function_calls diff --git a/src/test/explicit_checkpoints.c b/src/test/x86/explicit_checkpoints.c similarity index 100% rename from src/test/explicit_checkpoints.c rename to src/test/x86/explicit_checkpoints.c diff --git a/src/test/explicit_checkpoints.py b/src/test/x86/explicit_checkpoints.py similarity index 100% rename from src/test/explicit_checkpoints.py rename to src/test/x86/explicit_checkpoints.py diff --git a/src/test/explicit_checkpoints.run b/src/test/x86/explicit_checkpoints.run similarity index 100% rename from src/test/explicit_checkpoints.run rename to src/test/x86/explicit_checkpoints.run From 2099ff3f8bc06434cc9cf2d73d3ae24b562dfae2 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 28 Jun 2021 11:27:35 -0700 Subject: [PATCH 103/110] implement my_write for gdb_bogus_breakpoint --- src/test/gdb_bogus_breakpoint.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/test/gdb_bogus_breakpoint.c b/src/test/gdb_bogus_breakpoint.c index 4e2f0ac7f6e..e14199366c0 100644 --- a/src/test/gdb_bogus_breakpoint.c +++ b/src/test/gdb_bogus_breakpoint.c @@ -5,11 +5,11 @@ static char ch = 'E'; static char my_write(int fd, void* buf, size_t size) { - long ret; /* Do a write syscall where the address of the buffer is at the top of stack during the syscall. 
This may trigger gdb to try to set a breakpoint in that buffer. */ #ifdef __x86_64__ + long ret; asm("push %5\n\t" "syscall\n\t" "nop\n\t" @@ -20,6 +20,7 @@ static char my_write(int fd, void* buf, size_t size) { : "=a"(ret) : "a"(SYS_write), "D"(fd), "S"(buf), "d"(size), "r"(&ch)); #elif __i386__ + long ret; asm("push %5\n\t" "int $0x80\n\t" "nop\n\t" @@ -29,6 +30,22 @@ static char my_write(int fd, void* buf, size_t size) { "mov (%5),%0\n\t" : "=a"(ret) : "a"(SYS_write), "b"(fd), "c"(buf), "d"(size), "r"(&ch)); +#elif __aarch64__ + register long x0 __asm("x0") = fd; + register long x1 __asm("x1") = (uintptr_t)buf; + register long x2 __asm("x2") = size; + register long x7 __asm("x7") = (uintptr_t)&ch; + register long x8 __asm("x8") = SYS_write; + asm("stp x1, x7, [sp, #-16]!\n\t" + "svc #0\n\t" + "nop\n\t" + "nop\n\t" + "nop\n\t" + "ldp x1, x7, [sp], #16\n\t" + "ldr x0, [x7]\n\t" + : "+r"(x0) + : "r"(x1), "r"(x2), "r"(x8), "r"(x7)); + long ret = x0; #else #error Unknown architecture #endif From 26fccb30272c13963ba26e8b6420b15379b26f1d Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 28 Jun 2021 11:28:11 -0700 Subject: [PATCH 104/110] Stub out the last couple things that don't work on AArch64. --- src/AutoRemoteSyscalls.cc | 5 +++++ src/preload/rr_vdso.S | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/AutoRemoteSyscalls.cc b/src/AutoRemoteSyscalls.cc index 865c082e072..4a62e09ad93 100644 --- a/src/AutoRemoteSyscalls.cc +++ b/src/AutoRemoteSyscalls.cc @@ -113,6 +113,11 @@ AutoRemoteSyscalls::AutoRemoteSyscalls(Task* t, } void AutoRemoteSyscalls::setup_path(bool enable_singlestep_path) { +#if defined(__aarch64__) + // XXXkhuey this fast path doesn't work on AArch64 yet, go slow instead + enable_singlestep_path = false; +#endif + if (!replaced_bytes.empty()) { // XXX what to do here to clean up if the task died unexpectedly? 
t->write_mem(remote_ptr(initial_regs.ip().to_data_ptr()), diff --git a/src/preload/rr_vdso.S b/src/preload/rr_vdso.S index 5953c616e08..b673cb2e98a 100644 --- a/src/preload/rr_vdso.S +++ b/src/preload/rr_vdso.S @@ -95,6 +95,8 @@ WEAK_ALIAS(gettimeofday,__vdso_gettimeofday) // Dummy versioned symbol to trigger presence of DT_VERDEF/DT_VERSYM/DT_VERDEFNUM: .symver __vdso_time,__vdso_time@VER_1 +#elif defined(__aarch64__) +// XXXkhuey there should probably be something here #else #error "VDSO Hooks not defined for this platform" From 713858c6b8c3d75bb75ae8d5557bb5a21c509e19 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 28 Jun 2021 12:34:59 -0700 Subject: [PATCH 105/110] Force use of the LSE-enabled ld. On Ubuntu 20.04, the libc6-lse package provides libc, pthreads, etc with LSE enabled. There's also an LSE-enabled ld.so, but it does not replace the default dynamic linker on the system, so every program on the system will use a non-LSE enabled ld.so. ld.so will use atomics in the presence of threads, so this precludes recording any program that uses threads. By forcing rr's test programs to use the LSE-enabled version of ld.so, our test suite can run successfully. 
--- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 10cdd661f2b..06665e54f45 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ set(supports32bit true) set(x86ish false) if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") set(supports32bit false) - set(FLAGS_COMMON "${FLAGS_COMMON} -march=armv8.3-a") + set(FLAGS_COMMON "${FLAGS_COMMON} -march=armv8.3-a -Wl,-dynamic-linker=/lib/aarch64-linux-gnu/atomics/ld-linux-aarch64.so.1") else() set(x86ish true) set(FLAGS_COMMON "${FLAGS_COMMON} -msse2 -D__MMX__ -D__SSE__ -D__SSE2__") From bcb7ec1e7087ba9370b413573be6bd0d40628dbb Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 29 Jun 2021 13:13:30 +1200 Subject: [PATCH 106/110] Keep high resource limits during replay --- CMakeLists.txt | 1 + src/Task.cc | 6 ++++-- src/test/fd_limit.c | 40 ++++++++++++++++++++++++++++++++++++++++ src/test/fd_limit.run | 3 +++ 4 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 src/test/fd_limit.c create mode 100644 src/test/fd_limit.run diff --git a/CMakeLists.txt b/CMakeLists.txt index 06665e54f45..4497b735583 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1229,6 +1229,7 @@ set(TESTS_WITH_PROGRAM exit_race exit_status x86/explicit_checkpoints + fd_limit fork_stress fork_syscalls function_calls diff --git a/src/Task.cc b/src/Task.cc index cc65b490e77..47a7f396cc1 100644 --- a/src/Task.cc +++ b/src/Task.cc @@ -3208,8 +3208,6 @@ static void set_up_process(Session& session, const ScopedFd& err_fd, /* TODO tracees can probably undo some of the setup below * ... */ - restore_initial_resource_limits(); - /* CLOEXEC so that the original fd here will be closed by the exec that's * about to happen. */ @@ -3245,6 +3243,10 @@ static void set_up_process(Session& session, const ScopedFd& err_fd, // signals being sent to these processes by the terminal --- in particular // SIGTSTP/SIGINT/SIGWINCH. 
setsid(); + // Preserve increased resource limits, in case the tracee + // increased its limits and we need high limits to apply during replay. + } else { + restore_initial_resource_limits(); } /* Do any architecture specific setup, such as disabling non-deterministic diff --git a/src/test/fd_limit.c b/src/test/fd_limit.c new file mode 100644 index 00000000000..c7e55e1aa7a --- /dev/null +++ b/src/test/fd_limit.c @@ -0,0 +1,40 @@ +/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */ + +#include "util.h" + +static void* do_thread(__attribute__((unused)) void* p) { + atomic_puts("EXIT-SUCCESS"); + return NULL; +} + +int main(void) { + pthread_t thread; + struct rlimit limit; + int ret = getrlimit(RLIMIT_NOFILE, &limit); + int new_fd; + rlim_t initial_limit = limit.rlim_cur; + test_assert(ret >= 0); + + if (initial_limit + 10 > limit.rlim_max) { + atomic_puts("Current soft limit cannot be increased enough, skipping test"); + atomic_puts("EXIT-SUCCESS"); + return 0; + } + + /* Increase soft limit. */ + limit.rlim_cur += 10; + ret = setrlimit(RLIMIT_NOFILE, &limit); + test_assert(ret >= 0); + + /* Consume file descriptors until we've allocated all previously available descriptors (plus one). 
*/ + do { + new_fd = open("/dev/null", O_RDONLY); + test_assert(new_fd >= 0); + } while (new_fd < (int)initial_limit); + + /* This will allocate new fds for thread stack and syscallbuf stuff */ + pthread_create(&thread, NULL, do_thread, NULL); + pthread_join(thread, NULL); + + return 0; +} diff --git a/src/test/fd_limit.run b/src/test/fd_limit.run new file mode 100644 index 00000000000..d52f1c922d7 --- /dev/null +++ b/src/test/fd_limit.run @@ -0,0 +1,3 @@ +source `dirname $0`/util.sh +ulimit -S -n 1024 +compare_test EXIT-SUCCESS From 0a26c2a1330142b891d943168c0dc562d065f2c6 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 29 Jun 2021 13:56:46 +1200 Subject: [PATCH 107/110] Suppress compiler warnings by checking for failed patch matches --- src/Monkeypatcher.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Monkeypatcher.cc b/src/Monkeypatcher.cc index e3e57d53e35..7d49f5730be 100644 --- a/src/Monkeypatcher.cc +++ b/src/Monkeypatcher.cc @@ -361,24 +361,30 @@ static bool patch_syscall_with_hook(Monkeypatcher& patcher, RecordTask* t, } template -static void match_extended_jump_patch(uint8_t patch[], +static bool match_extended_jump_patch(uint8_t patch[], uint64_t *return_addr); template <> -void match_extended_jump_patch( +bool match_extended_jump_patch( uint8_t patch[], uint64_t *return_addr) { uint32_t return_addr_lo, return_addr_hi; uint64_t jmp_target; - X64SyscallStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &jmp_target); + if (!X64SyscallStubExtendedJump::match(patch, &return_addr_lo, &return_addr_hi, &jmp_target)) { + return false; + } *return_addr = return_addr_lo | (((uint64_t)return_addr_hi) << 32); + return true; } template <> -void match_extended_jump_patch( +bool match_extended_jump_patch( uint8_t patch[], uint64_t *return_addr) { uint32_t return_addr_32, jmp_target_relative; - X86SyscallStubExtendedJump::match(patch, &return_addr_32, &jmp_target_relative); + if 
(!X86SyscallStubExtendedJump::match(patch, &return_addr_32, &jmp_target_relative)) { + return false; + } *return_addr = return_addr_32; + return true; } template @@ -412,7 +418,9 @@ static void unpatch_extended_jumps(Monkeypatcher& patcher, uint8_t bytes[ExtendedJumpPatch::size]; t->read_bytes_helper(patch.first, sizeof(bytes), bytes); uint64_t return_addr; - match_extended_jump_patch(bytes, &return_addr); + if (!match_extended_jump_patch(bytes, &return_addr)) { + ASSERT(t, false) << "Failed to match extended jump patch at " << patch.first; + } std::vector syscall = rr::syscall_instruction(t->arch()); From 782537e770f31b32272b4d1d1fa4c2ca790cf220 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Tue, 29 Jun 2021 14:01:00 +1200 Subject: [PATCH 108/110] Don't verify seccomp_notif_sizes --- src/kernel_abi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/kernel_abi.h b/src/kernel_abi.h index 90d54163f23..4776a5e97d7 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -628,7 +628,8 @@ struct BaseArch : public wordsize, uint16_t seccomp_notif_resp; uint16_t seccomp_data; }; - RR_VERIFY_TYPE(seccomp_notif_sizes); + // seccomp_notif_sizes is not present in older kernels + // RR_VERIFY_TYPE(seccomp_notif_sizes); struct serial_struct { signed_int type; From bcb7b3043f4b05624806442360980a073ab0ca1a Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 29 Jun 2021 11:54:59 -0700 Subject: [PATCH 109/110] Make error message more informative. 
--- src/ReplaySession.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ReplaySession.cc b/src/ReplaySession.cc index 7a5fc184637..794a4acc6d7 100644 --- a/src/ReplaySession.cc +++ b/src/ReplaySession.cc @@ -1334,7 +1334,8 @@ Completion ReplaySession::flush_syscallbuf(ReplayTask* t, Registers r = t->regs(); ASSERT(t, t->stop_sig() == SIGSEGV && r.ip() == t->vm()->do_breakpoint_fault_addr()) - << "Replay got unexpected signal (or none) " << t->stop_sig(); + << "Replay got unexpected signal (or none) " << t->stop_sig() + << " ip " << r.ip() << " breakpoint_fault_addr " << t->vm()->do_breakpoint_fault_addr(); r.set_ip(r.ip().increment_by_movrm_insn_length(t->arch())); t->set_regs(r); From 7797753637f3e800213c877bd7efec40bac1fe49 Mon Sep 17 00:00:00 2001 From: Dariusz Sosnowski Date: Tue, 6 Oct 2020 21:15:50 +0200 Subject: [PATCH 110/110] Adds support for RDMA_VERBS_IOCTL requests --- src/kernel_abi.h | 33 +++++++++++++++++++ src/record_syscall.cc | 73 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/src/kernel_abi.h b/src/kernel_abi.h index 4776a5e97d7..fb05fee38db 100644 --- a/src/kernel_abi.h +++ b/src/kernel_abi.h @@ -1801,6 +1801,39 @@ struct BaseArch : public wordsize, __u32 reserved[4]; }; RR_VERIFY_TYPE(fb_var_screeninfo); + + struct ib_uverbs_attr { + uint16_t attr_id; /* command specific type attribute */ + uint16_t len; /* only for pointers and IDRs array */ + uint16_t flags; /* combination of UVERBS_ATTR_F_XXXX */ + union { + struct { + uint8_t elem_id; + uint8_t reserved; + } enum_data; + uint16_t reserved; + } attr_data; + union { + /* + * ptr to command, inline data, idr/fd or + * ptr to __u32 array of IDRs + */ + uint64_t __attribute__((aligned(8))) data; + /* Used by FD_IN and FD_OUT */ + int64_t data_s64; + }; + }; + + struct ib_uverbs_ioctl_hdr { + uint16_t length; + uint16_t object_id; + uint16_t method_id; + uint16_t num_attrs; + uint64_t __attribute__((aligned(8))) reserved1; + 
uint32_t driver_id; + uint32_t reserved2; + struct ib_uverbs_attr attrs[0]; + }; }; struct X64Arch : public BaseArch { diff --git a/src/record_syscall.cc b/src/record_syscall.cc index fddab68cea6..6c00b820bb7 100644 --- a/src/record_syscall.cc +++ b/src/record_syscall.cc @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include #include #include @@ -1601,6 +1603,73 @@ template void prepare_ethtool_ioctl(RecordTask* t, TaskSyscallSt syscall_state.after_syscall_action(record_page_below_stack_ptr); } +template +static void prepare_rdma_verbs_ioctl(RecordTask* t, + TaskSyscallState& syscall_state) +{ + remote_ptr arg_p = syscall_state.syscall_entry_registers.arg(3); + auto ib_uverbs_hdr_p = arg_p.cast(); + if (ib_uverbs_hdr_p.is_null()) { + // TODO(sodar): Logging. + LOG(warn) << "ib_uverbs_hdr_p is NULL"; + syscall_state.expect_errno = EINVAL; + return; + } + + bool ok = true; + auto ib_uverbs_hdr = t->read_mem(ib_uverbs_hdr_p, &ok); + if (!ok) { + // TODO(sodar): Logging. + LOG(fatal) << "failed to read ib_uverbs_ioctl_hdr contents"; + syscall_state.expect_errno = EFAULT; + return; + } + + { + auto size = ib_uverbs_hdr.length; + auto p = syscall_state.reg_parameter(3, size, IN_OUT); + if (p.is_null()) { + // TODO(sodar): Logging. 
+ syscall_state.expect_errno = EINVAL; + return; + } + } + + auto attr_p = REMOTE_PTR_FIELD(ib_uverbs_hdr_p, attrs[0]); + for (unsigned int i = 0; i < ib_uverbs_hdr.num_attrs; ++i, ++attr_p) { + auto attr = t->read_mem(attr_p, &ok); + ASSERT(t, ok) << "failed to read attrs[" << i << "]"; + + if (ib_uverbs_hdr.object_id == UVERBS_OBJECT_DEVICE + && ib_uverbs_hdr.method_id == UVERBS_METHOD_INVOKE_WRITE) { + switch (attr.attr_id) { + case UVERBS_ATTR_CORE_IN: + case UVERBS_ATTR_UHW_IN: { + if (attr.len > sizeof(uint64_t)) { + auto data_p = REMOTE_PTR_FIELD(attr_p, data); + syscall_state.mem_ptr_parameter(data_p, attr.len, IN_OUT); + } + break; + } + case UVERBS_ATTR_CORE_OUT: + case UVERBS_ATTR_UHW_OUT: { + auto data_p = REMOTE_PTR_FIELD(attr_p, data); + syscall_state.mem_ptr_parameter(data_p, attr.len, IN_OUT); + break; + } + case UVERBS_ATTR_WRITE_CMD: { + // Should be inside attr struct. + break; + } + default: + ASSERT(t, false) << "unknown attr_id for INVOKE_WRITE verb"; + } + } + } + + return; +} + template static Switchable prepare_ioctl(RecordTask* t, TaskSyscallState& syscall_state) { @@ -1798,6 +1867,10 @@ static Switchable prepare_ioctl(RecordTask* t, case FBIOGET_VSCREENINFO: syscall_state.reg_parameter(3); return PREVENT_SWITCH; + + case RDMA_VERBS_IOCTL: + prepare_rdma_verbs_ioctl(t, syscall_state); + return PREVENT_SWITCH; } /* In ioctl language, "_IOC_READ" means "outparam". Both