diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d99b6c5..8fdba41 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,6 +6,8 @@ on: pull_request: branches: [master] +permissions: read-all + jobs: lint: runs-on: ubuntu-latest diff --git a/speakeasy/profiler.py b/speakeasy/profiler.py index 5f800f6..7244cbb 100644 --- a/speakeasy/profiler.py +++ b/speakeasy/profiler.py @@ -122,6 +122,7 @@ def __init__(self): self.coverage: set[int] = set() self.memory_regions: list[dict[str, Any]] = [] self.loaded_modules: list[dict[str, Any]] = [] + self.init_regs: dict[int, int] = {} def get_api_count(self): """ diff --git a/speakeasy/version.py b/speakeasy/version.py index 4b3c3ea..9b8d3ee 100644 --- a/speakeasy/version.py +++ b/speakeasy/version.py @@ -1 +1 @@ -__version__ = "2.0.0a1" +__version__ = "2.0.0b1" diff --git a/speakeasy/windows/win32.py b/speakeasy/windows/win32.py index 3539d70..257a3ef 100644 --- a/speakeasy/windows/win32.py +++ b/speakeasy/windows/win32.py @@ -222,7 +222,8 @@ def prepare_module_for_emulation(self, module, all_entrypoints): self.stop() raise Win32EmuError("Module not found") - # Check if any TLS callbacks exist, these run before the module's entry point + runs = [] + tls = module.get_tls_callbacks() for i, cb_addr in enumerate(tls): base = module.base @@ -232,6 +233,7 @@ def prepare_module_for_emulation(self, module, all_entrypoints): run.type = f"tls_callback_{i}" run.args = [base, DLL_PROCESS_ATTACH, 0] self.add_run(run) + runs.append(run) ep = module.base + module.ep @@ -241,20 +243,14 @@ def prepare_module_for_emulation(self, module, all_entrypoints): if not module.is_exe(): run.args = [module.base, DLL_PROCESS_ATTACH, 0] run.type = "dll_entry.DLL_PROCESS_ATTACH" - container = self.init_container_process() - if container: - self.processes.append(container) - self.curr_process = container else: run.type = "module_entry" run.args = [self.mem_map(8, tag=f"emu.module_arg_{i}") for i in range(4)] self.add_run(run) + runs.append(run) if all_entrypoints: - # Only emulate a subset of all the exported functions - # There are some modules (such as the windows kernel) with - # thousands of exports exports = [k for k in module.get_exports()[:MAX_EXPORTS_TO_EMULATE]] if exports: @@ -275,14 +271,11 @@ def prepare_module_for_emulation(self, module, all_entrypoints): argc, argv = self.build_service_main_args("IPRIP", char_width=char_width) run.args = [argc, argv] else: - # Here we set dummy args to pass into the export function run.args = args - # Store these runs and only queue them before the unload - # routine this is because some exports may not be ready to - # be called yet self.add_run(run) + runs.append(run) - return + return runs def run_module(self, module, all_entrypoints=False, emulate_children=False): """ @@ -291,59 +284,57 @@ def run_module(self, module, all_entrypoints=False, emulate_children=False): Arguments: module: Module to emulate """ - self.prepare_module_for_emulation(module, all_entrypoints) + runs = self.prepare_module_for_emulation(module, all_entrypoints) - # Create an empty process object for the module if none is - # supplied, only do this for the main module - if len(self.processes) == 0: + if not module.is_exe(): + container = self.init_container_process() + if container: + p = container + else: + p = objman.Process(self, path=module.emu_path, base=module.base, pe=module, cmdline=self.command_line) + else: p = objman.Process(self, path=module.emu_path, base=module.base, pe=module, cmdline=self.command_line) - self.curr_process = p - self.om.objects.update({p.address: p}) # type: ignore[union-attr] - mm = self.get_address_map(module.base) - if mm: - mm.process = self.curr_process - t = objman.Thread(self, stack_base=self.stack_base, stack_commit=module.stack_commit) + self.processes.append(p) + self.om.objects.update({p.address: p}) # type: ignore[union-attr] + mm = self.get_address_map(module.base) + if mm: + mm.process = p + t = objman.Thread(self, stack_base=self.stack_base, stack_commit=module.stack_commit) self.om.objects.update({t.address: t}) # type: ignore[union-attr] - self.curr_process.threads.append(t) # type: ignore[union-attr] - self.curr_thread = t - - if self.run_queue: - self.run_queue[0].thread = t + t.process = p + p.threads.append(t) - peb = self.alloc_peb(self.curr_process) + for r in runs: + r.process_context = p + r.thread = t - # Set the TEB - self.init_teb(t, peb) - - # Begin emulation of main module self.start() if not emulate_children or len(self.child_processes) == 0: return - # Emulate any child processes while len(self.child_processes) > 0: child = self.child_processes.pop(0) child.pe = self.load_module(data=child.pe_data) - self.prepare_module_for_emulation(child.pe, all_entrypoints) + child_runs = self.prepare_module_for_emulation(child.pe, all_entrypoints) self.command_line = child.cmdline + child.base = child.pe.base - self.curr_process = child - self.curr_process.base = child.pe.base - self.curr_thread = child.threads[0] + self.processes.append(child) - self.om.objects.update({self.curr_thread.address: self.curr_thread}) # type: ignore[union-attr] + child_thread = child.threads[0] + self.om.objects.update({child_thread.address: child_thread}) # type: ignore[union-attr] - # PEB and TEB will be initialized when the next run happens + for r in child_runs: + r.process_context = child + r.thread = child_thread self.start() - return - def _init_name(self, path, data=None): if not data: self.file_name = os.path.basename(path) @@ -419,8 +410,7 @@ def run_shellcode(self, sc_addr, stack_commit=0x4000, offset=0): if not target: raise Win32EmuError("Invalid shellcode address") - self.stack_base, stack_addr = self.alloc_stack(stack_commit) - self.set_func_args(self.stack_base, self.return_hook, 0x7000) + self.stack_base, _ = self.alloc_stack(stack_commit) run = Run() run.type = "shellcode" @@ -428,36 +418,29 @@ def run_shellcode(self, sc_addr, stack_commit=0x4000, offset=0): run.instr_cnt = 0 args = [self.mem_map(1024, tag=f"emu.shellcode_arg_{i}", base=0x41420000 + i) for i in range(4)] run.args = args + run.init_regs = {_arch.X86_REG_ECX: 1024} - self.reg_write(_arch.X86_REG_ECX, 1024) - - self.add_run(run) - - # Create an empty process object for the shellcode if none is - # supplied container = self.init_container_process() if container: self.processes.append(container) - self.curr_process = container + p = container else: p = objman.Process(self) self.processes.append(p) - self.curr_process = p + self.om.objects.update({p.address: p}) # type: ignore[union-attr] mm = self.get_address_map(sc_addr) if mm: - mm.process = self.curr_process + mm.process = p t = objman.Thread(self, stack_base=self.stack_base, stack_commit=stack_commit) self.om.objects.update({t.address: t}) # type: ignore[union-attr] - self.curr_process.threads.append(t) - - self.curr_thread = t - - peb = self.alloc_peb(self.curr_process) + t.process = p + p.threads.append(t) - # Set the TEB - self.init_teb(t, peb) + run.process_context = p + run.thread = t + self.add_run(run) self.start() @@ -575,6 +558,38 @@ def init_container_process(self): return proc return None + def _create_default_process(self, run): + mod = self.get_module_from_addr(run.start_addr) + + if mod and getattr(mod, "is_exe", lambda: False)(): + p = objman.Process( + self, + path=getattr(mod, "emu_path", ""), + base=getattr(mod, "base", 0), + pe=mod, + cmdline=self.command_line, + ) + else: + container = self.init_container_process() + if container: + p = container + elif mod: + p = objman.Process( + self, + path=getattr(mod, "emu_path", ""), + base=getattr(mod, "base", 0), + pe=mod, + ) + else: + p = objman.Process(self) + + self.processes.append(p) + self.om.objects.update({p.address: p}) # type: ignore[union-attr] + mm = self.get_address_map(run.start_addr) + if mm: + mm.process = p + return p + def _init_user_modules_from_config(self): proc_mod = None for p in self.config.processes: diff --git a/speakeasy/windows/winemu.py b/speakeasy/windows/winemu.py index ea8f114..eac357d 100644 --- a/speakeasy/windows/winemu.py +++ b/speakeasy/windows/winemu.py @@ -403,7 +403,6 @@ def call(self, addr, params=[]): """ Start emulating at the specified address """ - self.reset_stack(self.stack_base) run = Run() run.type = f"call_0x{addr:x}" run.start_addr = addr @@ -415,9 +414,50 @@ def call(self, addr, params=[]): else: self.add_run(run) + def _resolve_run_process(self, run): + if run.process_context: + return run.process_context + if run.thread and getattr(run.thread, "process", None): + return run.thread.process + if self.curr_process: + return self.curr_process + return self._create_default_process(run) + + def _create_default_process(self, run): + p = objman.Process(self) + self.processes.append(p) + self.om.objects.update({p.address: p}) # type: ignore[union-attr] + return p + + def _resolve_run_thread(self, run, proc): + if run.thread: + tp = getattr(run.thread, "process", None) + if tp is None: + run.thread.process = proc + if run.thread not in proc.threads: + proc.threads.append(run.thread) + elif tp is not proc: + raise WindowsEmuError( + f"Run thread is bound to a different process " + f"(thread.process={tp!r}, resolved={proc!r})" + ) + return run.thread + if self.kernel_mode: + return None + thread = objman.Thread(self, stack_base=self.stack_base) + self.om.objects.update({thread.address: thread}) # type: ignore[union-attr] + thread.process = proc + proc.threads.append(thread) + run.thread = thread + return thread + def _prepare_run_context(self, run): """ Prepare CPU and memory state for the given run without starting emulation. + + This is the single canonical path for process/thread/PEB/TEB/TLS + activation. All run types (call, module entry, shellcode, thread) + converge here. """ logger.info("* exec: %s", run.type) @@ -430,30 +470,24 @@ def _prepare_run_context(self, run): stk_ptr = self.get_stack_ptr() self.set_func_args(stk_ptr, self.return_hook, *run.args) + for reg, val in run.init_regs.items(): + self.reg_write(reg, val) stk_ptr = self.get_stack_ptr() stk_map = self.get_address_map(stk_ptr) self.curr_run.stack = MemAccess(base=stk_map.base, size=stk_map.size) - # Set the process context if possible - if run.process_context: - # Init a new peb if the process context changed: - if run.process_context != self.get_current_process(): - self.alloc_peb(run.process_context) - self.set_current_process(run.process_context) - if run.thread: - self.set_current_thread(run.thread) - elif not self.kernel_mode: - thread = objman.Thread(self, stack_base=self.stack_base) - self.om.objects.update({thread.address: thread}) - if self.curr_process: - thread.process = self.curr_process - self.curr_process.threads.append(thread) - run.thread = thread + proc = self._resolve_run_process(run) + run.process_context = proc + self.set_current_process(proc) + self.alloc_peb(proc) + + thread = self._resolve_run_thread(run, proc) + if thread: self.set_current_thread(thread) + run.thread = thread if not self.kernel_mode: - # Reset the TIB data thread = self.get_current_thread() if thread: self.init_teb(thread, self.curr_process.peb) # type: ignore[union-attr] @@ -517,8 +551,7 @@ def start(self, addr=None, size=None): self.set_hooks() self._set_emu_hooks() - # Initialize run context/register state before exposing the target to GDB, - # so the first stop reports a meaningful PC/SP/etc. + self.reset_stack(self.stack_base) self._prepare_run_context(run) if self.gdb_port is not None: diff --git a/tests/test_call_api.py b/tests/test_call_api.py new file mode 100644 index 0000000..28b1838 --- /dev/null +++ b/tests/test_call_api.py @@ -0,0 +1,250 @@ +import pytest + +from speakeasy import Speakeasy +from speakeasy.errors import WindowsEmuError +from speakeasy.profiler import Run +from speakeasy.windows import objman + +DLL_BINS = ["dll_test_x86.dll.xz", "dll_test_x64.dll.xz"] +EXE_BINS = ["argv_test_x86.exe.xz", "argv_test_x64.exe.xz"] + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_call_without_run_module(config, load_test_bin, bin_file): + """call() should work without run_module() being called first (GH-21).""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.call(mod.base + mod.ep, [mod.base, 1, 0]) + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_call_after_run_module(config, load_test_bin, bin_file): + """call() should still work after run_module() has set up context.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.run_module(mod) + se.call(mod.base + mod.ep, [mod.base, 1, 0]) + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_call_queued_during_run(config, load_test_bin, bin_file): + """call() queued while run_queue is non-empty defers context to execution time.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + ep = mod.base + mod.ep + se.call(ep, [mod.base, 1, 0]) + se.call(ep, [mod.base, 1, 0]) + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_call_context_is_consistent(config, load_test_bin, bin_file): + """After call(), every run has process_context, thread, and active PEB.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.call(mod.base + mod.ep, [mod.base, 1, 0]) + + emu = se.emu + assert emu.curr_process is not None, "no process after call()" + assert emu.curr_process.is_peb_active, "PEB not activated" + assert emu.curr_thread is not None, "no thread after call()" + for run in emu.runs: + assert run.process_context is not None, f"run {run.type} missing process_context" + assert run.thread is not None, f"run {run.type} has no thread" + assert run.thread.process is run.process_context, ( + f"run {run.type} thread.process mismatch" + ) + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_call_context_consistent_after_run_module(config, load_test_bin, bin_file): + """After run_module() + call(), context is consistent across all runs.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.run_module(mod) + se.call(mod.base + mod.ep, [mod.base, 1, 0]) + + emu = se.emu + assert emu.curr_process is not None + assert emu.curr_process.is_peb_active + assert emu.curr_thread is not None + for run in emu.runs: + assert run.process_context is not None, f"run {run.type} missing process_context" + assert run.thread is not None, f"run {run.type} has no thread" + assert run.thread.process is run.process_context, ( + f"run {run.type} thread.process mismatch" + ) + finally: + se.shutdown() + + +# --- EXE call() without run_module() --- + + +@pytest.mark.parametrize("bin_file", EXE_BINS) +def test_exe_call_without_run_module(config, load_test_bin, bin_file): + """call() on an EXE entrypoint creates a module-backed process, not a container.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + ep = mod.base + mod.ep + se.call(ep, [0, 0, 0, 0]) + + emu = se.emu + assert emu.curr_process is not None + assert emu.curr_process.pe is mod, "EXE call() should produce a module-backed process" + assert emu.curr_process.base == mod.base + finally: + se.shutdown() + + +# --- Process bookkeeping --- + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_process_in_processes_after_run_module(config, load_test_bin, bin_file): + """After run_module(), curr_process must be discoverable in self.processes.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.run_module(mod) + + emu = se.emu + assert emu.curr_process is not None + assert emu.curr_process in emu.processes, "curr_process not in self.processes" + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", EXE_BINS) +def test_process_in_processes_after_exe_run_module(config, load_test_bin, bin_file): + """After EXE run_module(), the active process is in self.processes.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.run_module(mod) + + emu = se.emu + assert emu.curr_process is not None + assert emu.curr_process in emu.processes, "EXE curr_process not in self.processes" + finally: + se.shutdown() + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_process_in_processes_after_call(config, load_test_bin, bin_file): + """After call() without run_module(), curr_process is in self.processes.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + se.call(mod.base + mod.ep, [mod.base, 1, 0]) + + emu = se.emu + assert emu.curr_process is not None + assert emu.curr_process in emu.processes + finally: + se.shutdown() + + +# --- Queued call does not mutate SP --- + + +@pytest.mark.parametrize("bin_file", DLL_BINS) +def test_queued_call_does_not_mutate_sp(config, load_test_bin, bin_file): + """Invoking call() while the queue is non-empty must not change SP/BP.""" + data = load_test_bin(bin_file) + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + ep = mod.base + mod.ep + + placeholder = Run() + placeholder.type = "placeholder" + placeholder.start_addr = ep + placeholder.args = [mod.base, 1, 0] + se.emu.add_run(placeholder) + + sp_before = se.emu.get_stack_ptr() + se.call(ep, [mod.base, 1, 0]) + sp_after = se.emu.get_stack_ptr() + + assert sp_before == sp_after, ( + f"call() mutated SP while queue non-empty: {sp_before:#x} -> {sp_after:#x}" + ) + finally: + se.shutdown() + + +# --- Shellcode --- + + +@pytest.mark.parametrize("arch", ["x86", "x64"]) +def test_shellcode_context(config, arch): + """Shellcode runs get proper process, thread, and register initialization.""" + sc_data = b"\xc3" # ret + se = Speakeasy(config=config) + try: + sc_addr = se.load_shellcode("test_shellcode", arch, data=sc_data) + se.run_shellcode(sc_addr) + + emu = se.emu + assert len(emu.runs) >= 1 + sc_run = emu.runs[0] + assert sc_run.process_context is not None, "shellcode run missing process_context" + assert sc_run.thread is not None, "shellcode run missing thread" + assert sc_run.thread.process is sc_run.process_context + assert sc_run.process_context in emu.processes + finally: + se.shutdown() + + +# --- Fail-fast on conflicting thread/process --- + + +def test_conflicting_thread_process_raises(config, load_test_bin): + """A run whose thread is bound to a different process should fail fast.""" + data = load_test_bin("dll_test_x86.dll.xz") + se = Speakeasy(config=config) + try: + mod = se.load_module(data=data) + + proc_a = objman.Process(se.emu) + proc_b = objman.Process(se.emu) + thread = objman.Thread(se.emu, stack_base=se.emu.stack_base) + thread.process = proc_a + proc_a.threads.append(thread) + + run = Run() + run.type = "test_conflict" + run.start_addr = mod.base + mod.ep + run.args = [mod.base, 1, 0] + run.process_context = proc_b + run.thread = thread + + se.emu.add_run(run) + + with pytest.raises(WindowsEmuError, match="different process"): + se.emu.start() + finally: + se.shutdown()