From 33d88400963d0da84f44296306c4e39af1f3cf5c Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 11 Jul 2025 20:28:33 +0900 Subject: [PATCH 01/35] meson: Add wasm64 support to the --cpu flag wasm64 target enables 64bit pointers using Emscripten's -sMEMORY64=1 flag[1]. This enables QEMU to run 64bit guests. Although the configure script uses "uname -m" as the fallback value when "cpu" is empty, this can't be used for Emscripten which targets to Wasm. So, in wasm build, this commit fixes configure to require --cpu flag to be explicitly specified by the user. [1] https://emscripten.org/docs/tools_reference/settings_reference.html#memory64 Signed-off-by: Kohei Tokunaga Reviewed-by: Pierrick Bouvier --- configure | 6 +++++- meson.build | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/configure b/configure index 274a7787642e2..112e4725fc443 100755 --- a/configure +++ b/configure @@ -365,7 +365,6 @@ elif check_define __APPLE__; then host_os=darwin elif check_define EMSCRIPTEN ; then host_os=emscripten - cpu=wasm32 cross_compile="yes" else # This is a fatal error, but don't report it yet, because we @@ -425,6 +424,8 @@ elif check_define __aarch64__ ; then cpu="aarch64" elif check_define __loongarch64 ; then cpu="loongarch64" +elif check_define EMSCRIPTEN ; then + error_exit "wasm32 or wasm64 must be specified to the cpu flag" else # Using uname is really broken, but it is just a fallback for architectures # that are going to use TCI anyway @@ -535,6 +536,9 @@ case "$cpu" in wasm32) CPU_CFLAGS="-m32" ;; + wasm64) + CPU_CFLAGS="-m64 -sMEMORY64=1" + ;; esac if test -n "$host_arch" && { diff --git a/meson.build b/meson.build index 50c774a19557a..44bb7ed334df4 100644 --- a/meson.build +++ b/meson.build @@ -52,7 +52,7 @@ qapi_trace_events = [] bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin'] supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux', 'emscripten'] supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64', - 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64', 'wasm32'] + 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64', 'wasm32', 'wasm64'] cpu = host_machine.cpu_family() @@ -916,7 +916,7 @@ if have_tcg if not get_option('tcg_interpreter') error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu)) endif - elif host_arch == 'wasm32' + elif host_arch == 'wasm32' or host_arch == 'wasm64' if not get_option('tcg_interpreter') error('WebAssembly host requires --enable-tcg-interpreter') endif From f0fcfef4573d19ff9291f6aac8f53c08f72509f7 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 12 Jul 2025 14:38:35 +0900 Subject: [PATCH 02/35] configure: Enable to propagate -sMEMORY64 flag to Emscripten MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there are some engines that don't support wasm64 (e.g. unsupported on Safari[1]). To mitigate this issue, the configure script allows the user to use Emscripten's compatibility feature, "-sMEMORY64=2" flag[2]. Emscripten's "-sMEMORY64=2" flag still enables 64bit pointers in C code. But this flag lowers the output binary into wasm32, with limiting the maximum memory size to 4GB. So QEMU can run on wasm32 engines. [1] https://webassembly.org/features/ [2] https://emscripten.org/docs/tools_reference/settings_reference.html#memory64 Signed-off-by: Kohei Tokunaga Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Pierrick Bouvier --- configure | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 112e4725fc443..8ac6cacc06b2f 100755 --- a/configure +++ b/configure @@ -182,6 +182,10 @@ EXTRA_CXXFLAGS="" EXTRA_OBJCFLAGS="" EXTRA_LDFLAGS="" +# The value is propagated to Emscripten's "-sMEMORY64" flag. +# https://emscripten.org/docs/tools_reference/settings_reference.html#memory64 +wasm64_memory64=1 + # Default value for a variable defining feature "foo". # * foo="no" feature will only be used if --enable-foo arg is given # * foo="" feature will be searched for, and if found, will be used @@ -239,6 +243,8 @@ for opt do ;; --without-default-features) default_feature="no" ;; + --wasm64-32bit-address-limit) wasm64_memory64="2" + ;; esac done @@ -537,7 +543,7 @@ case "$cpu" in CPU_CFLAGS="-m32" ;; wasm64) - CPU_CFLAGS="-m64 -sMEMORY64=1" + CPU_CFLAGS="-m64 -sMEMORY64=$wasm64_memory64" ;; esac @@ -795,6 +801,8 @@ for opt do ;; --disable-rust) rust=disabled ;; + --wasm64-32bit-address-limit) + ;; # everything else has the same name in configure and meson --*) meson_option_parse "$opt" "$optarg" ;; @@ -920,6 +928,8 @@ Advanced options (experts only): --disable-containers don't use containers for cross-building --container-engine=TYPE which container engine to use [$container_engine] --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] + --wasm64-32bit-address-limit Restrict wasm64 address space to 32-bit (default + is to use the whole 64-bit range). EOF meson_options_help cat << EOF From dd1d5aee5cc69ad95f6cf11106cf020bc3bc4b59 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 11 Jul 2025 20:32:18 +0900 Subject: [PATCH 03/35] dockerfiles: Add support for wasm64 to the wasm Dockerfile This commit fixes Dockerfile of the wasm build to support both of wasm32 and wasm64 build. Dockerfile takes the following build arguments and use these values for building dependencies. - TARGET_CPU: target wasm arch (wasm32 or wasm64) - WASM64_MEMORY64: target -sMEMORY64 mode (1 or 2) Signed-off-by: Kohei Tokunaga Reviewed-by: Pierrick Bouvier --- MAINTAINERS | 2 +- ...2-cross.docker => emsdk-wasm-cross.docker} | 29 ++++++++++++++----- 2 files changed, 23 insertions(+), 8 deletions(-) rename tests/docker/dockerfiles/{emsdk-wasm32-cross.docker => emsdk-wasm-cross.docker} (85%) diff --git a/MAINTAINERS b/MAINTAINERS index a07086ed76213..433a44118d624 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -647,7 +647,7 @@ F: include/system/os-wasm.h F: os-wasm.c F: util/coroutine-wasm.c F: configs/meson/emscripten.txt -F: tests/docker/dockerfiles/emsdk-wasm32-cross.docker +F: tests/docker/dockerfiles/emsdk-wasm-cross.docker Alpha Machines -------------- diff --git a/tests/docker/dockerfiles/emsdk-wasm32-cross.docker b/tests/docker/dockerfiles/emsdk-wasm-cross.docker similarity index 85% rename from tests/docker/dockerfiles/emsdk-wasm32-cross.docker rename to tests/docker/dockerfiles/emsdk-wasm-cross.docker index 60a7d02f5613e..4b41be62ab864 100644 --- a/tests/docker/dockerfiles/emsdk-wasm32-cross.docker +++ b/tests/docker/dockerfiles/emsdk-wasm-cross.docker @@ -1,14 +1,17 @@ # syntax = docker/dockerfile:1.5 -ARG EMSDK_VERSION_QEMU=3.1.50 +ARG EMSDK_VERSION_QEMU=4.0.10 ARG ZLIB_VERSION=1.3.1 ARG GLIB_MINOR_VERSION=2.84 ARG GLIB_VERSION=${GLIB_MINOR_VERSION}.0 ARG PIXMAN_VERSION=0.44.2 -ARG FFI_VERSION=v3.4.7 +ARG FFI_VERSION=v3.5.2 ARG MESON_VERSION=1.5.0 +ARG TARGET_CPU=wasm32 +ARG WASM64_MEMORY64=0 -FROM emscripten/emsdk:$EMSDK_VERSION_QEMU AS build-base +FROM emscripten/emsdk:$EMSDK_VERSION_QEMU AS build-base-common +ARG TARGET_CPU ARG MESON_VERSION ENV TARGET=/builddeps/target ENV CPATH="$TARGET/include" @@ -33,8 +36,8 @@ RUN < /cross.meson [host_machine] system = 'emscripten' -cpu_family = 'wasm32' -cpu = 'wasm32' +cpu_family = '${TARGET_CPU}' +cpu = '${TARGET_CPU}' endian = 'little' [binaries] @@ -46,6 +49,16 @@ pkgconfig = ['pkg-config', '--static'] EOT EOF +FROM build-base-common AS build-base-wasm32 + +FROM build-base-common AS build-base-wasm64 +ARG WASM64_MEMORY64 +ENV CFLAGS="$CFLAGS -sMEMORY64=${WASM64_MEMORY64}" +ENV CXXFLAGS="$CXXFLAGS -sMEMORY64=${WASM64_MEMORY64}" +ENV LDFLAGS="$LDFLAGS -sMEMORY64=${WASM64_MEMORY64}" + +FROM build-base-${TARGET_CPU} AS build-base + FROM build-base AS zlib-dev ARG ZLIB_VERSION RUN mkdir -p /zlib @@ -56,17 +69,19 @@ RUN emconfigure ./configure --prefix=$TARGET --static RUN emmake make install -j$(nproc) FROM build-base AS libffi-dev +ARG TARGET_CPU +ARG WASM64_MEMORY64 ARG FFI_VERSION RUN mkdir -p /libffi RUN git clone https://github.com/libffi/libffi /libffi WORKDIR /libffi RUN git checkout $FFI_VERSION RUN autoreconf -fiv -RUN emconfigure ./configure --host=wasm32-unknown-linux \ +RUN emconfigure ./configure --host=${TARGET_CPU}-unknown-linux \ --prefix=$TARGET --enable-static \ --disable-shared --disable-dependency-tracking \ --disable-builddir --disable-multi-os-directory \ - --disable-raw-api --disable-docs + --disable-raw-api --disable-docs WASM64_MEMORY64=${WASM64_MEMORY64} RUN emmake make install SUBDIRS='include' -j$(nproc) FROM build-base AS pixman-dev From 74f75ef5de887b7e27ba9d17e21fc083ba99fa67 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 11 Jul 2025 20:33:16 +0900 Subject: [PATCH 04/35] .gitlab-ci.d: Add build tests for wasm64 The wasm builds are tested for 3 targets: wasm32, wasm64(-sMEMORY64=1) and wasm64(-sMEMORY64=2). The CI builds the containers using the same Dockerfile (emsdk-wasm-cross.docker) with different build args. Signed-off-by: Kohei Tokunaga --- .gitlab-ci.d/buildtest.yml | 26 ++++++++++++++++++++++---- .gitlab-ci.d/container-cross.yml | 20 ++++++++++++++++++-- .gitlab-ci.d/container-template.yml | 4 +++- 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index d888a60063715..77ae8f828160c 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -787,11 +787,29 @@ coverity: # Always manual on forks even if $QEMU_CI == "2" - when: manual -build-wasm: +build-wasm32-32bit: extends: .wasm_build_job_template timeout: 2h needs: - job: wasm-emsdk-cross-container + job: wasm32-32bit-emsdk-cross-container variables: - IMAGE: emsdk-wasm32-cross - CONFIGURE_ARGS: --static --disable-tools --enable-debug --enable-tcg-interpreter + IMAGE: emsdk-wasm32-32bit-cross + CONFIGURE_ARGS: --static --cpu=wasm32 --disable-tools --enable-debug --enable-tcg-interpreter + +build-wasm64-64bit: + extends: .wasm_build_job_template + timeout: 2h + needs: + job: wasm64-64bit-emsdk-cross-container + variables: + IMAGE: emsdk-wasm64-64bit-cross + CONFIGURE_ARGS: --static --cpu=wasm64 --disable-tools --enable-debug --enable-tcg-interpreter + +build-wasm64-32bit: + extends: .wasm_build_job_template + timeout: 2h + needs: + job: wasm64-32bit-emsdk-cross-container + variables: + IMAGE: emsdk-wasm64-32bit-cross + CONFIGURE_ARGS: --static --cpu=wasm64 --wasm64-32bit-address-limit --disable-tools --enable-debug --enable-tcg-interpreter diff --git a/.gitlab-ci.d/container-cross.yml b/.gitlab-ci.d/container-cross.yml index 8d3be53b75b23..84c4be49f43fc 100644 --- a/.gitlab-ci.d/container-cross.yml +++ b/.gitlab-ci.d/container-cross.yml @@ -92,7 +92,23 @@ win64-fedora-cross-container: variables: NAME: fedora-win64-cross -wasm-emsdk-cross-container: +wasm32-32bit-emsdk-cross-container: extends: .container_job_template variables: - NAME: emsdk-wasm32-cross + NAME: emsdk-wasm32-32bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm32 + DOCKERFILE: emsdk-wasm-cross + +wasm64-64bit-emsdk-cross-container: + extends: .container_job_template + variables: + NAME: emsdk-wasm64-64bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm64 --build-arg WASM64_MEMORY64=1 + DOCKERFILE: emsdk-wasm-cross + +wasm64-32bit-emsdk-cross-container: + extends: .container_job_template + variables: + NAME: emsdk-wasm64-32bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm64 --build-arg WASM64_MEMORY64=2 + DOCKERFILE: emsdk-wasm-cross diff --git a/.gitlab-ci.d/container-template.yml b/.gitlab-ci.d/container-template.yml index 4eec72f383dd7..01ca8404136fb 100644 --- a/.gitlab-ci.d/container-template.yml +++ b/.gitlab-ci.d/container-template.yml @@ -10,12 +10,14 @@ - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest" - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" - until docker info; do sleep 1; done + - export DOCKERFILE_NAME=${DOCKERFILE:-$NAME} script: - echo "TAG:$TAG" - echo "COMMON_TAG:$COMMON_TAG" - docker build --tag "$TAG" --cache-from "$TAG" --cache-from "$COMMON_TAG" --build-arg BUILDKIT_INLINE_CACHE=1 - -f "tests/docker/dockerfiles/$NAME.docker" "." + $BUILD_ARGS + -f "tests/docker/dockerfiles/$DOCKERFILE_NAME.docker" "." - docker push "$TAG" after_script: - docker logout From a3135800931780f64d95cae0c461954c46ccecc1 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Tue, 26 Aug 2025 10:45:54 +0900 Subject: [PATCH 05/35] tcg/wasm: Add tcg-target.h and tcg-target-reg-bits.h The Wasm backend targets wasm64 as the host so TCG_TARGET_REG_BITS is set to 64. Since WebAssembly instructions vary in size and can include single-byte instructions, TCG_TARGET_INSN_UNIT_SIZE is set to 1. Signed-off-by: Kohei Tokunaga --- MAINTAINERS | 5 +++ tcg/wasm/tcg-target-reg-bits.h | 11 ++++++ tcg/wasm/tcg-target.h | 61 ++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 tcg/wasm/tcg-target-reg-bits.h create mode 100644 tcg/wasm/tcg-target.h diff --git a/MAINTAINERS b/MAINTAINERS index 433a44118d624..89e4b51e22608 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3999,6 +3999,11 @@ F: tcg/tci/ F: tcg/tci.c F: disas/tci.c +WebAssembly TCG target +M: Kohei Tokunaga +S: Maintained +F: tcg/wasm/ + Block drivers ------------- VMDK diff --git a/tcg/wasm/tcg-target-reg-bits.h b/tcg/wasm/tcg-target-reg-bits.h new file mode 100644 index 0000000000000..3dd821691ff89 --- /dev/null +++ b/tcg/wasm/tcg-target-reg-bits.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef TCG_TARGET_REG_BITS_H +#define TCG_TARGET_REG_BITS_H + +#if UINTPTR_MAX != UINT64_MAX +# error Unsupported pointer size for TCG target +#endif +#define TCG_TARGET_REG_BITS 64 + +#endif diff --git a/tcg/wasm/tcg-target.h b/tcg/wasm/tcg-target.h new file mode 100644 index 0000000000000..f00761d19fc01 --- /dev/null +++ b/tcg/wasm/tcg-target.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Tiny Code Generator for QEMU + * + * Based on tci/tcg-target.h + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef TCG_TARGET_H +#define TCG_TARGET_H + +#define TCG_TARGET_INSN_UNIT_SIZE 1 +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) + +/* Number of registers available. */ +#define TCG_TARGET_NB_REGS 16 + +/* List of registers which are used by TCG. */ +typedef enum { + TCG_REG_R0 = 0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + + TCG_REG_TMP = TCG_REG_R13, + TCG_AREG0 = TCG_REG_R14, + TCG_REG_CALL_STACK = TCG_REG_R15, +} TCGReg; + +#endif /* TCG_TARGET_H */ From 1582a3fb83f9a0f12a988bc159319362f69676e6 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 14:44:50 +0900 Subject: [PATCH 06/35] tcg/wasm: Add register-related definitions This commit adds the register allocation definitions and register names to the Wasm backend. As in TCI, call arguments are stored on the stack buffer and the return value is placed in the registers R0 and R1 when needed. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target.c.inc | 77 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tcg/wasm/tcg-target.c.inc diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc new file mode 100644 index 0000000000000..3affc172328cf --- /dev/null +++ b/tcg/wasm/tcg-target.c.inc @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * Based on tci/tcg-target.c.inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + /* 2 of these are call clobbered, so use them last. */ + TCG_REG_R1, + TCG_REG_R0, +}; + +#ifdef CONFIG_DEBUG_TCG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +}; +#endif + +/* No call arguments via registers. All will be stored on the "stack". */ +static const int tcg_target_call_iarg_regs[] = { }; + +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS); + return TCG_REG_R0 + slot; +} From d935ea00f23ba6b4ea5624a57af0056214c2f495 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 14:51:22 +0900 Subject: [PATCH 07/35] tcg/wasm: Add constraint definitions The Wasm backend integrates a forked TCI so its constraints are defined to remain compatible with TCI. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target-con-set.h | 19 +++++++++++++++++++ tcg/wasm/tcg-target-con-str.h | 14 ++++++++++++++ tcg/wasm/tcg-target.c.inc | 13 +++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 tcg/wasm/tcg-target-con-set.h create mode 100644 tcg/wasm/tcg-target-con-str.h diff --git a/tcg/wasm/tcg-target-con-set.h b/tcg/wasm/tcg-target-con-set.h new file mode 100644 index 0000000000000..0dc41ebe33f50 --- /dev/null +++ b/tcg/wasm/tcg-target-con-set.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Wasm target-specific constraint sets. + * + * Based on tci/tcg-target-con-set.h + * + * Copyright (c) 2021 Linaro + */ + +/* + * C_On_Im(...) defines a constraint set with outputs and inputs. + * Each operand should be a sequence of constraint letters as defined by + * tcg-target-con-str.h; the constraint combination is inclusive or. + */ +C_O0_I1(r) +C_O0_I2(r, r) +C_O1_I1(r, r) +C_O1_I2(r, r, r) +C_O1_I4(r, r, r, r, r) diff --git a/tcg/wasm/tcg-target-con-str.h b/tcg/wasm/tcg-target-con-str.h new file mode 100644 index 0000000000000..21ddbcc01a9d3 --- /dev/null +++ b/tcg/wasm/tcg-target-con-str.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define Wasm target-specific operand constraints. + * + * Based on tci/tcg-target-con-str.h + * + * Copyright (c) 2021 Linaro + */ + +/* + * Define constraint letters for register sets: + * REGS(letter, register_mask) + */ +REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 3affc172328cf..0b12c4ea03297 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -75,3 +75,16 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS); return TCG_REG_R0 + slot; } + +static TCGConstraintSetIndex +tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) +{ + return C_NotImplemented; +} + +/* Test if a constant matches the constraint. */ +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) +{ + return ct & TCG_CT_CONST; +} From 5baf88bab53be1c51736b8caad24111d386f3f28 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 14:57:21 +0900 Subject: [PATCH 08/35] tcg/wasm: Add relocation callbacks Relocation callbacks are used for the TCI instructions to preserve the original logic of the TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target.c.inc | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 0b12c4ea03297..4bcb5943609d7 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -88,3 +88,24 @@ static bool tcg_target_const_match(int64_t val, int ct, { return ct & TCG_CT_CONST; } + +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + memset(p, 0, sizeof(*p) * count); +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + intptr_t diff = value - (intptr_t)(code_ptr + 4); + + tcg_debug_assert(addend == 0); + tcg_debug_assert(type == 20); + + if (diff == sextract32(diff, 0, type)) { + tcg_patch32(code_ptr, + deposit32(*(uint32_t *)code_ptr, 32 - type, type, diff)); + return true; + } + return false; +} From f8cb9a61b63a69cce422b2964659b0e0472347bc Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 15:00:04 +0900 Subject: [PATCH 09/35] tcg/wasm: Add and/or/xor instructions This commit adds support for generateing the and, or and xor operations. The generated Wasm codes will be instantiated and executed in the browser. Browsers tipycally limit the number of active Wasm instances and the instantiating Wasm modules introduces overhead. As a result, instantiating TBs that are rarely called is undesirable. To address this, the Wasm backend relies on the a forked subset of the TCI interpreter (tcg_qemu_tb_exec_tci function in tcg/wasm.c) for executing such TBs. The Wasm backend emits both Wasm and TCI instructions. TCI instructions are emitted to s->code_ptr, while the corresponding Wasm instructions are generated into a separate buffer allocated via tcg_malloc(). This buffer intends to be merged into the TB before tcg_gen_code returns. In the Wasm code, each TCG variable is mapped to a 64bit Wasm variable. Execution works by first pushing the operands into the Wasm's stack using get instructions. The result is left on the stack and this can be assigned to a variable by popping it using a set instruction. The Wasm binary format is documented at [1]. Additionally, since the Wasm instuction's index operand must be LEB128-encoded, this commit introduces an encoder function implemented following [2]. [1] https://webassembly.github.io/spec/core/binary/index.html [2] https://en.wikipedia.org/wiki/LEB128 Signed-off-by: Kohei Tokunaga --- MAINTAINERS | 1 + tcg/wasm.c | 66 ++++++++++++++++ tcg/wasm/tcg-target.c.inc | 160 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 227 insertions(+) create mode 100644 tcg/wasm.c diff --git a/MAINTAINERS b/MAINTAINERS index 89e4b51e22608..217bf2066c2d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4003,6 +4003,7 @@ WebAssembly TCG target M: Kohei Tokunaga S: Maintained F: tcg/wasm/ +F: tcg/wasm.c Block drivers ------------- diff --git a/tcg/wasm.c b/tcg/wasm.c new file mode 100644 index 0000000000000..9f3b1344d6576 --- /dev/null +++ b/tcg/wasm.c @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * WebAssembly backend with forked TCI, based on tci.c + * + * Copyright (c) 2009, 2011, 2016 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "tcg/tcg.h" + +static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); +} + +static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) +{ + const uint32_t *tb_ptr = v_tb_ptr; + tcg_target_ulong regs[TCG_TARGET_NB_REGS]; + uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) + / sizeof(uint64_t)]; + + regs[TCG_AREG0] = (tcg_target_ulong)env; + regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; + + for (;;) { + uint32_t insn; + TCGOpcode opc; + TCGReg r0, r1, r2; + + insn = *tb_ptr++; + opc = extract32(insn, 0, 8); + + switch (opc) { + case INDEX_op_and: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] & regs[r2]; + break; + case INDEX_op_or: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] | regs[r2]; + break; + case INDEX_op_xor: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ^ regs[r2]; + break; + default: + g_assert_not_reached(); + } + } +} diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 4bcb5943609d7..a1757b4db7094 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -25,6 +25,10 @@ * THE SOFTWARE. */ +#include "qemu/queue.h" + +typedef uint32_t tcg_insn_unit_tci; + static const int tcg_target_reg_alloc_order[] = { TCG_REG_R2, TCG_REG_R3, @@ -109,3 +113,159 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type, } return false; } + +/* converts a TCG register to a wasm variable index */ +static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { + 0, /* TCG_REG_R0 */ + 1, /* TCG_REG_R1 */ + 2, /* TCG_REG_R2 */ + 3, /* TCG_REG_R3 */ + 4, /* TCG_REG_R4 */ + 5, /* TCG_REG_R5 */ + 6, /* TCG_REG_R6 */ + 7, /* TCG_REG_R7 */ + 8, /* TCG_REG_R8 */ + 9, /* TCG_REG_R9 */ + 10, /* TCG_REG_R10 */ + 11, /* TCG_REG_R11 */ + 12, /* TCG_REG_R12 */ + 13, /* TCG_REG_R13 */ + 14, /* TCG_REG_R14 */ + 15, /* TCG_REG_R15 */ +}; + +#define REG_IDX(r) tcg_target_reg_index[r] + +typedef enum { + OPC_GLOBAL_GET = 0x23, + OPC_GLOBAL_SET = 0x24, + + OPC_I64_AND = 0x83, + OPC_I64_OR = 0x84, + OPC_I64_XOR = 0x85, +} WasmInsn; + +#define BUF_SIZE 1024 +typedef struct LinkedBufEntry { + uint8_t data[BUF_SIZE]; + uint32_t size; + QSIMPLEQ_ENTRY(LinkedBufEntry) entry; +} LinkedBufEntry; + +typedef QSIMPLEQ_HEAD(, LinkedBufEntry) LinkedBuf; + +static void linked_buf_out8(LinkedBuf *linked_buf, uint8_t v) +{ + LinkedBufEntry *buf = QSIMPLEQ_LAST(linked_buf, LinkedBufEntry, entry); + if (!buf || (buf->size == BUF_SIZE)) { + LinkedBufEntry *e = tcg_malloc(sizeof(LinkedBufEntry)); + e->size = 0; + QSIMPLEQ_INSERT_TAIL(linked_buf, e, entry); + buf = e; + } + buf->data[buf->size++] = v; +} + +static void linked_buf_out_leb128(LinkedBuf *p, uint64_t v) +{ + uint8_t b; + do { + b = v & 0x7f; + v >>= 7; + if (v != 0) { + b |= 0x80; + } + linked_buf_out8(p, b); + } while (v != 0); +} + +/* + * wasm code is generataed in the dynamically allocated buffer which + * are managed as a linked list. + */ +static __thread LinkedBuf sub_buf; + +static void init_sub_buf(void) +{ + QSIMPLEQ_INIT(&sub_buf); +} +static void tcg_wasm_out8(TCGContext *s, uint8_t v) +{ + linked_buf_out8(&sub_buf, v); +} +static void tcg_wasm_out_leb128(TCGContext *s, uint64_t v) +{ + linked_buf_out_leb128(&sub_buf, v); +} + +static void tcg_wasm_out_op(TCGContext *s, WasmInsn opc) +{ + tcg_wasm_out8(s, opc); +} +static void tcg_wasm_out_op_idx(TCGContext *s, WasmInsn opc, uint32_t idx) +{ + tcg_wasm_out8(s, opc); + tcg_wasm_out_leb128(s, idx); +} + +static void tcg_wasm_out_o1_i2( + TCGContext *s, WasmInsn opc, TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, opc); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + tcg_out32(s, insn); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_and, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_AND, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_and, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_or, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_OR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_or, +}; + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_xor, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_XOR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_xor, +}; + +static void tcg_out_tb_start(TCGContext *s) +{ + init_sub_buf(); +} From 172f143f9806e41742137d975b2b80eb20f98a53 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 22 Aug 2025 17:33:17 +0900 Subject: [PATCH 10/35] tcg/wasm: Add add/sub/mul instructions The add, sub and mul operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 12 ++++++++++++ tcg/wasm/tcg-target.c.inc | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index 9f3b1344d6576..ba8a89d9206b1 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -59,6 +59,18 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] ^ regs[r2]; break; + case INDEX_op_add: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] + regs[r2]; + break; + case INDEX_op_sub: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] - regs[r2]; + break; + case INDEX_op_mul: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] * regs[r2]; + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index a1757b4db7094..d5cf324e7b72d 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -140,6 +140,9 @@ typedef enum { OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I64_ADD = 0x7c, + OPC_I64_SUB = 0x7d, + OPC_I64_MUL = 0x7e, OPC_I64_AND = 0x83, OPC_I64_OR = 0x84, OPC_I64_XOR = 0x85, @@ -265,6 +268,42 @@ static const TCGOutOpBinary outop_xor = { .out_rrr = tgen_xor, }; +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_add, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_ADD, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_add, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_sub, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_SUB, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_mul, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_MUL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From cb7883a5da41bad585b28293458c7bdc52057cd9 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 13:59:25 +0900 Subject: [PATCH 11/35] tcg/wasm: Add shl/shr/sar instructions This commit implements the shl, shr and sar operations using Wasm instructions. Since the Wasm backend uses 64bit variables, right shifts on 32bit values extract the lower 32bit of the operand before shifting. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 31 +++++++++++++ tcg/wasm/tcg-target.c.inc | 93 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index ba8a89d9206b1..b63b88e0110fd 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -28,6 +28,15 @@ static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) *r2 = extract32(insn, 16, 4); } +static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, + uint8_t *i2, uint8_t *i3) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = extract32(insn, 16, 6); + *i3 = extract32(insn, 22, 6); +} + static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) { const uint32_t *tb_ptr = v_tb_ptr; @@ -42,6 +51,7 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) uint32_t insn; TCGOpcode opc; TCGReg r0, r1, r2; + uint8_t pos, len; insn = *tb_ptr++; opc = extract32(insn, 0, 8); @@ -71,6 +81,27 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] * regs[r2]; break; + case INDEX_op_extract: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = extract64(regs[r1], pos, len); + break; + case INDEX_op_sextract: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = sextract64(regs[r1], pos, len); + break; + case INDEX_op_shl: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS); + break; + case INDEX_op_shr: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS); + break; + case INDEX_op_sar: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ((tcg_target_long)regs[r1] + >> (regs[r2] % TCG_TARGET_REG_BITS)); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index d5cf324e7b72d..3a2a707619d53 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -140,12 +140,21 @@ typedef enum { OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I32_SHR_S = 0x75, + OPC_I32_SHR_U = 0x76, + OPC_I64_ADD = 0x7c, OPC_I64_SUB = 0x7d, OPC_I64_MUL = 0x7e, OPC_I64_AND = 0x83, OPC_I64_OR = 0x84, OPC_I64_XOR = 0x85, + OPC_I64_SHL = 0x86, + OPC_I64_SHR_S = 0x87, + OPC_I64_SHR_U = 0x88, + + OPC_I32_WRAP_I64 = 0xa7, + OPC_I64_EXTEND_I32_U = 0xad, } WasmInsn; #define BUF_SIZE 1024 @@ -219,6 +228,27 @@ static void tcg_wasm_out_o1_i2( tcg_wasm_out_op(s, opc); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); } +static void tcg_wasm_out_o1_i2_type( + TCGContext *s, TCGType type, WasmInsn opc32, WasmInsn opc64, + TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, opc32); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + case TCG_TYPE_I64: + tcg_wasm_out_o1_i2(s, opc64, ret, arg1, arg2); + break; + default: + g_assert_not_reached(); + } +} static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2) @@ -232,6 +262,21 @@ static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, tcg_out32(s, insn); } +static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, + TCGReg r1, uint8_t b2, uint8_t b3) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(b2 == extract32(b2, 0, 6)); + tcg_debug_assert(b3 == extract32(b3, 0, 6)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 6, b2); + insn = deposit32(insn, 22, 6, b3); + tcg_out32(s, insn); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -304,6 +349,54 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_shl, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_SHL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shl, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGReg orig_a1 = a1; + if (type < TCG_TYPE_REG) { + tcg_out_op_rrbb(s, INDEX_op_extract, TCG_REG_TMP, a1, 0, 32); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, INDEX_op_shr, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_SHR_U, OPC_I64_SHR_U, + a0, orig_a1, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shr, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGReg orig_a1 = a1; + if (type < TCG_TYPE_REG) { + tcg_out_op_rrbb(s, INDEX_op_sextract, TCG_REG_TMP, a1, 0, 32); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, INDEX_op_sar, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_SHR_S, OPC_I64_SHR_S, + a0, orig_a1, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sar, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 9dbeb865a3d0a4cbf9b1e4c4da6c76d977bb927e Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 14:37:10 +0900 Subject: [PATCH 12/35] tcg/wasm: Add setcond/negsetcond/movcond instructions These TCG instructions are implemented by using Wasm's if and else instructions. TCI instructions are also generated in the same way as the original TCI backend. Since support for TCG_COND_TSTEQ and TCG_COND_TSTNE is not yet implemented, TCG_TARGET_HAS_tst is set to 0. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 136 +++++++++++++++++++- tcg/wasm/tcg-target-has.h | 7 ++ tcg/wasm/tcg-target-opc.h.inc | 8 ++ tcg/wasm/tcg-target.c.inc | 230 ++++++++++++++++++++++++++++++++++ 4 files changed, 380 insertions(+), 1 deletion(-) create mode 100644 tcg/wasm/tcg-target-has.h create mode 100644 tcg/wasm/tcg-target-opc.h.inc diff --git a/tcg/wasm.c b/tcg/wasm.c index b63b88e0110fd..183dad10a2aa4 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -21,6 +21,12 @@ #include "qemu/osdep.h" #include "tcg/tcg.h" +static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); +} + static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) { *r0 = extract32(insn, 8, 4); @@ -37,6 +43,110 @@ static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, *i3 = extract32(insn, 22, 6); } +static void tci_args_rrrc(uint32_t insn, + TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *c3 = extract32(insn, 20, 4); +} + +static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, + TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); + *r4 = extract32(insn, 24, 4); + *c5 = extract32(insn, 28, 4); +} + +static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) +{ + bool result = false; + int32_t i0 = u0; + int32_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + g_assert_not_reached(); + } + return result; +} + +static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) +{ + bool result = false; + int64_t i0 = u0; + int64_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + g_assert_not_reached(); + } + return result; +} + static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) { const uint32_t *tb_ptr = v_tb_ptr; @@ -50,8 +160,10 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) for (;;) { uint32_t insn; TCGOpcode opc; - TCGReg r0, r1, r2; + TCGReg r0, r1, r2, r3, r4; uint8_t pos, len; + TCGCond condition; + uint32_t tmp32; insn = *tb_ptr++; opc = extract32(insn, 0, 8); @@ -102,6 +214,28 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) regs[r0] = ((tcg_target_long)regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS)); break; + case INDEX_op_neg: + tci_args_rr(insn, &r0, &r1); + regs[r0] = -regs[r1]; + break; + case INDEX_op_setcond: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare64(regs[r1], regs[r2], condition); + break; + case INDEX_op_movcond: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare64(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; + case INDEX_op_tci_setcond32: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare32(regs[r1], regs[r2], condition); + break; + case INDEX_op_tci_movcond32: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare32(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h new file mode 100644 index 0000000000000..7e3caf8790d23 --- /dev/null +++ b/tcg/wasm/tcg-target-has.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef TCG_TARGET_HAS_H +#define TCG_TARGET_HAS_H + +#define TCG_TARGET_HAS_tst 0 + +#endif diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc new file mode 100644 index 0000000000000..57274d45693a1 --- /dev/null +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Based on tci/tcg-target-opc.h.inc + * + * These opcodes for use between the tci generator and interpreter. + */ +DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 3a2a707619d53..70de3bbf83378 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -137,9 +137,37 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { #define REG_IDX(r) tcg_target_reg_index[r] typedef enum { + OPC_IF = 0x04, + OPC_ELSE = 0x05, + OPC_END = 0x0b, OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I32_CONST = 0x41, + OPC_I64_CONST = 0x42, + + OPC_I32_EQ = 0x46, + OPC_I32_NE = 0x47, + OPC_I32_LT_S = 0x48, + OPC_I32_LT_U = 0x49, + OPC_I32_GT_S = 0x4a, + OPC_I32_GT_U = 0x4b, + OPC_I32_LE_S = 0x4c, + OPC_I32_LE_U = 0x4d, + OPC_I32_GE_S = 0x4e, + OPC_I32_GE_U = 0x4f, + + OPC_I64_EQ = 0x51, + OPC_I64_NE = 0x52, + OPC_I64_LT_S = 0x53, + OPC_I64_LT_U = 0x54, + OPC_I64_GT_S = 0x55, + OPC_I64_GT_U = 0x56, + OPC_I64_LE_S = 0x57, + OPC_I64_LE_U = 0x58, + OPC_I64_GE_S = 0x59, + OPC_I64_GE_U = 0x5a, + OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, @@ -157,6 +185,10 @@ typedef enum { OPC_I64_EXTEND_I32_U = 0xad, } WasmInsn; +typedef enum { + BLOCK_I64 = 0x7e, +} WasmBlockType; + #define BUF_SIZE 1024 typedef struct LinkedBufEntry { uint8_t data[BUF_SIZE]; @@ -191,6 +223,23 @@ static void linked_buf_out_leb128(LinkedBuf *p, uint64_t v) } while (v != 0); } +static void linked_buf_out_sleb128(LinkedBuf *p, int64_t v) +{ + bool more = true; + uint8_t b; + while (more) { + b = v & 0x7f; + v >>= 7; + if (((v == 0) && ((b & 0x40) == 0)) || + ((v == -1) && ((b & 0x40) != 0))) { + more = false; + } else { + b |= 0x80; + } + linked_buf_out8(p, b); + } +} + /* * wasm code is generataed in the dynamically allocated buffer which * are managed as a linked list. @@ -209,6 +258,10 @@ static void tcg_wasm_out_leb128(TCGContext *s, uint64_t v) { linked_buf_out_leb128(&sub_buf, v); } +static void tcg_wasm_out_sleb128(TCGContext *s, int64_t v) +{ + linked_buf_out_sleb128(&sub_buf, v); +} static void tcg_wasm_out_op(TCGContext *s, WasmInsn opc) { @@ -219,6 +272,25 @@ static void tcg_wasm_out_op_idx(TCGContext *s, WasmInsn opc, uint32_t idx) tcg_wasm_out8(s, opc); tcg_wasm_out_leb128(s, idx); } +static void tcg_wasm_out_op_block(TCGContext *s, WasmInsn opc, WasmBlockType t) +{ + tcg_wasm_out8(s, opc); + tcg_wasm_out8(s, t); +} +static void tcg_wasm_out_op_const(TCGContext *s, WasmInsn opc, int64_t v) +{ + tcg_wasm_out8(s, opc); + switch (opc) { + case OPC_I32_CONST: + tcg_wasm_out_sleb128(s, (int32_t)v); + break; + case OPC_I64_CONST: + tcg_wasm_out_sleb128(s, v); + break; + default: + g_assert_not_reached(); + } +} static void tcg_wasm_out_o1_i2( TCGContext *s, WasmInsn opc, TCGReg ret, TCGReg arg1, TCGReg arg2) @@ -250,6 +322,85 @@ static void tcg_wasm_out_o1_i2_type( } } +static const struct { + WasmInsn i32; + WasmInsn i64; +} tcg_cond_to_inst[] = { + [TCG_COND_EQ] = { OPC_I32_EQ, OPC_I64_EQ }, + [TCG_COND_NE] = { OPC_I32_NE, OPC_I64_NE }, + [TCG_COND_LT] = { OPC_I32_LT_S, OPC_I64_LT_S }, + [TCG_COND_GE] = { OPC_I32_GE_S, OPC_I64_GE_S }, + [TCG_COND_LE] = { OPC_I32_LE_S, OPC_I64_LE_S }, + [TCG_COND_GT] = { OPC_I32_GT_S, OPC_I64_GT_S }, + [TCG_COND_LTU] = { OPC_I32_LT_U, OPC_I64_LT_U }, + [TCG_COND_GEU] = { OPC_I32_GE_U, OPC_I64_GE_U }, + [TCG_COND_LEU] = { OPC_I32_LE_U, OPC_I64_LE_U }, + [TCG_COND_GTU] = { OPC_I32_GT_U, OPC_I64_GT_U } +}; + +static void tcg_wasm_out_cond( + TCGContext *s, TCGType type, TCGCond cond, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i32); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i64); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_setcond(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, TCGReg arg2, TCGCond cond) +{ + tcg_wasm_out_cond(s, type, cond, arg1, arg2); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_negsetcond(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, TCGReg arg2, TCGCond cond) +{ + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_cond(s, type, cond, arg1, arg2); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op(s, OPC_I64_SUB); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_movcond(TCGContext *s, TCGType type, TCGReg ret, + TCGReg c1, TCGReg c2, + TCGReg v1, TCGReg v2, + TCGCond cond) +{ + tcg_wasm_out_cond(s, type, cond, c1, c2); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v1)); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v2)); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + tcg_out32(s, insn); +} + static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2) { @@ -277,6 +428,35 @@ static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, tcg_out32(s, insn); } +static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, c3); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg r3, TCGReg r4, TCGCond c5) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + insn = deposit32(insn, 24, 4, r4); + insn = deposit32(insn, 28, 4, c5); + tcg_out32(s, insn); +} + static void tgen_and(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { @@ -397,6 +577,56 @@ static const TCGOutOpBinary outop_sar = { .out_rrr = tgen_sar, }; +static void tgen_setcond_tci(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_setcond32 + : INDEX_op_setcond); + tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_tci(s, type, cond, dest, arg1, arg2); + tcg_wasm_out_setcond(s, type, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_setcond, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_tci(s, type, cond, dest, arg1, arg2); + tcg_out_op_rr(s, INDEX_op_neg, dest, dest); + tcg_wasm_out_negsetcond(s, type, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_negsetcond, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_movcond32 + : INDEX_op_movcond); + tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond); + tcg_wasm_out_movcond(s, type, ret, c1, c2, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_movcond, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From f4f7a03f63e7d7e63c3449b4521b85b3ca0401e5 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 15:26:03 +0900 Subject: [PATCH 13/35] tcg/wasm: Add sextract instruction The sextract operation is genereted only when the corresponding Wasm instructions are available, as specified by TCG_TARGET_sextract_valid. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target-has.h | 5 +++++ tcg/wasm/tcg-target.c.inc | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h index 7e3caf8790d23..cfb85388de5e7 100644 --- a/tcg/wasm/tcg-target-has.h +++ b/tcg/wasm/tcg-target-has.h @@ -4,4 +4,9 @@ #define TCG_TARGET_HAS_tst 0 +#define TCG_TARGET_extract_valid(type, ofs, len) 0 +#define TCG_TARGET_sextract_valid(type, ofs, len) \ + ((ofs == 0) && ((len == 8) || (len == 16) || (len == 32))) +#define TCG_TARGET_deposit_valid(type, ofs, len) 0 + #endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 70de3bbf83378..dd75deecd3009 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -182,7 +182,10 @@ typedef enum { OPC_I64_SHR_U = 0x88, OPC_I32_WRAP_I64 = 0xa7, + OPC_I64_EXTEND_I32_S = 0xac, OPC_I64_EXTEND_I32_U = 0xad, + OPC_I64_EXTEND8_S = 0xc2, + OPC_I64_EXTEND16_S = 0xc3, } WasmInsn; typedef enum { @@ -391,6 +394,33 @@ static void tcg_wasm_out_movcond(TCGContext *s, TCGType type, TCGReg ret, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); } +static void tcg_wasm_out_sextract(TCGContext *s, TCGReg dest, TCGReg arg1, + int pos, int len) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + + if (pos == 0) { + switch (len) { + case 8: + tcg_wasm_out_op(s, OPC_I64_EXTEND8_S); + break; + case 16: + tcg_wasm_out_op(s, OPC_I64_EXTEND16_S); + break; + case 32: + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_S); + break; + default: + g_assert_not_reached(); + } + } else { + g_assert_not_reached(); + } + + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest)); +} + static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) { tcg_insn_unit_tci insn = 0; @@ -529,6 +559,18 @@ static const TCGOutOpBinary outop_mul = { .out_rrr = tgen_mul, }; +static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + tcg_out_op_rrbb(s, INDEX_op_sextract, rd, rs, pos, len); + tcg_wasm_out_sextract(s, rd, rs, pos, len); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_sextract, +}; + static void tgen_shl(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1, TCGReg a2) { From a507277f4b40617655ba1c94963ef42c78ebbcca Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 16:38:50 +0900 Subject: [PATCH 14/35] tcg/wasm: Add load and store instructions Since Wasm load and store instructions don't support negative offsets, address calculations are performed separately before the memory access. When Emscripten's -sMEMORY64=2 is enabled, the address size must be 32bits. So this commit updates the build tools to propagate this flag to the C code via the WASM64_MEMORY64_2 macro. In this case, the emitted code casts pointers to 32bit before memory oprations. Additionally, the declaration of "--wasm64-32bit-address-limit" flag has been moved from the configure script to meson.build. So the flag name is updated to "--enable-wasm64-32bit-address-limit" to follow Meson's naming conventions. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- .gitlab-ci.d/buildtest.yml | 2 +- configure | 8 +- meson.build | 4 + meson_options.txt | 3 + scripts/meson-buildoptions.sh | 5 + tcg/wasm.c | 87 +++++++++++ tcg/wasm/tcg-target-mo.h | 20 +++ tcg/wasm/tcg-target-opc.h.inc | 2 + tcg/wasm/tcg-target.c.inc | 286 ++++++++++++++++++++++++++++++++++ 9 files changed, 411 insertions(+), 6 deletions(-) create mode 100644 tcg/wasm/tcg-target-mo.h diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index 77ae8f828160c..a97bb89714f5c 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -812,4 +812,4 @@ build-wasm64-32bit: job: wasm64-32bit-emsdk-cross-container variables: IMAGE: emsdk-wasm64-32bit-cross - CONFIGURE_ARGS: --static --cpu=wasm64 --wasm64-32bit-address-limit --disable-tools --enable-debug --enable-tcg-interpreter + CONFIGURE_ARGS: --static --cpu=wasm64 --enable-wasm64-32bit-address-limit --disable-tools --enable-debug --enable-tcg-interpreter diff --git a/configure b/configure index 8ac6cacc06b2f..77365e25093a3 100755 --- a/configure +++ b/configure @@ -243,7 +243,9 @@ for opt do ;; --without-default-features) default_feature="no" ;; - --wasm64-32bit-address-limit) wasm64_memory64="2" + --enable-wasm64-32bit-address-limit) wasm64_memory64="2" + ;; + --disable-wasm64-32bit-address-limit) wasm64_memory64="1" ;; esac done @@ -801,8 +803,6 @@ for opt do ;; --disable-rust) rust=disabled ;; - --wasm64-32bit-address-limit) - ;; # everything else has the same name in configure and meson --*) meson_option_parse "$opt" "$optarg" ;; @@ -928,8 +928,6 @@ Advanced options (experts only): --disable-containers don't use containers for cross-building --container-engine=TYPE which container engine to use [$container_engine] --gdb=GDB-path gdb to use for gdbstub tests [$gdb_bin] - --wasm64-32bit-address-limit Restrict wasm64 address space to 32-bit (default - is to use the whole 64-bit range). EOF meson_options_help cat << EOF diff --git a/meson.build b/meson.build index 44bb7ed334df4..5b048ea70f6be 100644 --- a/meson.build +++ b/meson.build @@ -393,6 +393,10 @@ elif host_os == 'windows' if compiler.get_id() == 'clang' and compiler.get_linker_id() != 'ld.lld' error('On windows, you need to use lld with clang - use msys2 clang64/clangarm64 env') endif +elif host_os == 'emscripten' + if cpu == 'wasm64' and get_option('wasm64_32bit_address_limit') + qemu_common_flags += '-DWASM64_MEMORY64_2' + endif endif # Choose instruction set (currently x86-only) diff --git a/meson_options.txt b/meson_options.txt index fff1521e580de..82771340badf6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -386,3 +386,6 @@ option('rust', type: 'feature', value: 'disabled', description: 'Rust support') option('strict_rust_lints', type: 'boolean', value: false, description: 'Enable stricter set of Rust warnings') + +option('wasm64_32bit_address_limit', type: 'boolean', value: false, + description: 'Restrict wasm64 address space to 32-bit (default is to use the whole 64-bit range).') diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 0ebe6bc52a6b9..64845aa0b9d05 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -56,6 +56,9 @@ meson_options_help() { printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' printf "%s\n" ' --enable-tsan enable thread sanitizer' printf "%s\n" ' --enable-ubsan enable undefined behaviour sanitizer' + printf "%s\n" ' --enable-wasm64-32bit-address-limit' + printf "%s\n" ' Restrict wasm64 address space to 32-bit (default' + printf "%s\n" ' is to use the whole 64-bit range).' printf "%s\n" ' --firmwarepath=VALUES search PATH for firmware files [share/qemu-' printf "%s\n" ' firmware]' printf "%s\n" ' --iasl=VALUE Path to ACPI disassembler' @@ -571,6 +574,8 @@ _meson_option_parse() { --disable-vte) printf "%s" -Dvte=disabled ;; --enable-vvfat) printf "%s" -Dvvfat=enabled ;; --disable-vvfat) printf "%s" -Dvvfat=disabled ;; + --enable-wasm64-32bit-address-limit) printf "%s" -Dwasm64_32bit_address_limit=true ;; + --disable-wasm64-32bit-address-limit) printf "%s" -Dwasm64_32bit_address_limit=false ;; --enable-werror) printf "%s" -Dwerror=true ;; --disable-werror) printf "%s" -Dwerror=false ;; --enable-whpx) printf "%s" -Dwhpx=enabled ;; diff --git a/tcg/wasm.c b/tcg/wasm.c index 183dad10a2aa4..fa7413fc1db10 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -20,6 +20,14 @@ #include "qemu/osdep.h" #include "tcg/tcg.h" +#include "tcg/tcg-ldst.h" + +static void tci_args_rl(uint32_t insn, const void *tb_ptr, + TCGReg *r0, void **l1) +{ + *r0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; +} static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) { @@ -27,6 +35,12 @@ static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) *r1 = extract32(insn, 12, 4); } +static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1) +{ + *r0 = extract32(insn, 8, 4); + *i1 = sextract32(insn, 12, 20); +} + static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) { *r0 = extract32(insn, 8, 4); @@ -34,6 +48,13 @@ static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) *r2 = extract32(insn, 16, 4); } +static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = sextract32(insn, 16, 16); +} + static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, uint8_t *i2, uint8_t *i3) { @@ -161,9 +182,12 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) uint32_t insn; TCGOpcode opc; TCGReg r0, r1, r2, r3, r4; + tcg_target_ulong t1; uint8_t pos, len; TCGCond condition; uint32_t tmp32; + int32_t ofs; + void *ptr; insn = *tb_ptr++; opc = extract32(insn, 0, 8); @@ -236,6 +260,69 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tmp32 = tci_compare32(regs[r1], regs[r2], condition); regs[r0] = regs[tmp32 ? r3 : r4]; break; + case INDEX_op_tci_movi: + tci_args_ri(insn, &r0, &t1); + regs[r0] = t1; + break; + case INDEX_op_tci_movl: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + regs[r0] = *(tcg_target_ulong *)ptr; + break; + case INDEX_op_ld: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(tcg_target_ulong *)ptr; + break; + case INDEX_op_ld8u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint8_t *)ptr; + break; + case INDEX_op_ld8s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int8_t *)ptr; + break; + case INDEX_op_ld16u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint16_t *)ptr; + break; + case INDEX_op_ld16s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int16_t *)ptr; + break; + case INDEX_op_st: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(tcg_target_ulong *)ptr = regs[r0]; + break; + case INDEX_op_st8: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint8_t *)ptr = regs[r0]; + break; + case INDEX_op_st16: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint16_t *)ptr = regs[r0]; + break; + case INDEX_op_ld32u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint32_t *)ptr; + break; + case INDEX_op_ld32s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int32_t *)ptr; + break; + case INDEX_op_st32: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint32_t *)ptr = regs[r0]; + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-mo.h b/tcg/wasm/tcg-target-mo.h new file mode 100644 index 0000000000000..525f7022931ce --- /dev/null +++ b/tcg/wasm/tcg-target-mo.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define target-specific memory model + * + * Based on tci/tcg-target-mo.h + * + * Copyright (c) 2009, 2011 Stefan Weil + */ + +#ifndef TCG_TARGET_MO_H +#define TCG_TARGET_MO_H + +/* + * We could notice __i386__ or __s390x__ and reduce the barriers depending + * on the host. But if you want performance, you use the normal backend. + * We prefer consistency across hosts on this. + */ +#define TCG_TARGET_DEFAULT_MO 0 + +#endif diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc index 57274d45693a1..122b45749aae8 100644 --- a/tcg/wasm/tcg-target-opc.h.inc +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -4,5 +4,7 @@ * * These opcodes for use between the tci generator and interpreter. */ +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index dd75deecd3009..6bab20a6a9db5 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -143,6 +143,18 @@ typedef enum { OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I64_LOAD = 0x29, + OPC_I64_LOAD8_S = 0x30, + OPC_I64_LOAD8_U = 0x31, + OPC_I64_LOAD16_S = 0x32, + OPC_I64_LOAD16_U = 0x33, + OPC_I64_LOAD32_S = 0x34, + OPC_I64_LOAD32_U = 0x35, + OPC_I64_STORE = 0x37, + OPC_I64_STORE8 = 0x3c, + OPC_I64_STORE16 = 0x3d, + OPC_I64_STORE32 = 0x3e, + OPC_I32_CONST = 0x41, OPC_I64_CONST = 0x42, @@ -168,6 +180,7 @@ typedef enum { OPC_I64_GE_S = 0x59, OPC_I64_GE_U = 0x5a, + OPC_I32_ADD = 0x6a, OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, @@ -421,6 +434,84 @@ static void tcg_wasm_out_sextract(TCGContext *s, TCGReg dest, TCGReg arg1, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest)); } +/* + * The size of the offset field of Wasm's load/store instruction defers + * depending on the "-sMEMORY64" flag value: 64bit when "-sMEMORY64=1" + * and 32bit when "-sMEMORY64=2". + */ +#if defined(WASM64_MEMORY64_2) +typedef uint32_t wasm_ldst_offset_t; +#else +typedef uint64_t wasm_ldst_offset_t; +#endif +static void tcg_wasm_out_op_ldst( + TCGContext *s, WasmInsn instr, uint32_t a, wasm_ldst_offset_t o) +{ + tcg_wasm_out_op(s, instr); + tcg_wasm_out_leb128(s, a); + tcg_wasm_out_leb128(s, (wasm_ldst_offset_t)o); +} + +/* + * tcg_wasm_out_norm_ptr emits instructions to adjust the 64bit pointer value + * at the top of the stack to satisfy Wasm's memory addressing requirements. + */ +static intptr_t tcg_wasm_out_norm_ptr(TCGContext *s, intptr_t offset) +{ +#if defined(WASM64_MEMORY64_2) + /* + * If Emscripten's "-sMEMORY64=2" is enabled, + * the address size is limited to 32bit. + */ + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); +#endif + /* + * Wasm's load/store instructions don't support negative value in + * the offset field. So this function calculates the target address + * using the base and the offset and makes the offset field 0. + */ + if (offset < 0) { +#if defined(WASM64_MEMORY64_2) + tcg_wasm_out_op_const(s, OPC_I32_CONST, offset); + tcg_wasm_out_op(s, OPC_I32_ADD); +#else + tcg_wasm_out_op_const(s, OPC_I64_CONST, offset); + tcg_wasm_out_op(s, OPC_I64_ADD); +#endif + offset = 0; + } + return offset; +} + +static void tcg_wasm_out_ld( + TCGContext *s, WasmInsn opc, TCGReg val, TCGReg base, intptr_t offset) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(base)); + offset = tcg_wasm_out_norm_ptr(s, offset); + tcg_wasm_out_op_ldst(s, opc, 0, offset); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(val)); +} + +static void tcg_wasm_out_st( + TCGContext *s, WasmInsn opc, TCGReg val, TCGReg base, intptr_t offset) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(base)); + offset = tcg_wasm_out_norm_ptr(s, offset); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(val)); + tcg_wasm_out_op_ldst(s, opc, 0, offset); +} + +static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(i1 == sextract32(i1, 0, 20)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 20, i1); + tcg_out32(s, insn); +} + static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) { tcg_insn_unit_tci insn = 0; @@ -443,6 +534,19 @@ static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, tcg_out32(s, insn); } +static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, intptr_t i2) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(i2 == sextract32(i2, 0, 16)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 16, i2); + tcg_out32(s, insn); +} + static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, uint8_t b2, uint8_t b3) { @@ -669,6 +773,188 @@ static const TCGOutOpMovcond outop_movcond = { .out = tgen_movcond, }; +static void tcg_tci_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + switch (type) { + case TCG_TYPE_I32: + arg = (int32_t)arg; + /* fall through */ + case TCG_TYPE_I64: + break; + default: + g_assert_not_reached(); + } + + if (arg == sextract32(arg, 0, 20)) { + tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg); + } else { + tcg_insn_unit_tci insn = 0; + + new_pool_label(s, arg, 20, s->code_ptr, 0); + insn = deposit32(insn, 0, 8, INDEX_op_tci_movl); + insn = deposit32(insn, 8, 4, ret); + tcg_out32(s, insn); + } +} + +static void stack_bounds_check(TCGReg base, intptr_t offset) +{ + if (base == TCG_REG_CALL_STACK) { + tcg_debug_assert(offset >= 0); + tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE + + TCG_STATIC_FRAME_SIZE)); + } +} + +static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, + TCGReg base, intptr_t offset) +{ + stack_bounds_check(base, offset); + if (offset != sextract32(offset, 0, 16)) { + tcg_tci_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); + tcg_out_op_rrr(s, INDEX_op_add, TCG_REG_TMP, TCG_REG_TMP, base); + base = TCG_REG_TMP; + offset = 0; + } + tcg_out_op_rrs(s, op, val, base, offset); +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, + intptr_t offset) +{ + TCGOpcode op = INDEX_op_ld; + WasmInsn wasm_opc = OPC_I64_LOAD; + + if (type == TCG_TYPE_I32) { + op = INDEX_op_ld32u; + wasm_opc = OPC_I64_LOAD32_U; + } + tcg_out_ldst(s, op, val, base, offset); + tcg_wasm_out_ld(s, wasm_opc, val, base, offset); +} + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD8_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD8_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD16_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD16_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD32_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD32_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st8, data, base, offset); + tcg_wasm_out_st(s, OPC_I64_STORE8, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st16, data, base, offset); + tcg_wasm_out_st(s, OPC_I64_STORE16, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, + intptr_t offset) +{ + TCGOpcode op = INDEX_op_st; + WasmInsn wasm_opc = OPC_I64_STORE; + + if (type == TCG_TYPE_I32) { + op = INDEX_op_st32; + wasm_opc = OPC_I64_STORE32; + } + tcg_out_ldst(s, op, val, base, offset); + tcg_wasm_out_st(s, wasm_opc, val, base, offset); +} + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 13eb7d5403cdad50342504eb76aef9bc0f8e0996 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 16:59:09 +0900 Subject: [PATCH 15/35] tcg/wasm: Add mov/movi instructions Theese operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 4 ++++ tcg/wasm/tcg-target.c.inc | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index fa7413fc1db10..2c8a7b814e2ca 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -260,6 +260,10 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tmp32 = tci_compare32(regs[r1], regs[r2], condition); regs[r0] = regs[tmp32 ? r3 : r4]; break; + case INDEX_op_mov: + tci_args_rr(insn, &r0, &r1); + regs[r0] = regs[r1]; + break; case INDEX_op_tci_movi: tci_args_ri(insn, &r0, &t1); regs[r0] = t1; diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 6bab20a6a9db5..def1f5cd5eae9 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -501,6 +501,28 @@ static void tcg_wasm_out_st( tcg_wasm_out_op_ldst(s, opc, 0, offset); } +static void tcg_wasm_out_mov(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_const(s, OPC_I64_CONST, (int32_t)arg); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_const(s, OPC_I64_CONST, arg); + break; + default: + g_assert_not_reached(); + } + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -798,6 +820,20 @@ static void tcg_tci_out_movi(TCGContext *s, TCGType type, } } +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_op_rr(s, INDEX_op_mov, ret, arg); + tcg_wasm_out_mov(s, ret, arg); + return true; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_tci_out_movi(s, type, ret, arg); + tcg_wasm_out_movi(s, type, ret, arg); +} + static void stack_bounds_check(TCGReg base, intptr_t offset) { if (base == TCG_REG_CALL_STACK) { From 004734452e90077ea9f98ab7b093ce16fc1f5bae Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 17:16:02 +0900 Subject: [PATCH 16/35] tcg/wasm: Add ext instructions The ext operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target-has.h | 1 + tcg/wasm/tcg-target.c.inc | 79 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h index cfb85388de5e7..a29ceb2ea5154 100644 --- a/tcg/wasm/tcg-target-has.h +++ b/tcg/wasm/tcg-target-has.h @@ -3,6 +3,7 @@ #define TCG_TARGET_HAS_H #define TCG_TARGET_HAS_tst 0 +#define TCG_TARGET_HAS_extr_i64_i32 0 #define TCG_TARGET_extract_valid(type, ofs, len) 0 #define TCG_TARGET_sextract_valid(type, ofs, len) \ diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index def1f5cd5eae9..e41b3a0c27afc 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -434,6 +434,22 @@ static void tcg_wasm_out_sextract(TCGContext *s, TCGReg dest, TCGReg arg1, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest)); } +static void tcg_wasm_out_extract(TCGContext *s, TCGReg dest, TCGReg arg1, + int pos, int len) +{ + int64_t mask = ~0ULL >> (64 - len); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + if (pos > 0) { + tcg_wasm_out_op_const(s, OPC_I64_CONST, pos); + tcg_wasm_out_op(s, OPC_I64_SHR_U); + } + if ((pos + len) < 64) { + tcg_wasm_out_op_const(s, OPC_I64_CONST, mask); + tcg_wasm_out_op(s, OPC_I64_AND); + } + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest)); +} + /* * The size of the offset field of Wasm's load/store instruction defers * depending on the "-sMEMORY64" flag value: 64bit when "-sMEMORY64=1" @@ -991,6 +1007,69 @@ static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } +static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, type, rd, rs, 0, 8); + tcg_wasm_out_sextract(s, rd, rs, 0, 8); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 8); + tcg_wasm_out_extract(s, rd, rs, 0, 8); +} + +static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, type, rd, rs, 0, 16); + tcg_wasm_out_sextract(s, rd, rs, 0, 16); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 16); + tcg_wasm_out_extract(s, rd, rs, 0, 16); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, TCG_TYPE_I64, rd, rs, 0, 32); + tcg_wasm_out_sextract(s, rd, rs, 0, 32); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 32); + tcg_wasm_out_extract(s, rd, rs, 0, 32); +} + +static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_ext32s(s, rd, rs); +} + +static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_ext32u(s, rd, rs); +} + +static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rr(s, INDEX_op_mov, rd, rs); + tcg_wasm_out_extract(s, rd, rs, 0, 32); +} + +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, a0, a1, 32, 32); + tcg_wasm_out_extract(s, a0, a1, 32, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_extrh_i64_i32, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 32eaa622a5acc240179be619c09319bbb48f3f14 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 17:58:49 +0900 Subject: [PATCH 17/35] tcg/wasm: Add div/rem instructions The div and rem operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 32 +++++++++++++++++ tcg/wasm/tcg-target-opc.h.inc | 4 +++ tcg/wasm/tcg-target.c.inc | 68 +++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index 2c8a7b814e2ca..8c8dcb81c72ac 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -327,6 +327,38 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) ptr = (void *)(regs[r1] + ofs); *(uint32_t *)ptr = regs[r0]; break; + case INDEX_op_divs: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; + break; + case INDEX_op_divu: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; + break; + case INDEX_op_rems: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; + break; + case INDEX_op_remu: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; + break; + case INDEX_op_tci_divs32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; + break; + case INDEX_op_tci_divu32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; + break; + case INDEX_op_tci_rems32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; + break; + case INDEX_op_tci_remu32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc index 122b45749aae8..5ed8c67535384 100644 --- a/tcg/wasm/tcg-target-opc.h.inc +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -8,3 +8,7 @@ DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index e41b3a0c27afc..38459a60d6158 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -181,12 +181,20 @@ typedef enum { OPC_I64_GE_U = 0x5a, OPC_I32_ADD = 0x6a, + OPC_I32_DIV_S = 0x6d, + OPC_I32_DIV_U = 0x6e, + OPC_I32_REM_S = 0x6f, + OPC_I32_REM_U = 0x70, OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, OPC_I64_ADD = 0x7c, OPC_I64_SUB = 0x7d, OPC_I64_MUL = 0x7e, + OPC_I64_DIV_S = 0x7f, + OPC_I64_DIV_U = 0x80, + OPC_I64_REM_S = 0x81, + OPC_I64_REM_U = 0x82, OPC_I64_AND = 0x83, OPC_I64_OR = 0x84, OPC_I64_XOR = 0x85, @@ -1070,6 +1078,66 @@ static const TCGOutOpUnary outop_extrh_i64_i32 = { .out_rr = tgen_extrh_i64_i32, }; +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divs32 + : INDEX_op_divs); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_DIV_S, OPC_I64_DIV_S, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divu32 + : INDEX_op_divu); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_DIV_U, OPC_I64_DIV_U, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rems32 + : INDEX_op_rems); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_REM_S, OPC_I64_REM_S, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_remu32 + : INDEX_op_remu); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_REM_U, OPC_I64_REM_U, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 2cb39a5cb4df42d6a7b3bdc3d6449fb9a60e861f Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Tue, 26 Aug 2025 12:17:48 +0900 Subject: [PATCH 18/35] tcg/wasm: Add neg/ctpop instructions The neg/ctpop operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. The Wasm backend implements only TCG_TARGET_REG_BITS=64 so the ctpop instruction is generated only for 64bit operations, as declared in cset_ctpop. Therefore, this commit adds only the 64bit version of ctpop implementation. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 4 ++++ tcg/wasm/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index 8c8dcb81c72ac..a5e72d8fe5829 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -359,6 +359,10 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; break; + case INDEX_op_ctpop: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop64(regs[r1]); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 38459a60d6158..27f3a7414b667 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -188,6 +188,7 @@ typedef enum { OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, + OPC_I64_POPCNT = 0x7b, OPC_I64_ADD = 0x7c, OPC_I64_SUB = 0x7d, OPC_I64_MUL = 0x7e, @@ -547,6 +548,21 @@ static void tcg_wasm_out_movi(TCGContext *s, TCGType type, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); } +static void tcg_wasm_out_neg(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op(s, OPC_I64_SUB); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_ctpop64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op(s, OPC_I64_POPCNT); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -1138,6 +1154,34 @@ static const TCGOutOpBinary outop_remu = { .out_rrr = tgen_remu, }; +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_neg, a0, a1); + tcg_wasm_out_neg(s, a0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_ctpop, a0, a1); + tcg_wasm_out_ctpop64(s, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From ad0fb42234f6a5569489cfb5c4d04361575e4905 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 20:30:05 +0900 Subject: [PATCH 19/35] tcg/wasm: Add rot/clz/ctz instructions The rot/clz/ctz operations are implemented using the corresponding instructions in Wasm. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 34 +++++++++++ tcg/wasm/tcg-target-opc.h.inc | 4 ++ tcg/wasm/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index a5e72d8fe5829..2688ded58a620 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -363,6 +363,40 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop64(regs[r1]); break; + case INDEX_op_clz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_ctz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_tci_clz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; + break; + case INDEX_op_tci_ctz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2]; + break; + case INDEX_op_rotl: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_rotr: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_tci_rotl32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol32(regs[r1], regs[r2] & 31); + break; + case INDEX_op_tci_rotr32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror32(regs[r1], regs[r2] & 31); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc index 5ed8c67535384..092a5086ecde4 100644 --- a/tcg/wasm/tcg-target-opc.h.inc +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -12,3 +12,7 @@ DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 27f3a7414b667..d547e7bf09f46 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -158,6 +158,7 @@ typedef enum { OPC_I32_CONST = 0x41, OPC_I64_CONST = 0x42, + OPC_I32_EQZ = 0x45, OPC_I32_EQ = 0x46, OPC_I32_NE = 0x47, OPC_I32_LT_S = 0x48, @@ -169,6 +170,7 @@ typedef enum { OPC_I32_GE_S = 0x4e, OPC_I32_GE_U = 0x4f, + OPC_I64_EQZ = 0x50, OPC_I64_EQ = 0x51, OPC_I64_NE = 0x52, OPC_I64_LT_S = 0x53, @@ -180,6 +182,8 @@ typedef enum { OPC_I64_GE_S = 0x59, OPC_I64_GE_U = 0x5a, + OPC_I32_CLZ = 0x67, + OPC_I32_CTZ = 0x68, OPC_I32_ADD = 0x6a, OPC_I32_DIV_S = 0x6d, OPC_I32_DIV_U = 0x6e, @@ -187,7 +191,11 @@ typedef enum { OPC_I32_REM_U = 0x70, OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, + OPC_I32_ROTL = 0x77, + OPC_I32_ROTR = 0x78, + OPC_I64_CLZ = 0x79, + OPC_I64_CTZ = 0x7a, OPC_I64_POPCNT = 0x7b, OPC_I64_ADD = 0x7c, OPC_I64_SUB = 0x7d, @@ -202,6 +210,8 @@ typedef enum { OPC_I64_SHL = 0x86, OPC_I64_SHR_S = 0x87, OPC_I64_SHR_U = 0x88, + OPC_I64_ROTL = 0x89, + OPC_I64_ROTR = 0x8a, OPC_I32_WRAP_I64 = 0xa7, OPC_I64_EXTEND_I32_S = 0xac, @@ -212,6 +222,7 @@ typedef enum { typedef enum { BLOCK_I64 = 0x7e, + BLOCK_I32 = 0x7f, } WasmBlockType; #define BUF_SIZE 1024 @@ -563,6 +574,42 @@ static void tcg_wasm_out_ctpop64(TCGContext *s, TCGReg ret, TCGReg arg) tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); } +static void tcg_wasm_out_cz( + TCGContext *s, TCGType type, WasmInsn opc32, WasmInsn opc64, + TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I32); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, opc32); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I64_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, opc64); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + default: + g_assert_not_reached(); + } +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -1182,6 +1229,66 @@ static const TCGOutOpUnary outop_ctpop = { .out_rr = tgen_ctpop, }; +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotl32 + : INDEX_op_rotl); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTL, OPC_I64_ROTL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotl, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotr32 + : INDEX_op_rotr); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTR, OPC_I64_ROTR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotr, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_clz32 + : INDEX_op_clz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CLZ, OPC_I64_CLZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_clz, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_ctz32 + : INDEX_op_ctz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CTZ, OPC_I64_CTZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_ctz, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 50b395e34f495770da40de232b67adaf4a47cdf7 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 21:09:27 +0900 Subject: [PATCH 20/35] tcg/wasm: Add br/brcond instructions Wasm does not support direct jumps to arbitrary code addresses, so br and brcond are implemented using Wasm's control flow instructions. As illustrated in the pseudo-code below, each TB wraps Wasm instructions inside a large loop. Each set of codes separated by TCG labels is placed inside an "if" block. Br is implemented by breaking out of the current block and entering the target block: loop if ... code after the first label end if ... code after the second label end ... end Each block is assigned an unique integer ID. The br implementation sets the destination block's ID in BLOCK_IDX Wasm variable and breaks from the current if block. As control flow continues, each if block checks whether the BLOCK_IDX matches its own ID. If so, execution resumes within that block. The tcg_out_tb_start function generates the start of the global loop and the first if block. To properly close these blocks, this commit also introduces a new callback tcg_out_tb_end which emits the "end" instructions for the final if block and the loop. Another new callback tcg_out_label_cb is used to emit block boundaries, specifically the end of the previous block and the if of the next block, at label positions. It also records the mapping between label IDs and block IDs in a LabelInfo list. Since the block ID for a label might not be known when a br instruction is generated, a placeholder is emitted instead. These placeholders are tracked in a BlockPlaceholder list and resolved later using LabelInfo. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- tcg/aarch64/tcg-target.c.inc | 11 ++ tcg/arm/tcg-target.c.inc | 11 ++ tcg/i386/tcg-target.c.inc | 11 ++ tcg/loongarch64/tcg-target.c.inc | 11 ++ tcg/mips/tcg-target.c.inc | 11 ++ tcg/ppc/tcg-target.c.inc | 11 ++ tcg/riscv/tcg-target.c.inc | 11 ++ tcg/s390x/tcg-target.c.inc | 11 ++ tcg/sparc64/tcg-target.c.inc | 11 ++ tcg/tcg.c | 7 ++ tcg/tci/tcg-target.c.inc | 11 ++ tcg/wasm.c | 16 +++ tcg/wasm/tcg-target.c.inc | 196 +++++++++++++++++++++++++++++++ 13 files changed, 329 insertions(+) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 3b088b7bd9727..9323161607885 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -3514,6 +3514,17 @@ static void tcg_out_tb_start(TCGContext *s) tcg_out_bti(s, BTI_J); } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 836894b16ade7..bd8428491aa99 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -3441,6 +3441,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + typedef struct { DebugFrameHeader h; uint8_t fde_def_cfa[4]; diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 088c6c9264b01..cf8b50e162b07 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -4759,6 +4759,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { memset(p, 0x90, count); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 10c69211ac5ba..75f6a97b2b39d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -2658,6 +2658,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { for (int i = 0; i < count; ++i) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 400eafbab4b6f..d1241912ac3d7 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2745,6 +2745,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_target_init(TCGContext *s) { tcg_target_detect_isa(); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index b8b23d44d5e2d..20cc2594b8392 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2859,6 +2859,17 @@ static void tcg_out_tb_start(TCGContext *s) } } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 31b9f7d87a046..63e7438291e12 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2983,6 +2983,17 @@ static void tcg_out_tb_start(TCGContext *s) init_setting_vtype(s); } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static bool vtype_check(unsigned vtype) { unsigned long tmp; diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 84a9e73a46e90..457e568d30f49 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3830,6 +3830,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { memset(p, 0x07, count * sizeof(tcg_insn_unit)); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 5e5c3f1cda5e3..ae695b115b1aa 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1017,6 +1017,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; diff --git a/tcg/tcg.c b/tcg/tcg.c index afac55a203ab0..7d3e7f8cb1dfb 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -116,6 +116,7 @@ static void tcg_register_jit_int(const void *buf, size_t size, /* Forward declarations for functions declared and used in tcg-target.c.inc. */ static void tcg_out_tb_start(TCGContext *s); +static int tcg_out_tb_end(TCGContext *s); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); @@ -187,6 +188,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); static bool tcg_target_const_match(int64_t val, int ct, TCGType type, TCGCond cond, int vece); +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l); #ifndef CONFIG_USER_ONLY #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) @@ -361,6 +363,7 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l) tcg_debug_assert(!l->has_value); l->has_value = 1; l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); + tcg_out_label_cb(s, l); } TCGLabel *gen_new_label(void) @@ -7047,6 +7050,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) if (!tcg_resolve_relocs(s)) { return -2; } + i = tcg_out_tb_end(s); + if (i < 0) { + return i; + } #ifndef CONFIG_TCG_INTERPRETER /* flush instruction cache */ diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 35c66a48369da..d99d06c1da7e8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1301,6 +1301,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + bool tcg_target_has_memory_bswap(MemOp memop) { return true; diff --git a/tcg/wasm.c b/tcg/wasm.c index 2688ded58a620..ca67436192e8e 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -22,6 +22,12 @@ #include "tcg/tcg.h" #include "tcg/tcg-ldst.h" +static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) +{ + int diff = sextract32(insn, 12, 20); + *l0 = diff ? (void *)tb_ptr + diff : NULL; +} + static void tci_args_rl(uint32_t insn, const void *tb_ptr, TCGReg *r0, void **l1) { @@ -397,6 +403,16 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = ror32(regs[r1], regs[r2] & 31); break; + case INDEX_op_br: + tci_args_l(insn, tb_ptr, &ptr); + tb_ptr = ptr; + continue; + case INDEX_op_brcond: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + if (regs[r0]) { + tb_ptr = ptr; + } + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index d547e7bf09f46..ca10f97ed8612 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -136,10 +136,16 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { #define REG_IDX(r) tcg_target_reg_index[r] +/* Global variable used for storing the current block index */ +#define BLOCK_IDX 16 + typedef enum { + OPC_UNREACHABLE = 0x00, + OPC_LOOP = 0x03, OPC_IF = 0x04, OPC_ELSE = 0x05, OPC_END = 0x0b, + OPC_BR = 0x0c, OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, @@ -221,6 +227,7 @@ typedef enum { } WasmInsn; typedef enum { + BLOCK_NORET = 0x40, BLOCK_I64 = 0x7e, BLOCK_I32 = 0x7f, } WasmBlockType; @@ -276,6 +283,17 @@ static void linked_buf_out_sleb128(LinkedBuf *p, int64_t v) } } +static int linked_buf_len(LinkedBuf *p) +{ + int total = 0; + LinkedBufEntry *e; + + QSIMPLEQ_FOREACH(e, p, entry) { + total += e->size; + } + return total; +} + /* * wasm code is generataed in the dynamically allocated buffer which * are managed as a linked list. @@ -286,6 +304,10 @@ static void init_sub_buf(void) { QSIMPLEQ_INIT(&sub_buf); } +static int sub_buf_len(void) +{ + return linked_buf_len(&sub_buf); +} static void tcg_wasm_out8(TCGContext *s, uint8_t v) { linked_buf_out8(&sub_buf, v); @@ -610,6 +632,134 @@ static void tcg_wasm_out_cz( } } +typedef struct LabelInfo { + int label; + int block; + QSIMPLEQ_ENTRY(LabelInfo) entry; +} LabelInfo; + +static __thread QSIMPLEQ_HEAD(, LabelInfo) label_info; + +static void init_label_info(void) +{ + QSIMPLEQ_INIT(&label_info); +} + +static void add_label(int label, int block) +{ + LabelInfo *e = tcg_malloc(sizeof(LabelInfo)); + e->label = label; + e->block = block; + QSIMPLEQ_INSERT_TAIL(&label_info, e, entry); +} + +typedef struct BlockPlaceholder { + int label; + int pos; + QSIMPLEQ_ENTRY(BlockPlaceholder) entry; +} BlockPlaceholder; + +static __thread QSIMPLEQ_HEAD(, BlockPlaceholder) block_placeholder; +static __thread int64_t cur_block_idx; + +static void init_blocks(void) +{ + QSIMPLEQ_INIT(&block_placeholder); + cur_block_idx = 0; +} + +static void add_block_placeholder(int label, int pos) +{ + BlockPlaceholder *e = tcg_malloc(sizeof(BlockPlaceholder)); + e->label = label; + e->pos = pos; + QSIMPLEQ_INSERT_TAIL(&block_placeholder, e, entry); +} + +static int get_block_of_label(int label) +{ + LabelInfo *e; + QSIMPLEQ_FOREACH(e, &label_info, entry) { + if (e->label == label) { + return e->block; + } + } + return -1; +} + +static void tcg_wasm_out_new_block(TCGContext *s) +{ + tcg_wasm_out_op(s, OPC_END); /* close this block */ + + /* next block */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); + tcg_wasm_out_op_const(s, OPC_I64_CONST, ++cur_block_idx); + tcg_wasm_out_op(s, OPC_I64_LE_U); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + add_label(l->id, cur_block_idx + 1); + tcg_wasm_out_new_block(s); +} + +static void tcg_wasm_out_br_to_label(TCGContext *s, TCGLabel *l, bool br_if) +{ + int toploop_depth = 1; + if (br_if) { + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + toploop_depth++; + } + tcg_wasm_out8(s, OPC_I64_CONST); + + add_block_placeholder(l->id, sub_buf_len()); + + tcg_wasm_out8(s, 0x80); /* placeholder for the target block idx */ + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x00); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + if (get_block_of_label(l->id) != -1) { + /* + * The label is placed before this br, branch to the top of loop + */ + tcg_wasm_out_op_idx(s, OPC_BR, toploop_depth); + } else { + /* + * The label will be generated after this br, + * branch to the end of the current block + */ + tcg_wasm_out_op_idx(s, OPC_BR, toploop_depth - 1); + } + if (br_if) { + tcg_wasm_out_op(s, OPC_END); + } +} + +static void tcg_wasm_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_wasm_out_br_to_label(s, l, false); +} + +static void tcg_wasm_out_brcond(TCGContext *s, TCGType type, + TCGReg arg1, TCGReg arg2, + TCGCond cond, TCGLabel *l) +{ + tcg_wasm_out_cond(s, type, cond, arg1, arg2); + tcg_wasm_out_br_to_label(s, l, true); +} + +static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) +{ + tcg_insn_unit_tci insn = 0; + + tcg_out_reloc(s, s->code_ptr, 20, l0, 0); + insn = deposit32(insn, 0, 8, op); + tcg_out32(s, insn); +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -621,6 +771,16 @@ static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) tcg_out32(s, insn); } +static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1) +{ + tcg_insn_unit_tci insn = 0; + + tcg_out_reloc(s, s->code_ptr, 20, l1, 0); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); +} + static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) { tcg_insn_unit_tci insn = 0; @@ -1289,7 +1449,43 @@ static const TCGOutOpBinary outop_ctz = { .out_rrr = tgen_ctz, }; +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg0, TCGReg arg1, TCGLabel *l) +{ + tgen_setcond_tci(s, type, cond, TCG_REG_TMP, arg0, arg1); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); + tcg_wasm_out_brcond(s, type, arg0, arg1, cond, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, r), + .out_rr = tgen_brcond, +}; + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_op_l(s, INDEX_op_br, l); + tcg_wasm_out_br(s, l); +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); + init_blocks(); + init_label_info(); + + tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); + tcg_wasm_out_op(s, OPC_I64_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); +} + +static int tcg_out_tb_end(TCGContext *s) +{ + tcg_wasm_out_op(s, OPC_END); /* end if */ + tcg_wasm_out_op(s, OPC_END); /* end loop */ + tcg_wasm_out_op(s, OPC_UNREACHABLE); + tcg_wasm_out_op(s, OPC_END); /* end func */ + + return 0; } From f15853e3bc8c5e6b3a622de90dd14261cb9c82be Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 16:09:00 +0900 Subject: [PATCH 21/35] tcg/wasm: Add exit_tb/goto_tb/goto_ptr instructions In the Wasm backend, each TB is compiled to a separeted Wasm module. Control transfer between TBs (i.e. from one Wasm module to another) is handled by the caller of the module. The goto_tb and goto_ptr operations are implemented by returning control to the caller using the return instruction. The destination TB's pointer is passed to the caller via a shared WasmContext structure which is accessible from both the Wasm module and the caller. This WasmContext must be provided to the module as an argument. If the destination TB is the current TB itself, there is no need to return control to the caller. Instead, execution can jump directly to the top of the loop within the TB. The exit_tb operation sets the pointer in WasmContext to 0, indicating that there is no destination TB. TCI instructions are also generated in the same way as the original TCI backend. Signed-off-by: Kohei Tokunaga --- MAINTAINERS | 1 + tcg/wasm.c | 20 ++++++ tcg/wasm.h | 15 +++++ tcg/wasm/tcg-target.c.inc | 136 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 172 insertions(+) create mode 100644 tcg/wasm.h diff --git a/MAINTAINERS b/MAINTAINERS index 217bf2066c2d0..d528b9ec903b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4004,6 +4004,7 @@ M: Kohei Tokunaga S: Maintained F: tcg/wasm/ F: tcg/wasm.c +F: tcg/wasm.h Block drivers ------------- diff --git a/tcg/wasm.c b/tcg/wasm.c index ca67436192e8e..c54c5c5b2c128 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -28,6 +28,11 @@ static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) *l0 = diff ? (void *)tb_ptr + diff : NULL; } +static void tci_args_r(uint32_t insn, TCGReg *r0) +{ + *r0 = extract32(insn, 8, 4); +} + static void tci_args_rl(uint32_t insn, const void *tb_ptr, TCGReg *r0, void **l1) { @@ -413,6 +418,21 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tb_ptr = ptr; } break; + case INDEX_op_exit_tb: + tci_args_l(insn, tb_ptr, &ptr); + return (uintptr_t)ptr; + case INDEX_op_goto_tb: + tci_args_l(insn, tb_ptr, &ptr); + tb_ptr = *(void **)ptr; + break; + case INDEX_op_goto_ptr: + tci_args_r(insn, &r0); + ptr = (void *)regs[r0]; + if (!ptr) { + return 0; + } + tb_ptr = ptr; + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm.h b/tcg/wasm.h new file mode 100644 index 0000000000000..9da38e4d0ed96 --- /dev/null +++ b/tcg/wasm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TCG_WASM_H +#define TCG_WASM_H + +/* + * WasmContext is a data shared among QEMU and wasm modules. + */ +struct WasmContext { + /* + * Pointer to the TB to be executed. + */ + void *tb_ptr; +}; + +#endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index ca10f97ed8612..c077c8ad7c115 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -26,6 +26,7 @@ */ #include "qemu/queue.h" +#include "../wasm.h" typedef uint32_t tcg_insn_unit_tci; @@ -139,6 +140,9 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { /* Global variable used for storing the current block index */ #define BLOCK_IDX 16 +/* Local variable pointing to WasmContext */ +#define CTX_IDX 0 + typedef enum { OPC_UNREACHABLE = 0x00, OPC_LOOP = 0x03, @@ -146,6 +150,8 @@ typedef enum { OPC_ELSE = 0x05, OPC_END = 0x0b, OPC_BR = 0x0c, + OPC_RETURN = 0x0f, + OPC_LOCAL_GET = 0x20, OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, @@ -751,6 +757,81 @@ static void tcg_wasm_out_brcond(TCGContext *s, TCGType type, tcg_wasm_out_br_to_label(s, l, true); } +#define CTX_OFFSET(f) offsetof(struct WasmContext, f) + +static intptr_t tcg_wasm_out_get_ctx(TCGContext *s, intptr_t off) +{ + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, CTX_IDX); + return tcg_wasm_out_norm_ptr(s, off); +} + +static void tcg_wasm_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + intptr_t ofs; + + /* Store ctx.tb_ptr = 0 which indicates there is no next TB */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + + /* Return the control to the caller */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, arg); + tcg_wasm_out_op(s, OPC_RETURN); +} + +static void tcg_wasm_out_goto(TCGContext *s, TCGReg target, int block_depth) +{ + intptr_t ofs; + + /* Check if the target TB is the same as the current TB */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(target)); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op(s, OPC_I64_EQ); + + /* + * If the target TB is the same as the current TB, no need to return to the + * caller. Just branch to the top of the current TB. + */ + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_op_idx(s, OPC_BR, block_depth); /* br to the top of loop */ + tcg_wasm_out_op(s, OPC_END); + + /* Store the target TB to ctx.tb_ptr and return */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(target)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op(s, OPC_RETURN); +} + +static void tcg_wasm_out_goto_ptr(TCGContext *s, TCGReg arg) +{ + tcg_wasm_out_goto(s, arg, 2); +} + +static void tcg_wasm_out_goto_tb( + TCGContext *s, int which, uintptr_t cur_reset_ptr) +{ + intptr_t ofs; + + /* Set the target TB in the tmp variable. */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, get_jmp_target_addr(s, which)); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_TMP)); + + /* Goto the target TB if it's registered. */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_TMP)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_reset_ptr); + tcg_wasm_out_op(s, OPC_I64_NE); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_goto(s, TCG_REG_TMP, 3); + tcg_wasm_out_op(s, OPC_END); +} + static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) { tcg_insn_unit_tci insn = 0; @@ -760,6 +841,35 @@ static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) tcg_out32(s, insn); } +static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0) +{ + tcg_insn_unit_tci insn = 0; + intptr_t diff; + + /* Special case for exit_tb: map null -> 0. */ + if (p0 == NULL) { + diff = 0; + } else { + diff = p0 - (void *)(s->code_ptr + 4); + tcg_debug_assert(diff != 0); + if (diff != sextract32(diff, 0, 20)) { + tcg_raise_tb_overflow(s); + } + } + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 12, 20, diff); + tcg_out32(s, insn); +} + +static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -1468,6 +1578,32 @@ static void tcg_out_br(TCGContext *s, TCGLabel *l) tcg_wasm_out_br(s, l); } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); + tcg_wasm_out_exit_tb(s, arg); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* indirect jump method. */ + tcg_out_op_p(s, INDEX_op_goto_tb, (void *)get_jmp_target_addr(s, which)); + set_jmp_reset_offset(s, which); + tcg_wasm_out_goto_tb(s, which, (intptr_t)s->code_ptr); +} + +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_op_r(s, INDEX_op_goto_ptr, a0); + tcg_wasm_out_goto_ptr(s, a0); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 942c6e41ff9418ae82537b2b8eb81af543997dcc Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 21:37:58 +0900 Subject: [PATCH 22/35] tcg/wasm: Add call instruction To call QEMU functions from a TB's Wasm module, the functions must be imported into the module. Wasm's call instruction can invoke an imported function using a locally assigned function index. When a call TCG operation is generated, the Wasm backend assigns the ID (starting from 0) to the target function. The mapping between the function pointer and its assigned ID is recorded in a list of HelperInfo. Since Wasm's call instruction requires arguments to be pushed onto the Wasm stack, the backend retrieves the function arguments from TCG's stack array and pushes them to the Wasm stack before the call. After the function returns, the result is retrieved from the Wasm stack and set in the corresponding TCG variable. In the Emscripten build configured with !has_int128_type, a 128bit value is represented by the Int128 struct. Such values are passed to the function via pointer parameters and returned via a prepended pointer argument, as described in [1]. For this prepended buffer area, the module expects a pre-allocated Int128 buffer from the caller via ctx.buf128. Helper functions expect the target of the return instruction via the GETPC macro (the tci_tb_ptr variable in TCI). However, unlike other architectures, Wasm doesn't have a register pointing to the return target. To emulate this behaviour, the Wasm module sets the instruction pointer to the corresponding TCI instruction (s->code_ptr) in tci_tb_ptr passed via the WasmContext. TCI instructions are also generated in the same way as the original TCI backend. [1] https://github.com/WebAssembly/tool-conventions/blob/060cf4073e46931160c2e9ecd43177ee1fe93866/BasicCABI.md#function-arguments-and-return-values Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 63 +++++++++++++ tcg/wasm.h | 10 +++ tcg/wasm/tcg-target.c.inc | 183 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 256 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index c54c5c5b2c128..db0c213d92932 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -21,6 +21,10 @@ #include "qemu/osdep.h" #include "tcg/tcg.h" #include "tcg/tcg-ldst.h" +#include "tcg/helper-info.h" +#include + +__thread uintptr_t tci_tb_ptr; static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) { @@ -33,6 +37,13 @@ static void tci_args_r(uint32_t insn, TCGReg *r0) *r0 = extract32(insn, 8, 4); } +static void tci_args_nl(uint32_t insn, const void *tb_ptr, + uint8_t *n0, void **l1) +{ + *n0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; +} + static void tci_args_rl(uint32_t insn, const void *tb_ptr, TCGReg *r0, void **l1) { @@ -204,6 +215,58 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) opc = extract32(insn, 0, 8); switch (opc) { + case INDEX_op_call: + { + void *call_slots[MAX_CALL_IARGS]; + ffi_cif *cif; + void *func; + unsigned i, s, n; + + tci_args_nl(insn, tb_ptr, &len, &ptr); + func = ((void **)ptr)[0]; + cif = ((void **)ptr)[1]; + + n = cif->nargs; + for (i = s = 0; i < n; ++i) { + ffi_type *t = cif->arg_types[i]; + call_slots[i] = &stack[s]; + s += DIV_ROUND_UP(t->size, 8); + } + + /* Helper functions may need to access the "return address" */ + tci_tb_ptr = (uintptr_t)tb_ptr; + ffi_call(cif, func, stack, call_slots); + } + + switch (len) { + case 0: /* void */ + break; + case 1: /* uint32_t */ + /* + * The result winds up "left-aligned" in the stack[0] slot. + * Note that libffi has an odd special case in that it will + * always widen an integral result to ffi_arg. + */ + if (sizeof(ffi_arg) == 8) { + regs[TCG_REG_R0] = (uint32_t)stack[0]; + } else { + regs[TCG_REG_R0] = *(uint32_t *)stack; + } + break; + case 2: /* uint64_t */ + /* + * For TCG_TARGET_REG_BITS == 32, the register pair + * must stay in host memory order. + */ + memcpy(®s[TCG_REG_R0], stack, 8); + break; + case 3: /* Int128 */ + memcpy(®s[TCG_REG_R0], stack, 16); + break; + default: + g_assert_not_reached(); + } + break; case INDEX_op_and: tci_args_rrr(insn, &r0, &r1, &r2); regs[r0] = regs[r1] & regs[r2]; diff --git a/tcg/wasm.h b/tcg/wasm.h index 9da38e4d0ed96..a3631b34a8e65 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -10,6 +10,16 @@ struct WasmContext { * Pointer to the TB to be executed. */ void *tb_ptr; + + /* + * Pointer to the tci_tb_ptr variable. + */ + void *tci_tb_ptr; + + /* + * Buffer to store 128bit return value on call. + */ + void *buf128; }; #endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index c077c8ad7c115..0606b7de793ff 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -28,6 +28,14 @@ #include "qemu/queue.h" #include "../wasm.h" +/* Used for function call generation. */ +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL + typedef uint32_t tcg_insn_unit_tci; static const int tcg_target_reg_alloc_order[] = { @@ -143,6 +151,9 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { /* Local variable pointing to WasmContext */ #define CTX_IDX 0 +/* Function index */ +#define HELPER_IDX_START 0 /* The first index of helper functions */ + typedef enum { OPC_UNREACHABLE = 0x00, OPC_LOOP = 0x03, @@ -151,6 +162,7 @@ typedef enum { OPC_END = 0x0b, OPC_BR = 0x0c, OPC_RETURN = 0x0f, + OPC_CALL = 0x10, OPC_LOCAL_GET = 0x20, OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, @@ -832,6 +844,147 @@ static void tcg_wasm_out_goto_tb( tcg_wasm_out_op(s, OPC_END); } +static void push_arg_i64(TCGContext *s, int *stack_offset) +{ + intptr_t ofs; + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_CALL_STACK)); + ofs = tcg_wasm_out_norm_ptr(s, *stack_offset); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + *stack_offset = *stack_offset + 8; +} + +static void gen_call(TCGContext *s, + const TCGHelperInfo *info, uint32_t func_idx) +{ + unsigned typemask = info->typemask; + int rettype = typemask & 7; + int stack_offset = 0; + intptr_t ofs; + + if (rettype == dh_typecode_i128) { + /* receive 128bit return value via the buffer */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + } + + for (typemask >>= 3; typemask; typemask >>= 3) { + switch (typemask & 7) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + push_arg_i64(s, &stack_offset); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + break; + case dh_typecode_i64: + case dh_typecode_s64: + push_arg_i64(s, &stack_offset); + break; + case dh_typecode_i128: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_CALL_STACK)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, stack_offset); + tcg_wasm_out_op(s, OPC_I64_ADD); + stack_offset += 16; + break; + case dh_typecode_ptr: + push_arg_i64(s, &stack_offset); + break; + default: + g_assert_not_reached(); + } + } + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + + switch (rettype) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_S); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + case dh_typecode_i64: + case dh_typecode_s64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + case dh_typecode_i128: + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 8); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R1)); + break; + case dh_typecode_ptr: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + default: + g_assert_not_reached(); + } +} + +typedef struct HelperInfo { + intptr_t idx_on_qemu; + QSIMPLEQ_ENTRY(HelperInfo) entry; +} HelperInfo; + +static __thread QSIMPLEQ_HEAD(, HelperInfo) helpers; +__thread uint32_t helper_idx; + +static void init_helpers(void) +{ + QSIMPLEQ_INIT(&helpers); + helper_idx = HELPER_IDX_START; +} + +static uint32_t register_helper(TCGContext *s, intptr_t helper_idx_on_qemu) +{ + tcg_debug_assert(helper_idx_on_qemu >= 0); + + HelperInfo *e = tcg_malloc(sizeof(HelperInfo)); + e->idx_on_qemu = helper_idx_on_qemu; + QSIMPLEQ_INSERT_TAIL(&helpers, e, entry); + + return helper_idx++; +} + +static int64_t get_helper_idx(TCGContext *s, intptr_t helper_idx_on_qemu) +{ + uint32_t idx = HELPER_IDX_START; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + if (e->idx_on_qemu == helper_idx_on_qemu) { + return idx; + } + idx++; + } + return -1; +} + +static void tcg_wasm_out_call(TCGContext *s, intptr_t func, + const TCGHelperInfo *info) +{ + intptr_t ofs; + int64_t func_idx = get_helper_idx(s, func); + if (func_idx < 0) { + func_idx = register_helper(s, func); + } + + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tci_tb_ptr)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (uint64_t)s->code_ptr); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + + gen_call(s, info, func_idx); +} + static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) { tcg_insn_unit_tci insn = 0; @@ -1604,11 +1757,41 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, /* Always indirect, nothing to do */ } +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. */ + g_assert_not_reached(); +} + +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, + const TCGHelperInfo *info) +{ + ffi_cif *cif = info->cif; + tcg_insn_unit_tci insn = 0; + uint8_t which; + + if (cif->rtype == &ffi_type_void) { + which = 0; + } else { + tcg_debug_assert(cif->rtype->size == 4 || + cif->rtype->size == 8 || + cif->rtype->size == 16); + which = ctz32(cif->rtype->size) - 1; + } + new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif); + insn = deposit32(insn, 0, 8, INDEX_op_call); + insn = deposit32(insn, 8, 4, which); + tcg_out32(s, insn); + tcg_wasm_out_call(s, (intptr_t)func, info); +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); init_blocks(); init_label_info(); + init_helpers(); tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); From 85196f153dd3d5c11c9b4701a4f56dc082348b31 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Thu, 21 Aug 2025 21:45:38 +0900 Subject: [PATCH 23/35] tcg/wasm: Add qemu_ld/qemu_st instructions This commit adds qemu_ld and qemu_st by calling the helper functions corresponding to MemOp. Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 70 ++++++++++++++++++ tcg/wasm/tcg-target-has.h | 1 + tcg/wasm/tcg-target.c.inc | 145 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index db0c213d92932..793c1807c257d 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -63,6 +63,14 @@ static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1) *i1 = sextract32(insn, 12, 20); } +static void tci_args_rrm(uint32_t insn, TCGReg *r0, + TCGReg *r1, MemOpIdx *m2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *m2 = extract32(insn, 16, 16); +} + static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) { *r0 = extract32(insn, 8, 4); @@ -190,6 +198,56 @@ static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) return result; } +static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr, + MemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi); + uintptr_t ra = (uintptr_t)tb_ptr; + + switch (mop & MO_SSIZE) { + case MO_UB: + return helper_ldub_mmu(env, taddr, oi, ra); + case MO_SB: + return helper_ldsb_mmu(env, taddr, oi, ra); + case MO_UW: + return helper_lduw_mmu(env, taddr, oi, ra); + case MO_SW: + return helper_ldsw_mmu(env, taddr, oi, ra); + case MO_UL: + return helper_ldul_mmu(env, taddr, oi, ra); + case MO_SL: + return helper_ldsl_mmu(env, taddr, oi, ra); + case MO_UQ: + return helper_ldq_mmu(env, taddr, oi, ra); + default: + g_assert_not_reached(); + } +} + +static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val, + MemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi); + uintptr_t ra = (uintptr_t)tb_ptr; + + switch (mop & MO_SIZE) { + case MO_UB: + helper_stb_mmu(env, taddr, val, oi, ra); + break; + case MO_UW: + helper_stw_mmu(env, taddr, val, oi, ra); + break; + case MO_UL: + helper_stl_mmu(env, taddr, val, oi, ra); + break; + case MO_UQ: + helper_stq_mmu(env, taddr, val, oi, ra); + break; + default: + g_assert_not_reached(); + } +} + static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) { const uint32_t *tb_ptr = v_tb_ptr; @@ -208,6 +266,8 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) uint8_t pos, len; TCGCond condition; uint32_t tmp32; + uint64_t taddr; + MemOpIdx oi; int32_t ofs; void *ptr; @@ -496,6 +556,16 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) } tb_ptr = ptr; break; + case INDEX_op_qemu_ld: + tci_args_rrm(insn, &r0, &r1, &oi); + taddr = regs[r1]; + regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); + break; + case INDEX_op_qemu_st: + tci_args_rrm(insn, &r0, &r1, &oi); + taddr = regs[r1]; + tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h index a29ceb2ea5154..8fe9b4540318a 100644 --- a/tcg/wasm/tcg-target-has.h +++ b/tcg/wasm/tcg-target-has.h @@ -4,6 +4,7 @@ #define TCG_TARGET_HAS_tst 0 #define TCG_TARGET_HAS_extr_i64_i32 0 +#define TCG_TARGET_HAS_qemu_ldst_i128 0 #define TCG_TARGET_extract_valid(type, ofs, len) 0 #define TCG_TARGET_sextract_valid(type, ofs, len) \ diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 0606b7de793ff..e1ee2f6485ee0 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -985,6 +985,99 @@ static void tcg_wasm_out_call(TCGContext *s, intptr_t func, gen_call(s, info, func_idx); } +static void *qemu_ld_helper_ptr(uint32_t oi) +{ + MemOp mop = get_memop(oi); + switch (mop & MO_SSIZE) { + case MO_UB: + return helper_ldub_mmu; + case MO_SB: + return helper_ldsb_mmu; + case MO_UW: + return helper_lduw_mmu; + case MO_SW: + return helper_ldsw_mmu; + case MO_UL: + return helper_ldul_mmu; + case MO_SL: + return helper_ldsl_mmu; + case MO_UQ: + return helper_ldq_mmu; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) +{ + intptr_t helper_idx; + int64_t func_idx; + + helper_idx = (intptr_t)qemu_ld_helper_ptr(oi); + func_idx = get_helper_idx(s, helper_idx); + if (func_idx < 0) { + func_idx = register_helper(s, helper_idx); + } + + /* call the target helper */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + tcg_wasm_out_op_const(s, OPC_I32_CONST, oi); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(data_reg)); +} + +static void *qemu_st_helper_ptr(uint32_t oi) +{ + MemOp mop = get_memop(oi); + switch (mop & MO_SIZE) { + case MO_8: + return helper_stb_mmu; + case MO_16: + return helper_stw_mmu; + case MO_32: + return helper_stl_mmu; + case MO_64: + return helper_stq_mmu; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) +{ + intptr_t helper_idx; + int64_t func_idx; + MemOp mop = get_memop(oi); + + helper_idx = (intptr_t)qemu_st_helper_ptr(oi); + func_idx = get_helper_idx(s, helper_idx); + if (func_idx < 0) { + func_idx = register_helper(s, helper_idx); + } + + /* call the target helper */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + switch (mop & MO_SSIZE) { + case MO_UQ: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(data_reg)); + break; + default: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(data_reg)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + break; + } + tcg_wasm_out_op_const(s, OPC_I32_CONST, oi); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); +} + static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) { tcg_insn_unit_tci insn = 0; @@ -1054,6 +1147,19 @@ static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) tcg_out32(s, insn); } +static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGArg m2) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(m2 == extract32(m2, 0, 16)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 16, m2); + tcg_out32(s, insn); +} + static void tcg_out_op_rrr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1, TCGReg r2) { @@ -1786,6 +1892,45 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, tcg_wasm_out_call(s, (intptr_t)func, info); } +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); + tcg_wasm_out_qemu_ld(s, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); + tcg_wasm_out_qemu_st(s, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +bool tcg_target_has_memory_bswap(MemOp memop) +{ + return true; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + g_assert_not_reached(); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + g_assert_not_reached(); +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 3db8ec74078663cbaabcc680e53f1d588f5f5970 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 22 Aug 2025 15:56:14 +0900 Subject: [PATCH 24/35] tcg/wasm: Add mb instruction This commit generates the mb operation. In Wasm, it uses the atomic.fence instruction as the fence operator [1]. TCI instruction is also generated in the same way as the original TCI backend using smp_mb(). [1] https://webassembly.github.io/threads/core/syntax/instructions.html#atomic-memory-instructions Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 4 ++++ tcg/wasm/tcg-target.c.inc | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index 793c1807c257d..1cc2e45e77812 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -566,6 +566,10 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) taddr = regs[r1]; tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); break; + case INDEX_op_mb: + /* Ensure ordering for all kinds */ + smp_mb(); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index e1ee2f6485ee0..1d639561db6f5 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -1116,6 +1116,11 @@ static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0) tcg_out32(s, insn); } +static void tcg_out_op_v(TCGContext *s, TCGOpcode op) +{ + tcg_out32(s, (uint8_t)op); +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -1931,6 +1936,20 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) g_assert_not_reached(); } +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + tcg_out_op_v(s, INDEX_op_mb); + + /* + * Wasm's threading proposal provides atomic.fence instruction as the fence + * operator. + * https://webassembly.github.io/threads/core/syntax/instructions.html#atomic-memory-instructions + */ + tcg_wasm_out8(s, 0xfe); + tcg_wasm_out8(s, 0x03); + tcg_wasm_out8(s, 0x00); +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From a3656a34217da7ac63ecdab31370b35b70b3ac94 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 22 Aug 2025 15:42:47 +0900 Subject: [PATCH 25/35] tcg/wasm: Mark unimplemented instructions This commit adds the C_NotImplemented constraint and provides stubs for the functions that aren't implemented in the Wasm backend. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target.c.inc | 119 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 1d639561db6f5..598db7b4bd355 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -1950,6 +1950,125 @@ static void tcg_out_mb(TCGContext *s, unsigned a0) tcg_wasm_out8(s, 0x00); } +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpDivRem outop_divu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpMul2 outop_muls2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_mulsh = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpMul2 outop_mulu2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_muluh = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBswap outop_bswap16 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBswap outop_bswap32 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpUnary outop_bswap64 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpQemuLdSt2 outop_qemu_st2 = { + .base.static_constraint = C_NotImplemented, +}; + +static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2) +{ + return false; +} + +static void tcg_out_set_borrow(TCGContext *s) +{ + g_assert_not_reached(); +} + +static void tcg_out_set_carry(TCGContext *s) +{ + g_assert_not_reached(); +} + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); From 6cad2aa3c86c1b67dc2742a7749bef1e752d1ac9 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 12:47:00 +0900 Subject: [PATCH 26/35] tcg/wasm: Add initialization of fundamental registers This commit adds initialization of TCG_AREG0 and TCG_REG_CALL_STACK at the beginning of each TB. The CPUArchState struct and the stack array are passed from the caller via the WasmContext structure. The BLOCK_IDX variable is initialized to 0 as TB execution begins at the first block. Signed-off-by: Kohei Tokunaga --- tcg/wasm.h | 10 ++++++++++ tcg/wasm/tcg-target.c.inc | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/tcg/wasm.h b/tcg/wasm.h index a3631b34a8e65..f2b67c10995bf 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -20,6 +20,16 @@ struct WasmContext { * Buffer to store 128bit return value on call. */ void *buf128; + + /* + * Pointer to the CPUArchState struct. + */ + CPUArchState *env; + + /* + * Pointer to a stack array. + */ + uint64_t *stack; }; #endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 598db7b4bd355..ba7a5efb5a1c4 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -2071,11 +2071,30 @@ static void tcg_out_set_carry(TCGContext *s) static void tcg_out_tb_start(TCGContext *s) { + intptr_t ofs; + init_sub_buf(); init_blocks(); init_label_info(); init_helpers(); + /* Initialize fundamental registers */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + tcg_wasm_out_op(s, OPC_I64_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(env)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_AREG0)); + + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(stack)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_CALL_STACK)); + tcg_wasm_out_op(s, OPC_END); + + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); tcg_wasm_out_op(s, OPC_I64_EQZ); From 4d80cd33e4a7d49d4d1e778d58fbedd8b6528bdc Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 12:52:05 +0900 Subject: [PATCH 27/35] tcg/wasm: Write wasm binary to TB This commit updates tcg_out_tb_start and tcg_out_tb_end to emit Wasm binaries into the TB code buffer. The generated Wasm binary defines a function of type wasm_tb_func which takes a WasmContext, executes the TB, and returns a result. In the Wasm backend, each TB starts with a WasmTBHeader which contains pointers to the following data: - TCI code - Wasm code - Array of helper function pointers imported into the Wasm instance tcg_out_tb_start writes the WasmTBHeader to the code buffer. tcg_out_tb_end generates the full Wasm executable binary by creating the Wasm module header following the spec[1][2][3] and copying the Wasm code body from sub_buf to the TB. This Wasm binary is placed after the TCI code which was emitted earlier. Additionally, an array of imported function pointers is appended to the TB. They are used during Wasm module instantiation. Function are imported to Wasm with names like "helper.0", "helper.1", etc., where the number corresponds to the array index. Each function's type signature must also be encoded in the Wasm module header. To support this, every emission of "call", "qemu_ld" and "qemu_st" operations also records the target function's type information in a buffer which will be copied to the code buffer during tcg_out_tb_end. Memory is shared between QEMU and the TBs and is imported to the Wasm module with the name "env.memory". [1] https://webassembly.github.io/spec/core/binary/modules.html [2] https://github.com/WebAssembly/threads/blob/b2567bff61ee6fbe731934f0ed17a5d48dc9ab01/proposals/threads/Overview.md [3] https://github.com/WebAssembly/memory64/blob/9003cd5e24e53b84cd9027ea3dd7ae57159a6db1/proposals/memory64/Overview.md Signed-off-by: Kohei Tokunaga --- tcg/wasm.h | 26 +++ tcg/wasm/tcg-target.c.inc | 406 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 432 insertions(+) diff --git a/tcg/wasm.h b/tcg/wasm.h index f2b67c10995bf..b5d9ce75da9c8 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -32,4 +32,30 @@ struct WasmContext { uint64_t *stack; }; +/* Instantiated Wasm function of a TB */ +typedef uintptr_t (*wasm_tb_func)(struct WasmContext *); + +/* + * A TB of the Wasm backend starts from a header which contains pointers for + * each data stored in the following region in the TB. + */ +struct WasmTBHeader { + /* + * Pointer to the region containing TCI instructions. + */ + void *tci_ptr; + + /* + * Pointer to the region containing Wasm instructions. + */ + void *wasm_ptr; + int wasm_size; + + /* + * Pointer to the array containing imported function pointers. + */ + void *import_ptr; + int import_size; +}; + #endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index ba7a5efb5a1c4..7663f03eafb63 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -154,6 +154,8 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { /* Function index */ #define HELPER_IDX_START 0 /* The first index of helper functions */ +#define PTR_TYPE 0x7e + typedef enum { OPC_UNREACHABLE = 0x00, OPC_LOOP = 0x03, @@ -312,6 +314,19 @@ static int linked_buf_len(LinkedBuf *p) return total; } +static int linked_buf_write(LinkedBuf *p, void *dst) +{ + int total = 0; + LinkedBufEntry *e; + + QSIMPLEQ_FOREACH(e, p, entry) { + memcpy(dst, e->data, e->size); + dst += e->size; + total += e->size; + } + return total; +} + /* * wasm code is generataed in the dynamically allocated buffer which * are managed as a linked list. @@ -928,6 +943,99 @@ static void gen_call(TCGContext *s, } } +static __thread LinkedBuf types_buf; + +static void init_types_buf(void) +{ + QSIMPLEQ_INIT(&types_buf); +} + +static void types_buf_out8(uint8_t v) +{ + linked_buf_out8(&types_buf, v); +} + +static void gen_func_type_call(TCGContext *s, const TCGHelperInfo *info) +{ + unsigned typemask = info->typemask; + int rettype = typemask & 7; + uint32_t vec_size = 0; + + if (rettype == dh_typecode_i128) { + vec_size++; + } + for (int m = typemask >> 3; m; m >>= 3) { + if ((m & 7) != dh_typecode_void) { + vec_size++; + } + } + + types_buf_out8(0x60); + linked_buf_out_leb128(&types_buf, vec_size); + + if (rettype == dh_typecode_i128) { + types_buf_out8(PTR_TYPE); + } + + for (int m = typemask >> 3; m; m >>= 3) { + switch (m & 7) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + types_buf_out8(0x7f); + break; + case dh_typecode_i64: + case dh_typecode_s64: + types_buf_out8(0x7e); + break; + case dh_typecode_i128: + types_buf_out8(PTR_TYPE); + break; + case dh_typecode_ptr: + types_buf_out8(PTR_TYPE); + break; + default: + g_assert_not_reached(); + } + } + + switch (rettype) { + case dh_typecode_void: + case dh_typecode_i128: + types_buf_out8(0x0); + break; + case dh_typecode_i32: + case dh_typecode_s32: + types_buf_out8(0x1); + types_buf_out8(0x7f); + break; + case dh_typecode_i64: + case dh_typecode_s64: + types_buf_out8(0x1); + types_buf_out8(0x7e); + break; + case dh_typecode_ptr: + types_buf_out8(0x1); + types_buf_out8(PTR_TYPE); + break; + default: + g_assert_not_reached(); + } +} + +static __thread LinkedBuf imports_buf; + +static void init_imports_buf(void) +{ + QSIMPLEQ_INIT(&imports_buf); +} + +static void imports_buf_out8(uint8_t v) +{ + linked_buf_out8(&imports_buf, v); +} + typedef struct HelperInfo { intptr_t idx_on_qemu; QSIMPLEQ_ENTRY(HelperInfo) entry; @@ -944,15 +1052,56 @@ static void init_helpers(void) static uint32_t register_helper(TCGContext *s, intptr_t helper_idx_on_qemu) { + uint32_t typeidx = helper_idx + 1; + char buf[11]; /* enough for decimal int max + NULL*/ + int n = snprintf(buf, sizeof(buf), "%d", helper_idx - HELPER_IDX_START); + tcg_debug_assert(helper_idx_on_qemu >= 0); HelperInfo *e = tcg_malloc(sizeof(HelperInfo)); e->idx_on_qemu = helper_idx_on_qemu; QSIMPLEQ_INSERT_TAIL(&helpers, e, entry); + tcg_debug_assert(n < sizeof(buf)); + imports_buf_out8(6); /* helper */ + imports_buf_out8(0x68); + imports_buf_out8(0x65); + imports_buf_out8(0x6c); + imports_buf_out8(0x70); + imports_buf_out8(0x65); + imports_buf_out8(0x72); + linked_buf_out_leb128(&imports_buf, (uint32_t)n); + for (int i = 0; i < n; i++) { + imports_buf_out8(buf[i]); + } + imports_buf_out8(0); /* type(0) */ + linked_buf_out_leb128(&imports_buf, typeidx); + return helper_idx++; } +static int helpers_len(void) +{ + int n = 0; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + n++; + } + return n; +} + +static int helpers_write_to_array(intptr_t *dst) +{ + intptr_t *start = dst; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + *dst++ = e->idx_on_qemu; + } + return (intptr_t)dst - (intptr_t)start; +} + static int64_t get_helper_idx(TCGContext *s, intptr_t helper_idx_on_qemu) { uint32_t idx = HELPER_IDX_START; @@ -974,6 +1123,7 @@ static void tcg_wasm_out_call(TCGContext *s, intptr_t func, int64_t func_idx = get_helper_idx(s, func); if (func_idx < 0) { func_idx = register_helper(s, func); + gen_func_type_call(s, info); } ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tci_tb_ptr)); @@ -985,6 +1135,39 @@ static void tcg_wasm_out_call(TCGContext *s, intptr_t func, gen_call(s, info, func_idx); } +static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi) +{ + types_buf_out8(0x60); + types_buf_out8(0x4); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x7e); + types_buf_out8(0x7f); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x1); + types_buf_out8(0x7e); +} + +static void gen_func_type_qemu_st(TCGContext *s, uint32_t oi) +{ + MemOp mop = get_memop(oi); + + types_buf_out8(0x60); + types_buf_out8(0x5); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x7e); + switch (mop & MO_SSIZE) { + case MO_UQ: + types_buf_out8(0x7e); + break; + default: + types_buf_out8(0x7f); + break; + } + types_buf_out8(0x7f); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x0); +} + static void *qemu_ld_helper_ptr(uint32_t oi) { MemOp mop = get_memop(oi); @@ -1018,6 +1201,7 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, func_idx = get_helper_idx(s, helper_idx); if (func_idx < 0) { func_idx = register_helper(s, helper_idx); + gen_func_type_qemu_ld(s, oi); } /* call the target helper */ @@ -1058,6 +1242,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, func_idx = get_helper_idx(s, helper_idx); if (func_idx < 0) { func_idx = register_helper(s, helper_idx); + gen_func_type_qemu_st(s, oi); } /* call the target helper */ @@ -2069,14 +2254,164 @@ static void tcg_out_set_carry(TCGContext *s) g_assert_not_reached(); } +static const uint8_t mod_1[] = { + 0x0, 0x61, 0x73, 0x6d, /* magic */ + 0x01, 0x0, 0x0, 0x0, /* version */ + + 0x01, /* type section */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placehodler for size */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placehodler for num of types vec */ + 0x60, /* 0: Type of "start" function */ + 0x01, PTR_TYPE, /* arg: ctx pointer */ + 0x01, PTR_TYPE, /* return: res */ +}; + +#define MOD_1_PH_TYPE_SECTION_SIZE_OFF 9 +#define MOD_1_PH_TYPE_VEC_NUM_OFF 14 + +static const uint8_t mod_2[] = { + 0x02, /* import section */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placehodler for size */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placehodler for imports num */ + 0x03, 0x65, 0x6e, 0x76, /* module: "env" */ + 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, /* name: "memory" */ +#if defined(WASM64_MEMORY64_2) + /* 32bit memory is used for Emscripten's "-sMEMORY64=2" configuration. */ + 0x02, 0x03, /* shared mem */ + 0x00, 0x80, 0x80, 0x04, /* min: 0, max: 65536 pages */ +#else + /* + * 64bit memory is used for Emscripten's "-sMEMORY64=1" configuration. + * Note: the maximum 64bit memory size of the engine implementations is + * limited to 262144 pages(16GiB) + * https://webassembly.github.io/memory64/js-api/#limits + */ + 0x02, 0x07, /* shared mem(64bit) */ + 0x00, 0x80, 0x80, 0x10, /* min: 0, max: 262144 pages */ +#endif +}; + +#define MOD_2_PH_IMPORT_SECTION_SIZE_OFF 1 +#define MOD_2_PH_IMPORT_VEC_NUM_OFF 6 + +static const uint8_t mod_3[] = { + 0x03, /* function section */ + 2, 1, 0x00, /* function type 0 */ + + 0x06, /* global section */ + 86, /* section size */ + 17, /* num of global vars */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + 0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */ + + 0x07, /* export section */ + 13, /* size of section */ + 1, /* num of funcs */ + 0x05, 0x73, 0x74, 0x61, 0x72, 0x74, /* "start" function */ + 0x00, 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for func index*/ + + 0x0a, /* code section */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for section size*/ + 1, /* num of codes */ + 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for code size */ + 0x0, /* local variables (none) */ +}; + +#define MOD_3_PH_EXPORT_START_FUNC_IDX 102 +#define MOD_3_PH_CODE_SECTION_SIZE_OFF 108 +#define MOD_3_PH_CODE_SIZE_OFF 114 +#define MOD_3_VARIABLES_SIZE 5 +#define MOD_3_CODE_SECTION_SIZE_ADD 11 + +static void fill_uint32_leb128(uint8_t *b, uint32_t v) +{ + do { + *b |= v & 0x7f; + v >>= 7; + b++; + } while (v != 0); +} + +typedef struct FillValueU32 { + int64_t offset; + uint32_t value; +} FillValueU32; + +static int write_mod(TCGContext *s, const uint8_t mod[], int len, + FillValueU32 values[], int values_len) +{ + void *base = s->code_ptr; + + if (unlikely(((void *)s->code_ptr + len) + > s->code_gen_highwater)) { + return -1; + } + + memcpy(s->code_ptr, mod, len); + s->code_ptr += len; + + for (int i = 0; i < values_len; i++) { + fill_uint32_leb128(base + values[i].offset, values[i].value); + } + + return 0; +} + +static int write_mod_code(TCGContext *s) +{ + void *base = s->code_ptr; + int code_size = sub_buf_len(); + BlockPlaceholder *e; + + if (unlikely(((void *)s->code_ptr + code_size) > s->code_gen_highwater)) { + return -1; + } + linked_buf_write(&sub_buf, s->code_ptr); + s->code_ptr += code_size; + + QSIMPLEQ_FOREACH(e, &block_placeholder, entry) { + uint8_t *ph = e->pos + base; + int blk = get_block_of_label(e->label); + tcg_debug_assert(blk >= 0); + fill_uint32_leb128(ph, blk); + } + + return 0; +} + static void tcg_out_tb_start(TCGContext *s) { intptr_t ofs; + struct WasmTBHeader *h; init_sub_buf(); init_blocks(); init_label_info(); init_helpers(); + init_types_buf(); + init_imports_buf(); + + /* TB starts from a header */ + h = (struct WasmTBHeader *)(s->code_ptr); + s->code_ptr += sizeof(struct WasmTBHeader); + + /* Followed by TCI code */ + h->tci_ptr = s->code_ptr; /* Initialize fundamental registers */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); @@ -2103,10 +2438,81 @@ static void tcg_out_tb_start(TCGContext *s) static int tcg_out_tb_end(TCGContext *s) { + int res; + struct WasmTBHeader *h = (struct WasmTBHeader *)(s->code_buf); + tcg_wasm_out_op(s, OPC_END); /* end if */ tcg_wasm_out_op(s, OPC_END); /* end loop */ tcg_wasm_out_op(s, OPC_UNREACHABLE); tcg_wasm_out_op(s, OPC_END); /* end func */ + /* write wasm blob */ + h->wasm_ptr = s->code_ptr; + + res = write_mod(s, mod_1, sizeof(mod_1), (FillValueU32[]) { + { + MOD_1_PH_TYPE_SECTION_SIZE_OFF, + linked_buf_len(&types_buf) + + sizeof(mod_1) - MOD_1_PH_TYPE_VEC_NUM_OFF + }, + { + MOD_1_PH_TYPE_VEC_NUM_OFF, + HELPER_IDX_START + helpers_len() + 1/* start */ + }, + }, 2); + if (res < 0) { + return res; + } + s->code_ptr += linked_buf_write(&types_buf, s->code_ptr); + + res = write_mod(s, mod_2, sizeof(mod_2), (FillValueU32[]) { + { + MOD_2_PH_IMPORT_SECTION_SIZE_OFF, + linked_buf_len(&imports_buf) + + sizeof(mod_2) - MOD_2_PH_IMPORT_VEC_NUM_OFF + }, + { + MOD_2_PH_IMPORT_VEC_NUM_OFF, + HELPER_IDX_START + helpers_len() + 1/* memory */ + }, + }, 2); + if (res < 0) { + return res; + } + s->code_ptr += linked_buf_write(&imports_buf, s->code_ptr); + + res = write_mod(s, mod_3, sizeof(mod_3), (FillValueU32[]) { + { + MOD_3_PH_EXPORT_START_FUNC_IDX, + HELPER_IDX_START + helpers_len() + }, + { + MOD_3_PH_CODE_SECTION_SIZE_OFF, + sub_buf_len() + MOD_3_CODE_SECTION_SIZE_ADD + }, + { + MOD_3_PH_CODE_SIZE_OFF, + sub_buf_len() + MOD_3_VARIABLES_SIZE + }, + }, 3); + if (res < 0) { + return res; + } + + res = write_mod_code(s); + if (res < 0) { + return res; + } + h->wasm_size = (intptr_t)s->code_ptr - (intptr_t)h->wasm_ptr; + + /* record imported helper functions */ + if (unlikely(((void *)s->code_ptr + helpers_len() * 4) + > s->code_gen_highwater)) { + return -1; + } + h->import_ptr = s->code_ptr; + s->code_ptr += helpers_write_to_array((intptr_t *)s->code_ptr); + h->import_size = (intptr_t)s->code_ptr - (intptr_t)h->import_ptr; + return 0; } From c0d05186cd431515eb3f97a7c860ec5b6deac0a5 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 12:53:10 +0900 Subject: [PATCH 28/35] tcg/wasm: Implement instantiation of Wasm binary instantiate_wasm is a function that instantiates a TB's Wasm binary, importing the functions as specified by its arguments. Following the header definition in wasm/tcg-target.c.inc, QEMU's memory is imported into the module as "env.memory", and helper functions are imported as "helper.". The instantiated Wasm module is imported to QEMU using Emscripten's "addFunction" feature[1] which returns a function pointer. This allows QEMU to call this module directly from C code via that pointer. Since the subarray() method doesn't accept a BigInt value which is used for the 64bit pointer value, it is converted to a Number (i53) using bigintToI53Checked method of Emscripten. Although this conversion (64bit to 53bit) drops higher bits, the maximum memory size of the engine implementations is currently limited to 16GiB[2] so we can assume that the pointers are within the Number's range. Note that since FireFox 138, WebAssembly.Module no longer accepts a SharedArrayBuffer as input [3] as reported by Nicolas Vandeginste in my fork[4]. This commit ensures that WebAssembly.Module() is passed a Uint8Array created from the binary data on a SharedArrayBuffer. [1] https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html#calling-javascript-functions-as-function-pointers-from-c [2] https://webassembly.github.io/memory64/js-api/#limits [3] https://bugzilla.mozilla.org/show_bug.cgi?id=1965217 [4] https://github.com/ktock/qemu-wasm/pull/25 Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tcg/wasm.c b/tcg/wasm.c index 1cc2e45e77812..15db1f9a8a599 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -23,6 +23,43 @@ #include "tcg/tcg-ldst.h" #include "tcg/helper-info.h" #include +#include + +#define EM_JS_PRE(ret, name, args, body...) EM_JS(ret, name, args, body) + +#define DEC_PTR(p) bigintToI53Checked(p) +#define ENC_PTR(p) BigInt(p) +#if defined(WASM64_MEMORY64_2) +#define ENC_WASM_TABLE_IDX(i) Number(i) +#else +#define ENC_WASM_TABLE_IDX(i) i +#endif + +EM_JS_PRE(void*, instantiate_wasm, (void *wasm_begin, + int wasm_size, + void *import_vec_begin, + int import_vec_size), +{ + const memory_v = new DataView(HEAP8.buffer); + const wasm = HEAP8.subarray(DEC_PTR(wasm_begin), + DEC_PTR(wasm_begin) + wasm_size); + var helper = {}; + const entsize = TCG_TARGET_REG_BITS / 8; + for (var i = 0; i < import_vec_size / entsize; i++) { + const idx = memory_v.getBigInt64( + DEC_PTR(import_vec_begin) + i * entsize, true); + helper[i] = wasmTable.get(ENC_WASM_TABLE_IDX(idx)); + } + const mod = new WebAssembly.Module(new Uint8Array(wasm)); + const inst = new WebAssembly.Instance(mod, { + "env" : { + "memory" : wasmMemory, + }, + "helper" : helper, + }); + + return ENC_PTR(addFunction(inst.exports.start, 'ii')); +}); __thread uintptr_t tci_tb_ptr; From 18770eb1458b4824b97608563ce6a65dc381e994 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 12:57:05 +0900 Subject: [PATCH 29/35] tcg/wasm: Allow switching coroutine from a helper Emscripten's Fiber coroutine implements coroutine switching using Asyncify's stack unwinding and rewinding features [1]. When a coroutine yields (i.e. switches out), Asyncify unwinds the stack, returning control to Emscripten's JS code (Fiber.trampoline()). Then execution resumes in the target coroutine by rewinding the stack. Stack unwinding is implemented by a sequence of immediate function returns, while rewinding re-enters the functions in the call stack, skipping any code between the function's entry point and the original call position [2]. This commit updates the TB's Wasm module to allow helper functions to trigger coroutine switching. Particaully, the TB handles the unwinding and rewinding flows as follows: - The TB check the Asyncify.state JS object after each helper call. If unwinding is in progress, the TB immediately returns to the caller so that the unwinding can continue. - Each function call is preceded by a block boundary and an update of the BLOCK_IDX variable. This enables rewinding to skip any code between the function's entry point and the original call position. Additionally, this commit introduces WasmContext.do_init which is a flag indicating whether the TB should reset the BLOCK_IDX variable to 0 (i.e. start from the beginning). call_wasm_tb is a newly introduced wrapper function for the Wasm module's entrypoint and this sets "do_init = 1" to ensure normal TB execution begins at the first block. During a rewinding, the C code does not set do_init to 1, allowing the TB to preserve the BLOCK_IDX value from the previous unwinding and correctly resume execution from the last unwound block. [1] https://emscripten.org/docs/api_reference/fiber.h.html [2] https://kripken.github.io/blog/wasm/2019/07/16/asyncify.html#new-asyncify Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 3 ++ tcg/wasm.h | 11 ++++++++ tcg/wasm/tcg-target.c.inc | 58 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/tcg/wasm.c b/tcg/wasm.c index 15db1f9a8a599..82987e9dff151 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -44,6 +44,9 @@ EM_JS_PRE(void*, instantiate_wasm, (void *wasm_begin, const wasm = HEAP8.subarray(DEC_PTR(wasm_begin), DEC_PTR(wasm_begin) + wasm_size); var helper = {}; + helper.u = () => { + return (Asyncify.state != Asyncify.State.Unwinding) ? 1 : 0; + }; const entsize = TCG_TARGET_REG_BITS / 8; for (var i = 0; i < import_vec_size / entsize; i++) { const idx = memory_v.getBigInt64( diff --git a/tcg/wasm.h b/tcg/wasm.h index b5d9ce75da9c8..fdde908557268 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -30,11 +30,22 @@ struct WasmContext { * Pointer to a stack array. */ uint64_t *stack; + + /* + * Flag indicating whether to initialize the block index(1) or not(0). + */ + uint32_t do_init; }; /* Instantiated Wasm function of a TB */ typedef uintptr_t (*wasm_tb_func)(struct WasmContext *); +static inline uintptr_t call_wasm_tb(wasm_tb_func f, struct WasmContext *ctx) +{ + ctx->do_init = 1; /* reset the block index (rewinding will skip this) */ + return f(ctx); +} + /* * A TB of the Wasm backend starts from a header which contains pointers for * each data stored in the following region in the TB. diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 7663f03eafb63..6af4d6eb07567 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -152,7 +152,8 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { #define CTX_IDX 0 /* Function index */ -#define HELPER_IDX_START 0 /* The first index of helper functions */ +#define CHECK_UNWINDING_IDX 0 /* A function to check the Asyncify status */ +#define HELPER_IDX_START 1 /* The first index of helper functions */ #define PTR_TYPE 0x7e @@ -169,6 +170,7 @@ typedef enum { OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I32_LOAD = 0x28, OPC_I64_LOAD = 0x29, OPC_I64_LOAD8_S = 0x30, OPC_I64_LOAD8_U = 0x31, @@ -176,6 +178,7 @@ typedef enum { OPC_I64_LOAD16_U = 0x33, OPC_I64_LOAD32_S = 0x34, OPC_I64_LOAD32_U = 0x35, + OPC_I32_STORE = 0x36, OPC_I64_STORE = 0x37, OPC_I64_STORE8 = 0x3c, OPC_I64_STORE16 = 0x3d, @@ -1116,6 +1119,17 @@ static int64_t get_helper_idx(TCGContext *s, intptr_t helper_idx_on_qemu) return -1; } +static void tcg_wasm_out_handle_unwinding(TCGContext *s) +{ + tcg_wasm_out_op_idx(s, OPC_CALL, CHECK_UNWINDING_IDX); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + /* returns if unwinding */ + tcg_wasm_out_op(s, OPC_RETURN); + tcg_wasm_out_op(s, OPC_END); +} + static void tcg_wasm_out_call(TCGContext *s, intptr_t func, const TCGHelperInfo *info) { @@ -1132,7 +1146,16 @@ static void tcg_wasm_out_call(TCGContext *s, intptr_t func, tcg_wasm_out_op_const(s, OPC_I64_CONST, (uint64_t)s->code_ptr); tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + gen_call(s, info, func_idx); + tcg_wasm_out_handle_unwinding(s); } static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi) @@ -1204,6 +1227,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_ld(s, oi); } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1212,6 +1243,7 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(data_reg)); + tcg_wasm_out_handle_unwinding(s); } static void *qemu_st_helper_ptr(uint32_t oi) @@ -1245,6 +1277,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_st(s, oi); } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1261,6 +1301,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + tcg_wasm_out_handle_unwinding(s); } static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) @@ -2264,6 +2305,9 @@ static const uint8_t mod_1[] = { 0x60, /* 0: Type of "start" function */ 0x01, PTR_TYPE, /* arg: ctx pointer */ 0x01, PTR_TYPE, /* return: res */ + 0x60, /* 1: Type of the asyncify helper */ + 0x0, /* no argument */ + 0x01, 0x7f, /* return: res (i32) */ }; #define MOD_1_PH_TYPE_SECTION_SIZE_OFF 9 @@ -2289,6 +2333,9 @@ static const uint8_t mod_2[] = { 0x02, 0x07, /* shared mem(64bit) */ 0x00, 0x80, 0x80, 0x10, /* min: 0, max: 262144 pages */ #endif + 0x06, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, /* module: "helper" */ + 0x01, 0x75, /* name: "u" */ + 0x00, 0x01, /* func type 1 */ }; #define MOD_2_PH_IMPORT_SECTION_SIZE_OFF 1 @@ -2427,8 +2474,17 @@ static void tcg_out_tb_start(TCGContext *s) tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_CALL_STACK)); tcg_wasm_out_op(s, OPC_END); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init)); + tcg_wasm_out_op_ldst(s, OPC_I32_LOAD, 0, ofs); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op(s, OPC_I32_NE); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init)); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op_ldst(s, OPC_I32_STORE, 0, ofs); + tcg_wasm_out_op(s, OPC_END); tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); From d7d4fc4a608aed5fc04c1fe4987be876a8af4ba3 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 22 Aug 2025 15:55:30 +0900 Subject: [PATCH 30/35] tcg/wasm: Enable instantiation of TBs executed many times This commit enables the instantiation and execution of TBs in wasm.c. As in TCI, the tcg_qemu_tb_exec function serves as the entrypoint for the TB execution, handling both instantiation and invocation of the Wasm module. Since browsers cause out of memory error if too many Wasm instances are created, this commit restricts instantiation to TBs that are called many times. This commit adds a counter (or its array if there are multiple threads) to the TB. Each time a TB is executed on TCI, the counter on TB is incremented. If it reaches to a threshold, that TB is instantiated as Wasm via instantiate_wasm. The total number of instances are tracked by the instances_global variable and its maximum number is limited by MAX_INSTANCES. When a Wasm module is instantiated, instances_global is incremented and the instance's function pointer is recorded to an array of WasmInstanceInfo. Each TB refers to the WasmInstanceInfo entry via WasmTBHeader's info_ptr (or its array if there are multiple threads). This allows tcg_qemu_tb_exec to resolve the instance's function pointer from the TB. When a new instantiation would exceed the limit, the Wasm backend doesn't perform instantiation (i.e. TB continues execution on TCI). Instead, it triggers the removal of older Wasm instances using Emscripten's removeFunction function. Once the removal is completed and detected via FinalizationRegistry API[1], instances_global is decremented, allowing new modules to be instantiated. [1] https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/FinalizationRegistry Signed-off-by: Kohei Tokunaga --- tcg/wasm.c | 244 +++++++++++++++++++++++++++++++++++++- tcg/wasm.h | 45 +++++++ tcg/wasm/tcg-target.c.inc | 21 ++++ 3 files changed, 307 insertions(+), 3 deletions(-) diff --git a/tcg/wasm.c b/tcg/wasm.c index 82987e9dff151..00f422218360a 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -24,6 +24,10 @@ #include "tcg/helper-info.h" #include #include +#include "wasm.h" + +/* TBs executed more than this value will be compiled to wasm */ +#define INSTANTIATE_NUM 1500 #define EM_JS_PRE(ret, name, args, body...) EM_JS(ret, name, args, body) @@ -61,6 +65,8 @@ EM_JS_PRE(void*, instantiate_wasm, (void *wasm_begin, "helper" : helper, }); + Module.__wasm_tb.inst_gc_registry.register(inst, "tbinstance"); + return ENC_PTR(addFunction(inst.exports.start, 'ii')); }); @@ -288,9 +294,53 @@ static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val, } } -static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) +static __thread int thread_idx; + +static inline int32_t get_counter_local(void *tb_ptr) +{ + return get_counter(tb_ptr, thread_idx); +} + +static inline void set_counter_local(void *tb_ptr, int v) +{ + set_counter(tb_ptr, thread_idx, v); +} + +static inline struct WasmInstanceInfo *get_info_local(void *tb_ptr) +{ + return get_info(tb_ptr, thread_idx); +} + +static inline void set_info_local(void *tb_ptr, struct WasmInstanceInfo *info) +{ + set_info(tb_ptr, thread_idx, info); +} + +/* + * inc_counter increments the execution counter in the specified TB. + * If the counter reaches the limit, it returns true otherwise returns false. + */ +static inline bool inc_counter(void *tb_ptr) { - const uint32_t *tb_ptr = v_tb_ptr; + int32_t counter = get_counter_local(tb_ptr); + if ((counter >= 0) && (counter < INSTANTIATE_NUM)) { + set_counter_local(tb_ptr, counter + 1); + } else { + return true; /* enter to wasm TB */ + } + return false; +} + +static __thread struct WasmContext ctx = { + .tb_ptr = 0, + .stack = NULL, + .do_init = 1, + .buf128 = NULL, +}; + +static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env) +{ + uint32_t *tb_ptr = get_tci_ptr(ctx.tb_ptr); tcg_target_ulong regs[TCG_TARGET_NB_REGS]; uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) / sizeof(uint64_t)]; @@ -583,18 +633,32 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) break; case INDEX_op_exit_tb: tci_args_l(insn, tb_ptr, &ptr); + ctx.tb_ptr = 0; return (uintptr_t)ptr; case INDEX_op_goto_tb: tci_args_l(insn, tb_ptr, &ptr); - tb_ptr = *(void **)ptr; + if (tb_ptr != *(void **)ptr) { + tb_ptr = *(void **)ptr; + ctx.tb_ptr = tb_ptr; + if (inc_counter(tb_ptr)) { + return 0; /* enter to wasm TB */ + } + tb_ptr = get_tci_ptr(tb_ptr); + } break; case INDEX_op_goto_ptr: tci_args_r(insn, &r0); ptr = (void *)regs[r0]; if (!ptr) { + ctx.tb_ptr = 0; return 0; } tb_ptr = ptr; + ctx.tb_ptr = tb_ptr; + if (inc_counter(tb_ptr)) { + return 0; /* enter to wasm TB */ + } + tb_ptr = get_tci_ptr(tb_ptr); break; case INDEX_op_qemu_ld: tci_args_rrm(insn, &r0, &r1, &oi); @@ -615,3 +679,177 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) } } } + +/* + * The maximum number of instances that can exist simultaneously + * + * If this limit is reached and a new instance is required, older instances are + * removed to allow creation of new ones without exceeding the browser's limit. + */ +#define MAX_INSTANCES 12000 + +static int instances_global; + +/* Avoid overwrapping of begin/end pointers */ +#define INSTANCES_BUF_MAX (MAX_INSTANCES + 1) + +static __thread struct WasmInstanceInfo instances[INSTANCES_BUF_MAX]; +static __thread int instances_begin; +static __thread int instances_end; + +static void add_instance(wasm_tb_func tb_func, void *tb_ptr) +{ + instances[instances_end].tb_func = tb_func; + instances[instances_end].tb_ptr = tb_ptr; + set_info_local(tb_ptr, &(instances[instances_end])); + instances_end = (instances_end + 1) % INSTANCES_BUF_MAX; + + qatomic_inc(&instances_global); +} + +static __thread int instance_pending_gc; +static __thread int instance_done_gc; + +static void remove_old_instances(void) +{ + int num; + if (instance_pending_gc > 0) { + return; + } + if (instances_begin <= instances_end) { + num = instances_end - instances_begin; + } else { + num = instances_end + (INSTANCES_BUF_MAX - instances_begin); + } + /* removes the half of the oldest instances in the buffer */ + num /= 2; + for (int i = 0; i < num; i++) { + EM_ASM({ removeFunction($0); }, instances[instances_begin].tb_func); + instances[instances_begin].tb_ptr = NULL; + instances_begin = (instances_begin + 1) % INSTANCES_BUF_MAX; + } + instance_pending_gc += num; +} + +static bool can_add_instance(void) +{ + return qatomic_read(&instances_global) < MAX_INSTANCES; +} + +static wasm_tb_func get_instance_from_tb(void *tb_ptr) +{ + struct WasmInstanceInfo *elm = get_info_local(tb_ptr); + if (elm == NULL) { + return NULL; + } + if (elm->tb_ptr != tb_ptr) { + /* + * This TB was instantiated before, but has been removed. Set counter to + * the max value so that this will be instantiated. + */ + set_counter_local(tb_ptr, INSTANTIATE_NUM); + set_info_local(tb_ptr, NULL); + return NULL; + } + return elm->tb_func; +} + +static void check_gc_completion(void) +{ + if (instance_done_gc > 0) { + qatomic_sub(&instances_global, instance_done_gc); + instance_pending_gc -= instance_done_gc; + instance_done_gc = 0; + } +} + +EM_JS_PRE(void, init_wasm_js, (void *instance_done_gc), +{ + Module.__wasm_tb = { + inst_gc_registry: new FinalizationRegistry((i) => { + if (i == "tbinstance") { + const memory_v = new DataView(HEAP8.buffer); + let v = memory_v.getInt32(DEC_PTR(instance_done_gc), true); + memory_v.setInt32(DEC_PTR(instance_done_gc), v + 1, true); + } + }) + }; +}); + +#define MAX_EXEC_NUM 50000 +static __thread int exec_cnt = MAX_EXEC_NUM; +static inline void trysleep(void) +{ + /* + * Even during running TBs continuously, try to return the control + * to the browser periodically and allow browsers doing tasks. + */ + if (--exec_cnt == 0) { + if (!can_add_instance()) { + emscripten_sleep(0); + check_gc_completion(); + } + exec_cnt = MAX_EXEC_NUM; + } +} + +static int thread_idx_max; + +static void init_wasm(void) +{ + thread_idx = qatomic_fetch_inc(&thread_idx_max); + ctx.stack = g_malloc(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE); + ctx.buf128 = g_malloc(16); + ctx.tci_tb_ptr = (uint32_t *)&tci_tb_ptr; + init_wasm_js(&instance_done_gc); +} + +static __thread bool initdone; + +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_ptr) +{ + if (!initdone) { + init_wasm(); + initdone = true; + } + ctx.env = env; + ctx.tb_ptr = (void *)v_tb_ptr; + while (true) { + trysleep(); + uintptr_t res; + wasm_tb_func tb_func = get_instance_from_tb(ctx.tb_ptr); + if (tb_func) { + /* + * Call the Wasm instance + */ + res = call_wasm_tb(tb_func, &ctx); + } else if (!inc_counter(ctx.tb_ptr)) { + /* + * Run it on TCI because the counter value is small + */ + res = tcg_qemu_tb_exec_tci(env); + } else if (!can_add_instance()) { + /* + * Too many instances has been created, try removing older + * instances and keep running this TB on TCI + */ + remove_old_instances(); + check_gc_completion(); + res = tcg_qemu_tb_exec_tci(env); + } else { + /* + * Instantiate and run the Wasm module + */ + struct WasmTBHeader *header = (struct WasmTBHeader *)ctx.tb_ptr; + tb_func = (wasm_tb_func)instantiate_wasm(header->wasm_ptr, + header->wasm_size, + header->import_ptr, + header->import_size); + add_instance(tb_func, ctx.tb_ptr); + res = call_wasm_tb(tb_func, &ctx); + } + if (!ctx.tb_ptr) { + return res; + } + } +} diff --git a/tcg/wasm.h b/tcg/wasm.h index fdde908557268..88163c28df0d5 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -46,6 +46,14 @@ static inline uintptr_t call_wasm_tb(wasm_tb_func f, struct WasmContext *ctx) return f(ctx); } +/* + * WasmInstanceInfo holds the relationship between TB and Wasm instance. + */ +struct WasmInstanceInfo { + void *tb_ptr; + wasm_tb_func tb_func; +}; + /* * A TB of the Wasm backend starts from a header which contains pointers for * each data stored in the following region in the TB. @@ -67,6 +75,43 @@ struct WasmTBHeader { */ void *import_ptr; int import_size; + + /* + * Counter holds how many times the TB is executed before the instantiation + * for each thread. + */ + int32_t *counter_ptr; + + /* + * Pointer to the instance information on each thread. + */ + struct WasmInstanceInfo **info_ptr; }; +static inline void *get_tci_ptr(void *tb_ptr) +{ + return ((struct WasmTBHeader *)tb_ptr)->tci_ptr; +} + +static inline int32_t get_counter(void *tb_ptr, int idx) +{ + return ((struct WasmTBHeader *)tb_ptr)->counter_ptr[idx]; +} + +static inline void set_counter(void *tb_ptr, int idx, int v) +{ + ((struct WasmTBHeader *)tb_ptr)->counter_ptr[idx] = v; +} + +static inline struct WasmInstanceInfo *get_info(void *tb_ptr, int idx) +{ + return ((struct WasmTBHeader *)tb_ptr)->info_ptr[idx]; +} + +static inline void set_info(void *tb_ptr, int idx, + struct WasmInstanceInfo *info) +{ + ((struct WasmTBHeader *)tb_ptr)->info_ptr[idx] = info; +} + #endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 6af4d6eb07567..784df9e630e10 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -28,6 +28,9 @@ #include "qemu/queue.h" #include "../wasm.h" +/* This is included to get the number of threads via tcg_max_ctxs. */ +#include "../tcg-internal.h" + /* Used for function call generation. */ #define TCG_TARGET_CALL_STACK_OFFSET 0 #define TCG_TARGET_STACK_ALIGN 8 @@ -2295,6 +2298,11 @@ static void tcg_out_set_carry(TCGContext *s) g_assert_not_reached(); } +/* Generate global QEMU prologue and epilogue code. */ +static inline void tcg_target_qemu_prologue(TCGContext *s) +{ +} + static const uint8_t mod_1[] = { 0x0, 0x61, 0x73, 0x6d, /* magic */ 0x01, 0x0, 0x0, 0x0, /* version */ @@ -2443,6 +2451,7 @@ static int write_mod_code(TCGContext *s) static void tcg_out_tb_start(TCGContext *s) { + int size; intptr_t ofs; struct WasmTBHeader *h; @@ -2457,6 +2466,18 @@ static void tcg_out_tb_start(TCGContext *s) h = (struct WasmTBHeader *)(s->code_ptr); s->code_ptr += sizeof(struct WasmTBHeader); + /* locate counters */ + h->counter_ptr = (int32_t *)s->code_ptr; + size = tcg_max_ctxs * sizeof(int32_t); + memset(s->code_ptr, 0, size); + s->code_ptr += size; + + /* locate the instance information */ + h->info_ptr = (struct WasmInstanceInfo **)s->code_ptr; + size = tcg_max_ctxs * sizeof(void *); + memset(s->code_ptr, 0, size); + s->code_ptr += size; + /* Followed by TCI code */ h->tci_ptr = s->code_ptr; From f49e907f4969e8ae4c1d3385814680b9df837db9 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 13:08:31 +0900 Subject: [PATCH 31/35] tcg/wasm: Enable TLB lookup This commit enables Wasm module's qemu_ld and qemu_st to perform TLB lookups, following the approach used in other backends such as RISC-V. Unlike other backends, the Wasm backend cannot use ldst labels, as jumping to specific code addresses (e.g. raddr) is not possible in Wasm. Instead, each TLB lookup is followed by a if branch: if the lookup succeeds, the memory is accessed directly; otherwise, a fallback helper function is invoked. Support for MO_BSWAP is not yet implemented, so has_memory_bswap is set to false. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target.c.inc | 225 +++++++++++++++++++++++++++++++++++++- 1 file changed, 222 insertions(+), 3 deletions(-) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 784df9e630e10..25691307b490a 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -3,8 +3,12 @@ * Tiny Code Generator for QEMU * * Copyright (c) 2009, 2011 Stefan Weil + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Copyright (c) 2008 Fabrice Bellard * - * Based on tci/tcg-target.c.inc + * Based on tci/tcg-target.c.inc and riscv/tcg-target.c.inc * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -154,6 +158,11 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { /* Local variable pointing to WasmContext */ #define CTX_IDX 0 +/* Temporary local variables */ +#define TMP32_LOCAL_0_IDX 1 +#define TMP64_LOCAL_0_IDX 2 +#define TMP64_LOCAL_1_IDX 3 + /* Function index */ #define CHECK_UNWINDING_IDX 0 /* A function to check the Asyncify status */ #define HELPER_IDX_START 1 /* The first index of helper functions */ @@ -170,6 +179,8 @@ typedef enum { OPC_RETURN = 0x0f, OPC_CALL = 0x10, OPC_LOCAL_GET = 0x20, + OPC_LOCAL_SET = 0x21, + OPC_LOCAL_TEE = 0x22, OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, @@ -1217,11 +1228,156 @@ static void *qemu_ld_helper_ptr(uint32_t oi) } } +#define MIN_TLB_MASK_TABLE_OFS INT_MIN + +static uint8_t prepare_host_addr_wasm(TCGContext *s, uint8_t *hit_var, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + MemOp opc = get_memop(oi); + TCGAtomAlign aa; + unsigned a_mask; + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1u << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_ofs = tlb_mask_table_ofs(s, mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + int add_off = offsetof(CPUTLBEntry, addend); + tcg_target_long compare_mask; + int offset; + + uint8_t tmp1 = TMP64_LOCAL_0_IDX; + uint8_t tmp2 = TMP64_LOCAL_1_IDX; + + if (!tcg_use_softmmu) { + g_assert_not_reached(); + } + + *hit_var = TMP32_LOCAL_0_IDX; + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, *hit_var); + + aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); + a_mask = (1u << aa.align) - 1; + + /* Get the CPUTLBEntry offset */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + tcg_wasm_out_op(s, OPC_I64_SHR_U); + + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + offset = tcg_wasm_out_norm_ptr(s, mask_ofs); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset); + tcg_wasm_out_op(s, OPC_I64_AND); + + /* Get the pointer to the target CPUTLBEntry */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + offset = tcg_wasm_out_norm_ptr(s, table_ofs); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset); + tcg_wasm_out_op(s, OPC_I64_ADD); + tcg_wasm_out_op_idx(s, OPC_LOCAL_TEE, tmp1); + + /* Load the tlb copmarator */ + offset = tcg_wasm_out_norm_ptr(s, is_ld ? offsetof(CPUTLBEntry, addr_read) + : offsetof(CPUTLBEntry, addr_write)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset); + + /* + * For aligned accesses, we check the first byte and include the + * alignment bits within the address. For unaligned access, we + * check that we don't cross pages using the address of the last + * byte of the access. + */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + if (a_mask < s_mask) { + tcg_wasm_out_op_const(s, OPC_I64_CONST, s_mask - a_mask); + tcg_wasm_out_op(s, OPC_I64_ADD); + } + compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask; + tcg_wasm_out_op_const(s, OPC_I64_CONST, compare_mask); + tcg_wasm_out_op(s, OPC_I64_AND); + + /* Compare masked address with the TLB entry. */ + tcg_wasm_out_op(s, OPC_I64_EQ); + + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + + /* TLB Hit - translate address using addend. */ + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, tmp1); + offset = tcg_wasm_out_norm_ptr(s, add_off); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + tcg_wasm_out_op(s, OPC_I64_ADD); + tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, tmp2); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 1); + tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, *hit_var); + + tcg_wasm_out_op(s, OPC_END); + + return tmp2; +} + +static void tcg_wasm_out_qemu_ld_direct( + TCGContext *s, TCGReg r, uint8_t base, MemOp opc) +{ + intptr_t ofs; + switch (opc & (MO_SSIZE)) { + case MO_UB: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD8_U, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_SB: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD8_S, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_UW: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD16_U, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_SW: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD16_S, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_UL: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD32_U, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_SL: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD32_S, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + case MO_UQ: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + default: + g_assert_not_reached(); + } +} + static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi) { intptr_t helper_idx; int64_t func_idx; + MemOp mop = get_memop(oi); + uint8_t base_var, hit_var; helper_idx = (intptr_t)qemu_ld_helper_ptr(oi); func_idx = get_helper_idx(s, helper_idx); @@ -1230,6 +1386,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_ld(s, oi); } + base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, true); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 1); + tcg_wasm_out_op(s, OPC_I32_EQ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_qemu_ld_direct(s, data_reg, base_var, mop); /* fast path */ + tcg_wasm_out_op(s, OPC_END); + /* * update the block index so that the possible rewinding will * skip this block @@ -1238,6 +1402,10 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); tcg_wasm_out_new_block(s); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1247,6 +1415,8 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(data_reg)); tcg_wasm_out_handle_unwinding(s); + + tcg_wasm_out_op(s, OPC_END); } static void *qemu_st_helper_ptr(uint32_t oi) @@ -1266,12 +1436,47 @@ static void *qemu_st_helper_ptr(uint32_t oi) } } +static void tcg_wasm_out_qemu_st_direct( + TCGContext *s, TCGReg lo, uint8_t base, MemOp opc) +{ + intptr_t ofs; + switch (opc & (MO_SSIZE)) { + case MO_8: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE8, 0, ofs); + break; + case MO_16: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE16, 0, ofs); + break; + case MO_32: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE32, 0, ofs); + break; + case MO_64: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + break; + default: + g_assert_not_reached(); + } +} + static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, MemOpIdx oi) { intptr_t helper_idx; int64_t func_idx; MemOp mop = get_memop(oi); + uint8_t base_var, hit_var; helper_idx = (intptr_t)qemu_st_helper_ptr(oi); func_idx = get_helper_idx(s, helper_idx); @@ -1280,6 +1485,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_st(s, oi); } + base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, false); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 1); + tcg_wasm_out_op(s, OPC_I32_EQ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_qemu_st_direct(s, data_reg, base_var, mop); /* fast path */ + tcg_wasm_out_op(s, OPC_END); + /* * update the block index so that the possible rewinding will * skip this block @@ -1288,6 +1501,10 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); tcg_wasm_out_new_block(s); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1305,6 +1522,8 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); tcg_wasm_out_handle_unwinding(s); + + tcg_wasm_out_op(s, OPC_END); } static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) @@ -2152,7 +2371,7 @@ static const TCGOutOpQemuLdSt outop_qemu_st = { bool tcg_target_has_memory_bswap(MemOp memop) { - return true; + return false; } static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) @@ -2384,7 +2603,7 @@ static const uint8_t mod_3[] = { 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for section size*/ 1, /* num of codes */ 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for code size */ - 0x0, /* local variables (none) */ + 0x2, 0x1, 0x7f, 0x2, 0x7e, /* local variables (32bit*1, 64bit*2) */ }; #define MOD_3_PH_EXPORT_START_FUNC_IDX 102 From e54f87bfc2c59e953ae66ba7273b259e440b3605 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Mon, 25 Aug 2025 15:16:27 +0900 Subject: [PATCH 32/35] tcg/wasm: Add tcg_target_init function This commit adds tcg_target_init, aligning it with the Wasm backend's register and stack usage. Signed-off-by: Kohei Tokunaga --- tcg/wasm/tcg-target.c.inc | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 25691307b490a..3bb4864da592d 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -2812,3 +2812,32 @@ static int tcg_out_tb_end(TCGContext *s) return 0; } + +static void tcg_target_init(TCGContext *s) +{ + /* The current code uses uint8_t for tcg operations. */ + tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX); + + /* Registers available for 32 bit operations. */ + tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1; + /* Registers available for 64 bit operations. */ + tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1; + /* + * The TCI "registers" are in the local stack frame and + * cannot be clobbered by the called helper functions. Additionally, Wasm + * modules for a TB and QEMU itself (i.e. helpers) are separated so also + * those variables aren't clobbered by the called helper functions. + * However, the TB assumes a 128-bit return value and assigns to + * the return value registers. + */ + tcg_target_call_clobber_regs = + MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS); + + s->reserved_regs = 0; + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK); + + /* The call arguments come first, followed by the temp storage. */ + tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, + TCG_STATIC_FRAME_SIZE); +} From e7333cfd991ce89b4003417eba0c8bdec927fa0b Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Fri, 22 Aug 2025 15:34:45 +0900 Subject: [PATCH 33/35] meson.build: enable to build Wasm backend Enable to use tcg/wasm as the TCG backend for the WebAssembly (wasm64) build. Signed-off-by: Kohei Tokunaga --- include/accel/tcg/getpc.h | 2 +- include/tcg/helper-info.h | 4 ++-- include/tcg/tcg.h | 2 +- meson.build | 6 ++++-- tcg/meson.build | 5 +++++ tcg/region.c | 10 +++++----- tcg/tcg.c | 14 +++++++------- 7 files changed, 25 insertions(+), 18 deletions(-) diff --git a/include/accel/tcg/getpc.h b/include/accel/tcg/getpc.h index 0fc08addcf321..3901655715338 100644 --- a/include/accel/tcg/getpc.h +++ b/include/accel/tcg/getpc.h @@ -9,7 +9,7 @@ #define ACCEL_TCG_GETPC_H /* GETPC is the true target of the return instruction that we'll execute. */ -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) extern __thread uintptr_t tci_tb_ptr; # define GETPC() tci_tb_ptr #else diff --git a/include/tcg/helper-info.h b/include/tcg/helper-info.h index 909fe73afa359..9b4e8832a8cb1 100644 --- a/include/tcg/helper-info.h +++ b/include/tcg/helper-info.h @@ -9,7 +9,7 @@ #ifndef TCG_HELPER_INFO_H #define TCG_HELPER_INFO_H -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) #include #endif #include "tcg-target-reg-bits.h" @@ -48,7 +48,7 @@ struct TCGHelperInfo { const char *name; /* Used with g_once_init_enter. */ -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) ffi_cif *cif; #else uintptr_t init; diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a6d9aa50d4731..b91818d982030 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -963,7 +963,7 @@ static inline size_t tcg_current_code_size(TCGContext *s) #define TB_EXIT_IDXMAX 1 #define TB_EXIT_REQUESTED 3 -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *tb_ptr); #else typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr); diff --git a/meson.build b/meson.build index 5b048ea70f6be..42b9004a20e9d 100644 --- a/meson.build +++ b/meson.build @@ -920,9 +920,9 @@ if have_tcg if not get_option('tcg_interpreter') error('Unsupported CPU @0@, try --enable-tcg-interpreter'.format(cpu)) endif - elif host_arch == 'wasm32' or host_arch == 'wasm64' + elif host_arch == 'wasm32' if not get_option('tcg_interpreter') - error('WebAssembly host requires --enable-tcg-interpreter') + error('wasm32 host requires --enable-tcg-interpreter') endif elif get_option('tcg_interpreter') warning('Use of the TCG interpreter is not recommended on this host') @@ -938,6 +938,8 @@ if have_tcg tcg_arch = 'i386' elif host_arch == 'ppc64' tcg_arch = 'ppc' + elif host_arch == 'wasm64' + tcg_arch = 'wasm' endif add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch, language: all_languages) diff --git a/tcg/meson.build b/tcg/meson.build index 706a6eb260ec7..1563f4fd30285 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -20,6 +20,11 @@ if get_option('tcg_interpreter') method: 'pkg-config') tcg_ss.add(libffi) tcg_ss.add(files('tci.c')) +elif host_os == 'emscripten' + libffi = dependency('libffi', version: '>=3.0', required: true, + method: 'pkg-config') + specific_ss.add(libffi) + specific_ss.add(files('wasm.c')) endif tcg_ss.add(when: libdw, if_true: files('debuginfo.c')) diff --git a/tcg/region.c b/tcg/region.c index 7ea0b37a84c33..68cb6f18b773c 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -94,7 +94,7 @@ bool in_code_gen_buffer(const void *p) return (size_t)(p - region.start_aligned) <= region.total_size; } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) static int host_prot_read_exec(void) { #if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI) @@ -569,7 +569,7 @@ static int alloc_code_gen_buffer_anon(size_t size, int prot, return prot; } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) #ifdef CONFIG_POSIX #include "qemu/memfd.h" @@ -667,11 +667,11 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) return PROT_READ | PROT_WRITE; } #endif /* CONFIG_DARWIN */ -#endif /* CONFIG_TCG_INTERPRETER */ +#endif /* !CONFIG_TCG_INTERPRETER && !EMSCRIPTEN */ static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp) { -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) # ifdef CONFIG_DARWIN return alloc_code_gen_buffer_splitwx_vmremap(size, errp); # endif @@ -813,7 +813,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_threads) * Work with the page protections set up with the initial mapping. */ need_prot = PROT_READ | PROT_WRITE; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) if (tcg_splitwx_diff == 0) { need_prot |= host_prot_read_exec(); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 7d3e7f8cb1dfb..bd8f8e565f34d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -256,7 +256,7 @@ TCGv_env tcg_env; const void *tcg_code_gen_epilogue; uintptr_t tcg_splitwx_diff; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) tcg_prologue_fn *tcg_qemu_tb_exec; #endif @@ -1443,7 +1443,7 @@ static TCGHelperInfo info_helper_st128_mmu = { | dh_typemask(ptr, 5) /* uintptr_t ra */ }; -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) static ffi_type *typecode_to_ffi(int argmask) { /* @@ -1520,7 +1520,7 @@ static ffi_cif *init_ffi_layout(TCGHelperInfo *info) #else #define HELPER_INFO_INIT(I) (&(I)->init) #define HELPER_INFO_INIT_VAL(I) 1 -#endif /* CONFIG_TCG_INTERPRETER */ +#endif /* CONFIG_TCG_INTERPRETER || EMSCRIPTEN */ static inline bool arg_slot_reg_p(unsigned arg_slot) { @@ -1897,7 +1897,7 @@ void tcg_prologue_init(void) s->code_buf = s->code_gen_ptr; s->data_gen_ptr = NULL; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); #endif @@ -1916,7 +1916,7 @@ void tcg_prologue_init(void) prologue_size = tcg_current_code_size(s); perf_report_prologue(s->code_gen_ptr, prologue_size); -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), (uintptr_t)s->code_buf, prologue_size); #endif @@ -1953,7 +1953,7 @@ void tcg_prologue_init(void) } } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) /* * Assert that goto_ptr is implemented completely, setting an epilogue. * For tci, we use NULL as the signal to return from the interpreter, @@ -7055,7 +7055,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) return i; } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) /* flush instruction cache */ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), (uintptr_t)s->code_buf, From bbc6981d708b999145275459b632336808dfbb74 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 13:09:25 +0900 Subject: [PATCH 34/35] meson.build: Propagate optimization flag for linking on Emscripten Emscripten uses the optimization flag at the link time to enable optimizations via Binaryen [1]. While meson.build currently recognizes the -Doptimization option, it does not propagate it to the linking. This commit updates meson.build to propagate the optimization flag to the linking when targeting WebAssembly. [1] https://emscripten.org/docs/optimizing/Optimizing-Code.html#how-emscripten-optimizes Signed-off-by: Kohei Tokunaga --- meson.build | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/meson.build b/meson.build index 42b9004a20e9d..ffd16fc167930 100644 --- a/meson.build +++ b/meson.build @@ -878,6 +878,12 @@ elif host_os == 'openbsd' # Disable OpenBSD W^X if available emulator_link_args = cc.get_supported_link_arguments('-Wl,-z,wxneeded') endif +elif host_os == 'emscripten' + # Emscripten uses the optimization flag also during the link time. + # https://emscripten.org/docs/optimizing/Optimizing-Code.html#how-emscripten-optimizes + if get_option('optimization') != 'plain' + emulator_link_args += ['-O' + get_option('optimization')] + endif endif ############################################### From cfa10fb5386f161cdf136ec0aa1e0360e429ad47 Mon Sep 17 00:00:00 2001 From: Kohei Tokunaga Date: Sat, 23 Aug 2025 13:10:03 +0900 Subject: [PATCH 35/35] .gitlab-ci.d: build wasm backend in CI This commit adds the build tests for the wasm backend. Signed-off-by: Kohei Tokunaga --- .gitlab-ci.d/buildtest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index a97bb89714f5c..16a3322277f06 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -803,7 +803,7 @@ build-wasm64-64bit: job: wasm64-64bit-emsdk-cross-container variables: IMAGE: emsdk-wasm64-64bit-cross - CONFIGURE_ARGS: --static --cpu=wasm64 --disable-tools --enable-debug --enable-tcg-interpreter + CONFIGURE_ARGS: --static --cpu=wasm64 --disable-tools --enable-debug build-wasm64-32bit: extends: .wasm_build_job_template @@ -812,4 +812,4 @@ build-wasm64-32bit: job: wasm64-32bit-emsdk-cross-container variables: IMAGE: emsdk-wasm64-32bit-cross - CONFIGURE_ARGS: --static --cpu=wasm64 --enable-wasm64-32bit-address-limit --disable-tools --enable-debug --enable-tcg-interpreter + CONFIGURE_ARGS: --static --cpu=wasm64 --enable-wasm64-32bit-address-limit --disable-tools --enable-debug