diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml index d888a60063715..16a3322277f06 100644 --- a/.gitlab-ci.d/buildtest.yml +++ b/.gitlab-ci.d/buildtest.yml @@ -787,11 +787,29 @@ coverity: # Always manual on forks even if $QEMU_CI == "2" - when: manual -build-wasm: +build-wasm32-32bit: extends: .wasm_build_job_template timeout: 2h needs: - job: wasm-emsdk-cross-container + job: wasm32-32bit-emsdk-cross-container variables: - IMAGE: emsdk-wasm32-cross - CONFIGURE_ARGS: --static --disable-tools --enable-debug --enable-tcg-interpreter + IMAGE: emsdk-wasm32-32bit-cross + CONFIGURE_ARGS: --static --cpu=wasm32 --disable-tools --enable-debug --enable-tcg-interpreter + +build-wasm64-64bit: + extends: .wasm_build_job_template + timeout: 2h + needs: + job: wasm64-64bit-emsdk-cross-container + variables: + IMAGE: emsdk-wasm64-64bit-cross + CONFIGURE_ARGS: --static --cpu=wasm64 --disable-tools --enable-debug + +build-wasm64-32bit: + extends: .wasm_build_job_template + timeout: 2h + needs: + job: wasm64-32bit-emsdk-cross-container + variables: + IMAGE: emsdk-wasm64-32bit-cross + CONFIGURE_ARGS: --static --cpu=wasm64 --enable-wasm64-32bit-address-limit --disable-tools --enable-debug diff --git a/.gitlab-ci.d/container-cross.yml b/.gitlab-ci.d/container-cross.yml index 8d3be53b75b23..84c4be49f43fc 100644 --- a/.gitlab-ci.d/container-cross.yml +++ b/.gitlab-ci.d/container-cross.yml @@ -92,7 +92,23 @@ win64-fedora-cross-container: variables: NAME: fedora-win64-cross -wasm-emsdk-cross-container: +wasm32-32bit-emsdk-cross-container: extends: .container_job_template variables: - NAME: emsdk-wasm32-cross + NAME: emsdk-wasm32-32bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm32 + DOCKERFILE: emsdk-wasm-cross + +wasm64-64bit-emsdk-cross-container: + extends: .container_job_template + variables: + NAME: emsdk-wasm64-64bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm64 --build-arg WASM64_MEMORY64=1 + DOCKERFILE: emsdk-wasm-cross + +wasm64-32bit-emsdk-cross-container: + extends: .container_job_template + variables: + NAME: emsdk-wasm64-32bit-cross + BUILD_ARGS: --build-arg TARGET_CPU=wasm64 --build-arg WASM64_MEMORY64=2 + DOCKERFILE: emsdk-wasm-cross diff --git a/.gitlab-ci.d/container-template.yml b/.gitlab-ci.d/container-template.yml index 4eec72f383dd7..01ca8404136fb 100644 --- a/.gitlab-ci.d/container-template.yml +++ b/.gitlab-ci.d/container-template.yml @@ -10,12 +10,14 @@ - export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest" - docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD" - until docker info; do sleep 1; done + - export DOCKERFILE_NAME=${DOCKERFILE:-$NAME} script: - echo "TAG:$TAG" - echo "COMMON_TAG:$COMMON_TAG" - docker build --tag "$TAG" --cache-from "$TAG" --cache-from "$COMMON_TAG" --build-arg BUILDKIT_INLINE_CACHE=1 - -f "tests/docker/dockerfiles/$NAME.docker" "." + $BUILD_ARGS + -f "tests/docker/dockerfiles/$DOCKERFILE_NAME.docker" "." 
- docker push "$TAG" after_script: - docker logout diff --git a/MAINTAINERS b/MAINTAINERS index a07086ed76213..d528b9ec903b7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -647,7 +647,7 @@ F: include/system/os-wasm.h F: os-wasm.c F: util/coroutine-wasm.c F: configs/meson/emscripten.txt -F: tests/docker/dockerfiles/emsdk-wasm32-cross.docker +F: tests/docker/dockerfiles/emsdk-wasm-cross.docker Alpha Machines -------------- @@ -3999,6 +3999,13 @@ F: tcg/tci/ F: tcg/tci.c F: disas/tci.c +WebAssembly TCG target +M: Kohei Tokunaga +S: Maintained +F: tcg/wasm/ +F: tcg/wasm.c +F: tcg/wasm.h + Block drivers ------------- VMDK diff --git a/configure b/configure index 274a7787642e2..77365e25093a3 100755 --- a/configure +++ b/configure @@ -182,6 +182,10 @@ EXTRA_CXXFLAGS="" EXTRA_OBJCFLAGS="" EXTRA_LDFLAGS="" +# The value is propagated to Emscripten's "-sMEMORY64" flag. +# https://emscripten.org/docs/tools_reference/settings_reference.html#memory64 +wasm64_memory64=1 + # Default value for a variable defining feature "foo". # * foo="no" feature will only be used if --enable-foo arg is given # * foo="" feature will be searched for, and if found, will be used @@ -239,6 +243,10 @@ for opt do ;; --without-default-features) default_feature="no" ;; + --enable-wasm64-32bit-address-limit) wasm64_memory64="2" + ;; + --disable-wasm64-32bit-address-limit) wasm64_memory64="1" + ;; esac done @@ -365,7 +373,6 @@ elif check_define __APPLE__; then host_os=darwin elif check_define EMSCRIPTEN ; then host_os=emscripten - cpu=wasm32 cross_compile="yes" else # This is a fatal error, but don't report it yet, because we @@ -425,6 +432,8 @@ elif check_define __aarch64__ ; then cpu="aarch64" elif check_define __loongarch64 ; then cpu="loongarch64" +elif check_define EMSCRIPTEN ; then + error_exit "wasm32 or wasm64 must be specified to the cpu flag" else # Using uname is really broken, but it is just a fallback for architectures # that are going to use TCI anyway @@ -535,6 +544,9 @@ case "$cpu" in wasm32) CPU_CFLAGS="-m32" ;; + wasm64) + CPU_CFLAGS="-m64 -sMEMORY64=$wasm64_memory64" + ;; esac if test -n "$host_arch" && { diff --git a/include/accel/tcg/getpc.h b/include/accel/tcg/getpc.h index 0fc08addcf321..3901655715338 100644 --- a/include/accel/tcg/getpc.h +++ b/include/accel/tcg/getpc.h @@ -9,7 +9,7 @@ #define ACCEL_TCG_GETPC_H /* GETPC is the true target of the return instruction that we'll execute. */ -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) extern __thread uintptr_t tci_tb_ptr; # define GETPC() tci_tb_ptr #else diff --git a/include/tcg/helper-info.h b/include/tcg/helper-info.h index 909fe73afa359..9b4e8832a8cb1 100644 --- a/include/tcg/helper-info.h +++ b/include/tcg/helper-info.h @@ -9,7 +9,7 @@ #ifndef TCG_HELPER_INFO_H #define TCG_HELPER_INFO_H -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) #include #endif #include "tcg-target-reg-bits.h" @@ -48,7 +48,7 @@ struct TCGHelperInfo { const char *name; /* Used with g_once_init_enter. 
*/ -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) ffi_cif *cif; #else uintptr_t init; diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index a6d9aa50d4731..b91818d982030 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -963,7 +963,7 @@ static inline size_t tcg_current_code_size(TCGContext *s) #define TB_EXIT_IDXMAX 1 #define TB_EXIT_REQUESTED 3 -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *tb_ptr); #else typedef uintptr_t tcg_prologue_fn(CPUArchState *env, const void *tb_ptr); diff --git a/meson.build b/meson.build index 50c774a19557a..ffd16fc167930 100644 --- a/meson.build +++ b/meson.build @@ -52,7 +52,7 @@ qapi_trace_events = [] bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin'] supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux', 'emscripten'] supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 'x86_64', - 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64', 'wasm32'] + 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64', 'wasm32', 'wasm64'] cpu = host_machine.cpu_family() @@ -393,6 +393,10 @@ elif host_os == 'windows' if compiler.get_id() == 'clang' and compiler.get_linker_id() != 'ld.lld' error('On windows, you need to use lld with clang - use msys2 clang64/clangarm64 env') endif +elif host_os == 'emscripten' + if cpu == 'wasm64' and get_option('wasm64_32bit_address_limit') + qemu_common_flags += '-DWASM64_MEMORY64_2' + endif endif # Choose instruction set (currently x86-only) @@ -874,6 +878,12 @@ elif host_os == 'openbsd' # Disable OpenBSD W^X if available emulator_link_args = cc.get_supported_link_arguments('-Wl,-z,wxneeded') endif +elif host_os == 'emscripten' + # Emscripten uses the optimization flag also during the link time. 
+ # https://emscripten.org/docs/optimizing/Optimizing-Code.html#how-emscripten-optimizes + if get_option('optimization') != 'plain' + emulator_link_args += ['-O' + get_option('optimization')] + endif endif ############################################### @@ -918,7 +928,7 @@ if have_tcg endif elif host_arch == 'wasm32' if not get_option('tcg_interpreter') - error('WebAssembly host requires --enable-tcg-interpreter') + error('wasm32 host requires --enable-tcg-interpreter') endif elif get_option('tcg_interpreter') warning('Use of the TCG interpreter is not recommended on this host') @@ -934,6 +944,8 @@ if have_tcg tcg_arch = 'i386' elif host_arch == 'ppc64' tcg_arch = 'ppc' + elif host_arch == 'wasm64' + tcg_arch = 'wasm' endif add_project_arguments('-iquote', meson.current_source_dir() / 'tcg' / tcg_arch, language: all_languages) diff --git a/meson_options.txt b/meson_options.txt index fff1521e580de..82771340badf6 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -386,3 +386,6 @@ option('rust', type: 'feature', value: 'disabled', description: 'Rust support') option('strict_rust_lints', type: 'boolean', value: false, description: 'Enable stricter set of Rust warnings') + +option('wasm64_32bit_address_limit', type: 'boolean', value: false, + description: 'Restrict wasm64 address space to 32-bit (default is to use the whole 64-bit range).') diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 0ebe6bc52a6b9..64845aa0b9d05 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -56,6 +56,9 @@ meson_options_help() { printf "%s\n" ' dtrace/ftrace/log/nop/simple/syslog/ust)' printf "%s\n" ' --enable-tsan enable thread sanitizer' printf "%s\n" ' --enable-ubsan enable undefined behaviour sanitizer' + printf "%s\n" ' --enable-wasm64-32bit-address-limit' + printf "%s\n" ' Restrict wasm64 address space to 32-bit (default' + printf "%s\n" ' is to use the whole 64-bit range).' 
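Editorial aside: to make the semantics of this option concrete, here is a small illustration (mine, not part of the patch) of what the two -sMEMORY64 modes mean for C code on a wasm64 host, as I read the Emscripten settings documentation referenced in configure. Pointers stay 8 bytes in both modes; -sMEMORY64=2 additionally keeps all addresses below 4 GiB, which is what this option opts into.

    /* Illustration only: pointer width vs. address range on a wasm64 host. */
    #include <stdint.h>
    #include <stdlib.h>

    _Static_assert(sizeof(void *) == 8, "wasm64 keeps 64-bit C pointers");

    int main(void)
    {
        void *p = malloc(16);
        /*
         * Under -sMEMORY64=2 the heap never grows past 4 GiB, so every
         * pointer value round-trips through 32 bits; under -sMEMORY64=1
         * this can fail once the heap grows large enough.
         */
        uint32_t low = (uint32_t)(uintptr_t)p;
        return (uintptr_t)low == (uintptr_t)p ? 0 : 1;
    }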
printf "%s\n" ' --firmwarepath=VALUES search PATH for firmware files [share/qemu-' printf "%s\n" ' firmware]' printf "%s\n" ' --iasl=VALUE Path to ACPI disassembler' @@ -571,6 +574,8 @@ _meson_option_parse() { --disable-vte) printf "%s" -Dvte=disabled ;; --enable-vvfat) printf "%s" -Dvvfat=enabled ;; --disable-vvfat) printf "%s" -Dvvfat=disabled ;; + --enable-wasm64-32bit-address-limit) printf "%s" -Dwasm64_32bit_address_limit=true ;; + --disable-wasm64-32bit-address-limit) printf "%s" -Dwasm64_32bit_address_limit=false ;; --enable-werror) printf "%s" -Dwerror=true ;; --disable-werror) printf "%s" -Dwerror=false ;; --enable-whpx) printf "%s" -Dwhpx=enabled ;; diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 3b088b7bd9727..9323161607885 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -3514,6 +3514,17 @@ static void tcg_out_tb_start(TCGContext *s) tcg_out_bti(s, BTI_J); } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 836894b16ade7..bd8428491aa99 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -3441,6 +3441,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + typedef struct { DebugFrameHeader h; uint8_t fde_def_cfa[4]; diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 088c6c9264b01..cf8b50e162b07 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -4759,6 +4759,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { memset(p, 0x90, count); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 10c69211ac5ba..75f6a97b2b39d 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -2658,6 +2658,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { for (int i = 0; i < count; ++i) { diff --git a/tcg/meson.build b/tcg/meson.build index 706a6eb260ec7..1563f4fd30285 100644 --- a/tcg/meson.build +++ b/tcg/meson.build @@ -20,6 +20,11 @@ if get_option('tcg_interpreter') method: 'pkg-config') tcg_ss.add(libffi) tcg_ss.add(files('tci.c')) +elif host_os == 'emscripten' + libffi = dependency('libffi', version: '>=3.0', required: true, + method: 'pkg-config') + specific_ss.add(libffi) + specific_ss.add(files('wasm.c')) endif tcg_ss.add(when: libdw, if_true: files('debuginfo.c')) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 400eafbab4b6f..d1241912ac3d7 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2745,6 +2745,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + 
/* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_target_init(TCGContext *s) { tcg_target_detect_isa(); diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index b8b23d44d5e2d..20cc2594b8392 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2859,6 +2859,17 @@ static void tcg_out_tb_start(TCGContext *s) } } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); diff --git a/tcg/region.c b/tcg/region.c index 7ea0b37a84c33..68cb6f18b773c 100644 --- a/tcg/region.c +++ b/tcg/region.c @@ -94,7 +94,7 @@ bool in_code_gen_buffer(const void *p) return (size_t)(p - region.start_aligned) <= region.total_size; } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) static int host_prot_read_exec(void) { #if defined(CONFIG_LINUX) && defined(HOST_AARCH64) && defined(PROT_BTI) @@ -569,7 +569,7 @@ static int alloc_code_gen_buffer_anon(size_t size, int prot, return prot; } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) #ifdef CONFIG_POSIX #include "qemu/memfd.h" @@ -667,11 +667,11 @@ static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp) return PROT_READ | PROT_WRITE; } #endif /* CONFIG_DARWIN */ -#endif /* CONFIG_TCG_INTERPRETER */ +#endif /* !CONFIG_TCG_INTERPRETER && !EMSCRIPTEN */ static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp) { -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) # ifdef CONFIG_DARWIN return alloc_code_gen_buffer_splitwx_vmremap(size, errp); # endif @@ -813,7 +813,7 @@ void tcg_region_init(size_t tb_size, int splitwx, unsigned max_threads) * Work with the page protections set up with the initial mapping. 
*/ need_prot = PROT_READ | PROT_WRITE; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) if (tcg_splitwx_diff == 0) { need_prot |= host_prot_read_exec(); } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 31b9f7d87a046..63e7438291e12 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -2983,6 +2983,17 @@ static void tcg_out_tb_start(TCGContext *s) init_setting_vtype(s); } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static bool vtype_check(unsigned vtype) { unsigned long tmp; diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 84a9e73a46e90..457e568d30f49 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3830,6 +3830,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { memset(p, 0x07, count * sizeof(tcg_insn_unit)); diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 5e5c3f1cda5e3..ae695b115b1aa 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1017,6 +1017,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + static void tcg_out_nop_fill(tcg_insn_unit *p, int count) { int i; diff --git a/tcg/tcg.c b/tcg/tcg.c index afac55a203ab0..bd8f8e565f34d 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -116,6 +116,7 @@ static void tcg_register_jit_int(const void *buf, size_t size, /* Forward declarations for functions declared and used in tcg-target.c.inc. 
*/ static void tcg_out_tb_start(TCGContext *s); +static int tcg_out_tb_end(TCGContext *s); static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, intptr_t arg2); static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); @@ -187,6 +188,7 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot); static bool tcg_target_const_match(int64_t val, int ct, TCGType type, TCGCond cond, int vece); +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l); #ifndef CONFIG_USER_ONLY #define guest_base ({ qemu_build_not_reached(); (uintptr_t)0; }) @@ -254,7 +256,7 @@ TCGv_env tcg_env; const void *tcg_code_gen_epilogue; uintptr_t tcg_splitwx_diff; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) tcg_prologue_fn *tcg_qemu_tb_exec; #endif @@ -361,6 +363,7 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l) tcg_debug_assert(!l->has_value); l->has_value = 1; l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr); + tcg_out_label_cb(s, l); } TCGLabel *gen_new_label(void) @@ -1440,7 +1443,7 @@ static TCGHelperInfo info_helper_st128_mmu = { | dh_typemask(ptr, 5) /* uintptr_t ra */ }; -#ifdef CONFIG_TCG_INTERPRETER +#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN) static ffi_type *typecode_to_ffi(int argmask) { /* @@ -1517,7 +1520,7 @@ static ffi_cif *init_ffi_layout(TCGHelperInfo *info) #else #define HELPER_INFO_INIT(I) (&(I)->init) #define HELPER_INFO_INIT_VAL(I) 1 -#endif /* CONFIG_TCG_INTERPRETER */ +#endif /* CONFIG_TCG_INTERPRETER || EMSCRIPTEN */ static inline bool arg_slot_reg_p(unsigned arg_slot) { @@ -1894,7 +1897,7 @@ void tcg_prologue_init(void) s->code_buf = s->code_gen_ptr; s->data_gen_ptr = NULL; -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr); #endif @@ -1913,7 +1916,7 @@ void tcg_prologue_init(void) prologue_size = tcg_current_code_size(s); perf_report_prologue(s->code_gen_ptr, prologue_size); -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), (uintptr_t)s->code_buf, prologue_size); #endif @@ -1950,7 +1953,7 @@ void tcg_prologue_init(void) } } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) /* * Assert that goto_ptr is implemented completely, setting an epilogue. 
* For tci, we use NULL as the signal to return from the interpreter, @@ -7047,8 +7050,12 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start) if (!tcg_resolve_relocs(s)) { return -2; } + i = tcg_out_tb_end(s); + if (i < 0) { + return i; + } -#ifndef CONFIG_TCG_INTERPRETER +#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN) /* flush instruction cache */ flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf), (uintptr_t)s->code_buf, diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 35c66a48369da..d99d06c1da7e8 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -1301,6 +1301,17 @@ static void tcg_out_tb_start(TCGContext *s) /* nothing to do */ } +static int tcg_out_tb_end(TCGContext *s) +{ + /* nothing to do */ + return 0; +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + /* nothing to do */ +} + bool tcg_target_has_memory_bswap(MemOp memop) { return true; diff --git a/tcg/wasm.c b/tcg/wasm.c new file mode 100644 index 0000000000000..00f422218360a --- /dev/null +++ b/tcg/wasm.c @@ -0,0 +1,855 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * WebAssembly backend with forked TCI, based on tci.c + * + * Copyright (c) 2009, 2011, 2016 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "tcg/tcg.h" +#include "tcg/tcg-ldst.h" +#include "tcg/helper-info.h" +#include <ffi.h> +#include <emscripten.h> +#include "wasm.h" + +/* TBs executed more than this value will be compiled to wasm */ +#define INSTANTIATE_NUM 1500 + +#define EM_JS_PRE(ret, name, args, body...) EM_JS(ret, name, args, body) + +#define DEC_PTR(p) bigintToI53Checked(p) +#define ENC_PTR(p) BigInt(p) +#if defined(WASM64_MEMORY64_2) +#define ENC_WASM_TABLE_IDX(i) Number(i) +#else +#define ENC_WASM_TABLE_IDX(i) i +#endif + +EM_JS_PRE(void*, instantiate_wasm, (void *wasm_begin, + int wasm_size, + void *import_vec_begin, + int import_vec_size), +{ + const memory_v = new DataView(HEAP8.buffer); + const wasm = HEAP8.subarray(DEC_PTR(wasm_begin), + DEC_PTR(wasm_begin) + wasm_size); + var helper = {}; + helper.u = () => { + return (Asyncify.state != Asyncify.State.Unwinding) ? 1 : 0; + }; + const entsize = TCG_TARGET_REG_BITS / 8; + for (var i = 0; i < import_vec_size / entsize; i++) { + const idx = memory_v.getBigInt64( + DEC_PTR(import_vec_begin) + i * entsize, true); + helper[i] = wasmTable.get(ENC_WASM_TABLE_IDX(idx)); + } + const mod = new WebAssembly.Module(new Uint8Array(wasm)); + const inst = new WebAssembly.Instance(mod, { + "env" : { + "memory" : wasmMemory, + }, + "helper" : helper, + }); + + Module.__wasm_tb.inst_gc_registry.register(inst, "tbinstance"); + + return ENC_PTR(addFunction(inst.exports.start, 'ii')); +}); + +__thread uintptr_t tci_tb_ptr; + +static void tci_args_l(uint32_t insn, const void *tb_ptr, void **l0) +{ + int diff = sextract32(insn, 12, 20); + *l0 = diff ? 
(void *)tb_ptr + diff : NULL; +} + +static void tci_args_r(uint32_t insn, TCGReg *r0) +{ + *r0 = extract32(insn, 8, 4); +} + +static void tci_args_nl(uint32_t insn, const void *tb_ptr, + uint8_t *n0, void **l1) +{ + *n0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; +} + +static void tci_args_rl(uint32_t insn, const void *tb_ptr, + TCGReg *r0, void **l1) +{ + *r0 = extract32(insn, 8, 4); + *l1 = sextract32(insn, 12, 20) + (void *)tb_ptr; +} + +static void tci_args_rr(uint32_t insn, TCGReg *r0, TCGReg *r1) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); +} + +static void tci_args_ri(uint32_t insn, TCGReg *r0, tcg_target_ulong *i1) +{ + *r0 = extract32(insn, 8, 4); + *i1 = sextract32(insn, 12, 20); +} + +static void tci_args_rrm(uint32_t insn, TCGReg *r0, + TCGReg *r1, MemOpIdx *m2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *m2 = extract32(insn, 16, 16); +} + +static void tci_args_rrr(uint32_t insn, TCGReg *r0, TCGReg *r1, TCGReg *r2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); +} + +static void tci_args_rrs(uint32_t insn, TCGReg *r0, TCGReg *r1, int32_t *i2) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = sextract32(insn, 16, 16); +} + +static void tci_args_rrbb(uint32_t insn, TCGReg *r0, TCGReg *r1, + uint8_t *i2, uint8_t *i3) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *i2 = extract32(insn, 16, 6); + *i3 = extract32(insn, 22, 6); +} + +static void tci_args_rrrc(uint32_t insn, + TCGReg *r0, TCGReg *r1, TCGReg *r2, TCGCond *c3) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *c3 = extract32(insn, 20, 4); +} + +static void tci_args_rrrrrc(uint32_t insn, TCGReg *r0, TCGReg *r1, + TCGReg *r2, TCGReg *r3, TCGReg *r4, TCGCond *c5) +{ + *r0 = extract32(insn, 8, 4); + *r1 = extract32(insn, 12, 4); + *r2 = extract32(insn, 16, 4); + *r3 = extract32(insn, 20, 4); + *r4 = extract32(insn, 24, 4); + *c5 = extract32(insn, 28, 4); +} + +static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition) +{ + bool result = false; + int32_t i0 = u0; + int32_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + g_assert_not_reached(); + } + return result; +} + +static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition) +{ + bool result = false; + int64_t i0 = u0; + int64_t i1 = u1; + switch (condition) { + case TCG_COND_EQ: + result = (u0 == u1); + break; + case TCG_COND_NE: + result = (u0 != u1); + break; + case TCG_COND_LT: + result = (i0 < i1); + break; + case TCG_COND_GE: + result = (i0 >= i1); + break; + case TCG_COND_LE: + result = (i0 <= i1); + break; + case TCG_COND_GT: + result = (i0 > i1); + break; + case TCG_COND_LTU: + result = (u0 < u1); + break; + case TCG_COND_GEU: + result = (u0 >= u1); + break; + case TCG_COND_LEU: + result = (u0 <= u1); + break; + case TCG_COND_GTU: + result = (u0 > u1); + break; + default: + g_assert_not_reached(); + } + return 
result; +} + +static uint64_t tci_qemu_ld(CPUArchState *env, uint64_t taddr, + MemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi); + uintptr_t ra = (uintptr_t)tb_ptr; + + switch (mop & MO_SSIZE) { + case MO_UB: + return helper_ldub_mmu(env, taddr, oi, ra); + case MO_SB: + return helper_ldsb_mmu(env, taddr, oi, ra); + case MO_UW: + return helper_lduw_mmu(env, taddr, oi, ra); + case MO_SW: + return helper_ldsw_mmu(env, taddr, oi, ra); + case MO_UL: + return helper_ldul_mmu(env, taddr, oi, ra); + case MO_SL: + return helper_ldsl_mmu(env, taddr, oi, ra); + case MO_UQ: + return helper_ldq_mmu(env, taddr, oi, ra); + default: + g_assert_not_reached(); + } +} + +static void tci_qemu_st(CPUArchState *env, uint64_t taddr, uint64_t val, + MemOpIdx oi, const void *tb_ptr) +{ + MemOp mop = get_memop(oi); + uintptr_t ra = (uintptr_t)tb_ptr; + + switch (mop & MO_SIZE) { + case MO_UB: + helper_stb_mmu(env, taddr, val, oi, ra); + break; + case MO_UW: + helper_stw_mmu(env, taddr, val, oi, ra); + break; + case MO_UL: + helper_stl_mmu(env, taddr, val, oi, ra); + break; + case MO_UQ: + helper_stq_mmu(env, taddr, val, oi, ra); + break; + default: + g_assert_not_reached(); + } +} + +static __thread int thread_idx; + +static inline int32_t get_counter_local(void *tb_ptr) +{ + return get_counter(tb_ptr, thread_idx); +} + +static inline void set_counter_local(void *tb_ptr, int v) +{ + set_counter(tb_ptr, thread_idx, v); +} + +static inline struct WasmInstanceInfo *get_info_local(void *tb_ptr) +{ + return get_info(tb_ptr, thread_idx); +} + +static inline void set_info_local(void *tb_ptr, struct WasmInstanceInfo *info) +{ + set_info(tb_ptr, thread_idx, info); +} + +/* + * inc_counter increments the execution counter in the specified TB. + * If the counter reaches the limit, it returns true otherwise returns false. + */ +static inline bool inc_counter(void *tb_ptr) +{ + int32_t counter = get_counter_local(tb_ptr); + if ((counter >= 0) && (counter < INSTANTIATE_NUM)) { + set_counter_local(tb_ptr, counter + 1); + } else { + return true; /* enter to wasm TB */ + } + return false; +} + +static __thread struct WasmContext ctx = { + .tb_ptr = 0, + .stack = NULL, + .do_init = 1, + .buf128 = NULL, +}; + +static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env) +{ + uint32_t *tb_ptr = get_tci_ptr(ctx.tb_ptr); + tcg_target_ulong regs[TCG_TARGET_NB_REGS]; + uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE) + / sizeof(uint64_t)]; + + regs[TCG_AREG0] = (tcg_target_ulong)env; + regs[TCG_REG_CALL_STACK] = (uintptr_t)stack; + + for (;;) { + uint32_t insn; + TCGOpcode opc; + TCGReg r0, r1, r2, r3, r4; + tcg_target_ulong t1; + uint8_t pos, len; + TCGCond condition; + uint32_t tmp32; + uint64_t taddr; + MemOpIdx oi; + int32_t ofs; + void *ptr; + + insn = *tb_ptr++; + opc = extract32(insn, 0, 8); + + switch (opc) { + case INDEX_op_call: + { + void *call_slots[MAX_CALL_IARGS]; + ffi_cif *cif; + void *func; + unsigned i, s, n; + + tci_args_nl(insn, tb_ptr, &len, &ptr); + func = ((void **)ptr)[0]; + cif = ((void **)ptr)[1]; + + n = cif->nargs; + for (i = s = 0; i < n; ++i) { + ffi_type *t = cif->arg_types[i]; + call_slots[i] = &stack[s]; + s += DIV_ROUND_UP(t->size, 8); + } + + /* Helper functions may need to access the "return address" */ + tci_tb_ptr = (uintptr_t)tb_ptr; + ffi_call(cif, func, stack, call_slots); + } + + switch (len) { + case 0: /* void */ + break; + case 1: /* uint32_t */ + /* + * The result winds up "left-aligned" in the stack[0] slot. 
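Editorial aside (the author's libffi comment continues below): the interpreter reads helper return values out of stack[0] following libffi's widening convention. A standalone sketch of that convention, my code rather than the patch's:

    /* libffi widens integral results to ffi_arg, so read an ffi_arg slot. */
    #include <ffi.h>
    #include <stdint.h>
    #include <assert.h>

    static uint32_t answer(void)
    {
        return 42;
    }

    int main(void)
    {
        ffi_cif cif;
        ffi_arg ret; /* NOT uint32_t: the result is widened into this slot */

        assert(ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 0,
                            &ffi_type_uint32, NULL) == FFI_OK);
        ffi_call(&cif, FFI_FN(answer), &ret, NULL);
        assert((uint32_t)ret == 42);
        return 0;
    }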
+ * Note that libffi has an odd special case in that it will + * always widen an integral result to ffi_arg. + */ + if (sizeof(ffi_arg) == 8) { + regs[TCG_REG_R0] = (uint32_t)stack[0]; + } else { + regs[TCG_REG_R0] = *(uint32_t *)stack; + } + break; + case 2: /* uint64_t */ + /* + * For TCG_TARGET_REG_BITS == 32, the register pair + * must stay in host memory order. + */ + memcpy(®s[TCG_REG_R0], stack, 8); + break; + case 3: /* Int128 */ + memcpy(®s[TCG_REG_R0], stack, 16); + break; + default: + g_assert_not_reached(); + } + break; + case INDEX_op_and: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] & regs[r2]; + break; + case INDEX_op_or: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] | regs[r2]; + break; + case INDEX_op_xor: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ^ regs[r2]; + break; + case INDEX_op_add: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] + regs[r2]; + break; + case INDEX_op_sub: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] - regs[r2]; + break; + case INDEX_op_mul: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] * regs[r2]; + break; + case INDEX_op_extract: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = extract64(regs[r1], pos, len); + break; + case INDEX_op_sextract: + tci_args_rrbb(insn, &r0, &r1, &pos, &len); + regs[r0] = sextract64(regs[r1], pos, len); + break; + case INDEX_op_shl: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] << (regs[r2] % TCG_TARGET_REG_BITS); + break; + case INDEX_op_shr: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] >> (regs[r2] % TCG_TARGET_REG_BITS); + break; + case INDEX_op_sar: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ((tcg_target_long)regs[r1] + >> (regs[r2] % TCG_TARGET_REG_BITS)); + break; + case INDEX_op_neg: + tci_args_rr(insn, &r0, &r1); + regs[r0] = -regs[r1]; + break; + case INDEX_op_setcond: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare64(regs[r1], regs[r2], condition); + break; + case INDEX_op_movcond: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare64(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? r3 : r4]; + break; + case INDEX_op_tci_setcond32: + tci_args_rrrc(insn, &r0, &r1, &r2, &condition); + regs[r0] = tci_compare32(regs[r1], regs[r2], condition); + break; + case INDEX_op_tci_movcond32: + tci_args_rrrrrc(insn, &r0, &r1, &r2, &r3, &r4, &condition); + tmp32 = tci_compare32(regs[r1], regs[r2], condition); + regs[r0] = regs[tmp32 ? 
r3 : r4]; + break; + case INDEX_op_mov: + tci_args_rr(insn, &r0, &r1); + regs[r0] = regs[r1]; + break; + case INDEX_op_tci_movi: + tci_args_ri(insn, &r0, &t1); + regs[r0] = t1; + break; + case INDEX_op_tci_movl: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + regs[r0] = *(tcg_target_ulong *)ptr; + break; + case INDEX_op_ld: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(tcg_target_ulong *)ptr; + break; + case INDEX_op_ld8u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint8_t *)ptr; + break; + case INDEX_op_ld8s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int8_t *)ptr; + break; + case INDEX_op_ld16u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint16_t *)ptr; + break; + case INDEX_op_ld16s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int16_t *)ptr; + break; + case INDEX_op_st: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(tcg_target_ulong *)ptr = regs[r0]; + break; + case INDEX_op_st8: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint8_t *)ptr = regs[r0]; + break; + case INDEX_op_st16: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint16_t *)ptr = regs[r0]; + break; + case INDEX_op_ld32u: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(uint32_t *)ptr; + break; + case INDEX_op_ld32s: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + regs[r0] = *(int32_t *)ptr; + break; + case INDEX_op_st32: + tci_args_rrs(insn, &r0, &r1, &ofs); + ptr = (void *)(regs[r1] + ofs); + *(uint32_t *)ptr = regs[r0]; + break; + case INDEX_op_divs: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int64_t)regs[r1] / (int64_t)regs[r2]; + break; + case INDEX_op_divu: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint64_t)regs[r1] / (uint64_t)regs[r2]; + break; + case INDEX_op_rems: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int64_t)regs[r1] % (int64_t)regs[r2]; + break; + case INDEX_op_remu: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint64_t)regs[r1] % (uint64_t)regs[r2]; + break; + case INDEX_op_tci_divs32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int32_t)regs[r1] / (int32_t)regs[r2]; + break; + case INDEX_op_tci_divu32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint32_t)regs[r1] / (uint32_t)regs[r2]; + break; + case INDEX_op_tci_rems32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (int32_t)regs[r1] % (int32_t)regs[r2]; + break; + case INDEX_op_tci_remu32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = (uint32_t)regs[r1] % (uint32_t)regs[r2]; + break; + case INDEX_op_ctpop: + tci_args_rr(insn, &r0, &r1); + regs[r0] = ctpop64(regs[r1]); + break; + case INDEX_op_clz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_ctz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_tci_clz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; + break; + case INDEX_op_tci_ctz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? 
ctz32(tmp32) : regs[r2]; + break; + case INDEX_op_rotl: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_rotr: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_tci_rotl32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol32(regs[r1], regs[r2] & 31); + break; + case INDEX_op_tci_rotr32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror32(regs[r1], regs[r2] & 31); + break; + case INDEX_op_br: + tci_args_l(insn, tb_ptr, &ptr); + tb_ptr = ptr; + continue; + case INDEX_op_brcond: + tci_args_rl(insn, tb_ptr, &r0, &ptr); + if (regs[r0]) { + tb_ptr = ptr; + } + break; + case INDEX_op_exit_tb: + tci_args_l(insn, tb_ptr, &ptr); + ctx.tb_ptr = 0; + return (uintptr_t)ptr; + case INDEX_op_goto_tb: + tci_args_l(insn, tb_ptr, &ptr); + if (tb_ptr != *(void **)ptr) { + tb_ptr = *(void **)ptr; + ctx.tb_ptr = tb_ptr; + if (inc_counter(tb_ptr)) { + return 0; /* enter to wasm TB */ + } + tb_ptr = get_tci_ptr(tb_ptr); + } + break; + case INDEX_op_goto_ptr: + tci_args_r(insn, &r0); + ptr = (void *)regs[r0]; + if (!ptr) { + ctx.tb_ptr = 0; + return 0; + } + tb_ptr = ptr; + ctx.tb_ptr = tb_ptr; + if (inc_counter(tb_ptr)) { + return 0; /* enter to wasm TB */ + } + tb_ptr = get_tci_ptr(tb_ptr); + break; + case INDEX_op_qemu_ld: + tci_args_rrm(insn, &r0, &r1, &oi); + taddr = regs[r1]; + regs[r0] = tci_qemu_ld(env, taddr, oi, tb_ptr); + break; + case INDEX_op_qemu_st: + tci_args_rrm(insn, &r0, &r1, &oi); + taddr = regs[r1]; + tci_qemu_st(env, taddr, regs[r0], oi, tb_ptr); + break; + case INDEX_op_mb: + /* Ensure ordering for all kinds */ + smp_mb(); + break; + default: + g_assert_not_reached(); + } + } +} + +/* + * The maximum number of instances that can exist simultaneously + * + * If this limit is reached and a new instance is required, older instances are + * removed to allow creation of new ones without exceeding the browser's limit. 
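Editorial aside: the instance cache introduced below is a ring buffer with one spare slot (INSTANCES_BUF_MAX = MAX_INSTANCES + 1), so begin == end always means "empty" rather than "full". A minimal self-contained sketch of the occupancy arithmetic used by remove_old_instances further down, with toy sizes of my choosing:

    /* Ring-buffer occupancy, mirroring instances_begin/instances_end. */
    #include <assert.h>

    #define BUF_MAX (4 + 1) /* toy value; the patch uses 12000 + 1 */

    static int occupancy(int begin, int end)
    {
        return begin <= end ? end - begin : end + (BUF_MAX - begin);
    }

    int main(void)
    {
        assert(occupancy(0, 0) == 0); /* empty */
        assert(occupancy(0, 4) == 4); /* full: all 4 usable slots taken */
        assert(occupancy(3, 1) == 3); /* wrapped: slots 3, 4 and 0 in use */
        return 0;
    }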
+ */ +#define MAX_INSTANCES 12000 + +static int instances_global; + +/* Avoid overlapping of begin/end pointers */ +#define INSTANCES_BUF_MAX (MAX_INSTANCES + 1) + +static __thread struct WasmInstanceInfo instances[INSTANCES_BUF_MAX]; +static __thread int instances_begin; +static __thread int instances_end; + +static void add_instance(wasm_tb_func tb_func, void *tb_ptr) +{ + instances[instances_end].tb_func = tb_func; + instances[instances_end].tb_ptr = tb_ptr; + set_info_local(tb_ptr, &(instances[instances_end])); + instances_end = (instances_end + 1) % INSTANCES_BUF_MAX; + + qatomic_inc(&instances_global); +} + +static __thread int instance_pending_gc; +static __thread int instance_done_gc; + +static void remove_old_instances(void) +{ + int num; + if (instance_pending_gc > 0) { + return; + } + if (instances_begin <= instances_end) { + num = instances_end - instances_begin; + } else { + num = instances_end + (INSTANCES_BUF_MAX - instances_begin); + } + /* remove half of the oldest instances in the buffer */ + num /= 2; + for (int i = 0; i < num; i++) { + EM_ASM({ removeFunction($0); }, instances[instances_begin].tb_func); + instances[instances_begin].tb_ptr = NULL; + instances_begin = (instances_begin + 1) % INSTANCES_BUF_MAX; + } + instance_pending_gc += num; +} + +static bool can_add_instance(void) +{ + return qatomic_read(&instances_global) < MAX_INSTANCES; +} + +static wasm_tb_func get_instance_from_tb(void *tb_ptr) +{ + struct WasmInstanceInfo *elm = get_info_local(tb_ptr); + if (elm == NULL) { + return NULL; + } + if (elm->tb_ptr != tb_ptr) { + /* + * This TB was instantiated before but has since been removed. Set the + * counter to the max value so that it will be re-instantiated. + */ + set_counter_local(tb_ptr, INSTANTIATE_NUM); + set_info_local(tb_ptr, NULL); + return NULL; + } + return elm->tb_func; +} + +static void check_gc_completion(void) +{ + if (instance_done_gc > 0) { + qatomic_sub(&instances_global, instance_done_gc); + instance_pending_gc -= instance_done_gc; + instance_done_gc = 0; + } +} + +EM_JS_PRE(void, init_wasm_js, (void *instance_done_gc), +{ + Module.__wasm_tb = { + inst_gc_registry: new FinalizationRegistry((i) => { + if (i == "tbinstance") { + const memory_v = new DataView(HEAP8.buffer); + let v = memory_v.getInt32(DEC_PTR(instance_done_gc), true); + memory_v.setInt32(DEC_PTR(instance_done_gc), v + 1, true); + } + }) + }; +}); + +#define MAX_EXEC_NUM 50000 +static __thread int exec_cnt = MAX_EXEC_NUM; +static inline void trysleep(void) +{ + /* + * Even while running TBs continuously, periodically return control + * to the browser and allow it to run its own tasks. + */ + if (--exec_cnt == 0) { + if (!can_add_instance()) { + emscripten_sleep(0); + check_gc_completion(); + } + exec_cnt = MAX_EXEC_NUM; + } +} + +static int thread_idx_max; + +static void init_wasm(void) +{ + thread_idx = qatomic_fetch_inc(&thread_idx_max); + ctx.stack = g_malloc(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE); + ctx.buf128 = g_malloc(16); + ctx.tci_tb_ptr = (uint32_t *)&tci_tb_ptr; + init_wasm_js(&instance_done_gc); +} + +static __thread bool initdone; + +uintptr_t tcg_qemu_tb_exec(CPUArchState *env, const void *v_tb_ptr) +{ + if (!initdone) { + init_wasm(); + initdone = true; + } + ctx.env = env; + ctx.tb_ptr = (void *)v_tb_ptr; + while (true) { + trysleep(); + uintptr_t res; + wasm_tb_func tb_func = get_instance_from_tb(ctx.tb_ptr); + if (tb_func) { + /* + * Call the Wasm instance + */ + res = call_wasm_tb(tb_func, &ctx); + } else if (!inc_counter(ctx.tb_ptr)) { + /* + * Run it on TCI because the counter value is still small + */ + res = tcg_qemu_tb_exec_tci(env); + } else if (!can_add_instance()) { + /* + * Too many instances have been created; try removing older + * instances and keep running this TB on TCI + */ + remove_old_instances(); + check_gc_completion(); + res = tcg_qemu_tb_exec_tci(env); + } else { + /* + * Instantiate and run the Wasm module + */ + struct WasmTBHeader *header = (struct WasmTBHeader *)ctx.tb_ptr; + tb_func = (wasm_tb_func)instantiate_wasm(header->wasm_ptr, + header->wasm_size, + header->import_ptr, + header->import_size); + add_instance(tb_func, ctx.tb_ptr); + res = call_wasm_tb(tb_func, &ctx); + } + if (!ctx.tb_ptr) { + return res; + } + } +} diff --git a/tcg/wasm.h b/tcg/wasm.h new file mode 100644 index 0000000000000..88163c28df0d5 --- /dev/null +++ b/tcg/wasm.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef TCG_WASM_H +#define TCG_WASM_H + +/* + * WasmContext is data shared between QEMU and the Wasm modules. + */ +struct WasmContext { + /* + * Pointer to the TB to be executed. + */ + void *tb_ptr; + + /* + * Pointer to the tci_tb_ptr variable. + */ + void *tci_tb_ptr; + + /* + * Buffer to store a 128-bit return value on call. + */ + void *buf128; + + /* + * Pointer to the CPUArchState struct. + */ + CPUArchState *env; + + /* + * Pointer to a stack array. + */ + uint64_t *stack; + + /* + * Flag indicating whether to initialize the block index (1) or not (0). + */ + uint32_t do_init; +}; + +/* Instantiated Wasm function of a TB */ +typedef uintptr_t (*wasm_tb_func)(struct WasmContext *); + +static inline uintptr_t call_wasm_tb(wasm_tb_func f, struct WasmContext *ctx) +{ + ctx->do_init = 1; /* reset the block index (rewinding will skip this) */ + return f(ctx); +} + +/* + * WasmInstanceInfo holds the relationship between a TB and its Wasm instance. + */ +struct WasmInstanceInfo { + void *tb_ptr; + wasm_tb_func tb_func; +}; + +/* + * A TB of the Wasm backend starts with a header which contains pointers to + * the data stored in the regions that follow it in the TB. + */ +struct WasmTBHeader { + /* + * Pointer to the region containing TCI instructions. + */ + void *tci_ptr; + + /* + * Pointer to the region containing Wasm instructions. + */ + void *wasm_ptr; + int wasm_size; + + /* + * Pointer to the array containing imported function pointers. + */ + void *import_ptr; + int import_size; + + /* + * Per-thread counters holding how many times the TB has been executed + * before the instantiation. + */ + int32_t *counter_ptr; + + /* + * Pointer to the instance information on each thread. 
+ */ + struct WasmInstanceInfo **info_ptr; +}; + +static inline void *get_tci_ptr(void *tb_ptr) +{ + return ((struct WasmTBHeader *)tb_ptr)->tci_ptr; +} + +static inline int32_t get_counter(void *tb_ptr, int idx) +{ + return ((struct WasmTBHeader *)tb_ptr)->counter_ptr[idx]; +} + +static inline void set_counter(void *tb_ptr, int idx, int v) +{ + ((struct WasmTBHeader *)tb_ptr)->counter_ptr[idx] = v; +} + +static inline struct WasmInstanceInfo *get_info(void *tb_ptr, int idx) +{ + return ((struct WasmTBHeader *)tb_ptr)->info_ptr[idx]; +} + +static inline void set_info(void *tb_ptr, int idx, + struct WasmInstanceInfo *info) +{ + ((struct WasmTBHeader *)tb_ptr)->info_ptr[idx] = info; +} + +#endif diff --git a/tcg/wasm/tcg-target-con-set.h b/tcg/wasm/tcg-target-con-set.h new file mode 100644 index 0000000000000..0dc41ebe33f50 --- /dev/null +++ b/tcg/wasm/tcg-target-con-set.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Wasm target-specific constraint sets. + * + * Based on tci/tcg-target-con-set.h + * + * Copyright (c) 2021 Linaro + */ + +/* + * C_On_Im(...) defines a constraint set with outputs and inputs. + * Each operand should be a sequence of constraint letters as defined by + * tcg-target-con-str.h; the constraint combination is inclusive or. + */ +C_O0_I1(r) +C_O0_I2(r, r) +C_O1_I1(r, r) +C_O1_I2(r, r, r) +C_O1_I4(r, r, r, r, r) diff --git a/tcg/wasm/tcg-target-con-str.h b/tcg/wasm/tcg-target-con-str.h new file mode 100644 index 0000000000000..21ddbcc01a9d3 --- /dev/null +++ b/tcg/wasm/tcg-target-con-str.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define Wasm target-specific operand constraints. + * + * Based on tci/tcg-target-con-str.h + * + * Copyright (c) 2021 Linaro + */ + +/* + * Define constraint letters for register sets: + * REGS(letter, register_mask) + */ +REGS('r', MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) diff --git a/tcg/wasm/tcg-target-has.h b/tcg/wasm/tcg-target-has.h new file mode 100644 index 0000000000000..8fe9b4540318a --- /dev/null +++ b/tcg/wasm/tcg-target-has.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +#ifndef TCG_TARGET_HAS_H +#define TCG_TARGET_HAS_H + +#define TCG_TARGET_HAS_tst 0 +#define TCG_TARGET_HAS_extr_i64_i32 0 +#define TCG_TARGET_HAS_qemu_ldst_i128 0 + +#define TCG_TARGET_extract_valid(type, ofs, len) 0 +#define TCG_TARGET_sextract_valid(type, ofs, len) \ + ((ofs == 0) && ((len == 8) || (len == 16) || (len == 32))) +#define TCG_TARGET_deposit_valid(type, ofs, len) 0 + +#endif diff --git a/tcg/wasm/tcg-target-mo.h b/tcg/wasm/tcg-target-mo.h new file mode 100644 index 0000000000000..525f7022931ce --- /dev/null +++ b/tcg/wasm/tcg-target-mo.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Define target-specific memory model + * + * Based on tci/tcg-target-mo.h + * + * Copyright (c) 2009, 2011 Stefan Weil + */ + +#ifndef TCG_TARGET_MO_H +#define TCG_TARGET_MO_H + +/* + * We could notice __i386__ or __s390x__ and reduce the barriers depending + * on the host. But if you want performance, you use the normal backend. + * We prefer consistency across hosts on this. + */ +#define TCG_TARGET_DEFAULT_MO 0 + +#endif diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc new file mode 100644 index 0000000000000..092a5086ecde4 --- /dev/null +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Based on tci/tcg-target-opc.h.inc + * + * These opcodes for use between the tci generator and interpreter. 
+ */ +DEF(tci_movi, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movl, 1, 0, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_setcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_movcond32, 1, 2, 1, TCG_OPF_NOT_PRESENT) +DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target-reg-bits.h b/tcg/wasm/tcg-target-reg-bits.h new file mode 100644 index 0000000000000..3dd821691ff89 --- /dev/null +++ b/tcg/wasm/tcg-target-reg-bits.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef TCG_TARGET_REG_BITS_H +#define TCG_TARGET_REG_BITS_H + +#if UINTPTR_MAX != UINT64_MAX +# error Unsupported pointer size for TCG target +#endif +#define TCG_TARGET_REG_BITS 64 + +#endif diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc new file mode 100644 index 0000000000000..3bb4864da592d --- /dev/null +++ b/tcg/wasm/tcg-target.c.inc @@ -0,0 +1,2843 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Tiny Code Generator for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * Copyright (c) 2018 SiFive, Inc + * Copyright (c) 2008-2009 Arnaud Patard + * Copyright (c) 2009 Aurelien Jarno + * Copyright (c) 2008 Fabrice Bellard + * + * Based on tci/tcg-target.c.inc and riscv/tcg-target.c.inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/queue.h" +#include "../wasm.h" + +/* This is included to get the number of threads via tcg_max_ctxs. */ +#include "../tcg-internal.h" + +/* Used for function call generation. */ +#define TCG_TARGET_CALL_STACK_OFFSET 0 +#define TCG_TARGET_STACK_ALIGN 8 +#define TCG_TARGET_CALL_ARG_I32 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I64 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL +#define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL + +typedef uint32_t tcg_insn_unit_tci; + +static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R15, + /* 2 of these are call clobbered, so use them last. 
*/ + TCG_REG_R1, + TCG_REG_R0, +}; + +#ifdef CONFIG_DEBUG_TCG +static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "r00", + "r01", + "r02", + "r03", + "r04", + "r05", + "r06", + "r07", + "r08", + "r09", + "r10", + "r11", + "r12", + "r13", + "r14", + "r15", +}; +#endif + +/* No call arguments via registers. All will be stored on the "stack". */ +static const int tcg_target_call_iarg_regs[] = { }; + +static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) +{ + tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); + tcg_debug_assert(slot >= 0 && slot < 128 / TCG_TARGET_REG_BITS); + return TCG_REG_R0 + slot; +} + +static TCGConstraintSetIndex +tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags) +{ + return C_NotImplemented; +} + +/* Test if a constant matches the constraint. */ +static bool tcg_target_const_match(int64_t val, int ct, + TCGType type, TCGCond cond, int vece) +{ + return ct & TCG_CT_CONST; +} + +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + memset(p, 0, sizeof(*p) * count); +} + +static bool patch_reloc(tcg_insn_unit *code_ptr, int type, + intptr_t value, intptr_t addend) +{ + intptr_t diff = value - (intptr_t)(code_ptr + 4); + + tcg_debug_assert(addend == 0); + tcg_debug_assert(type == 20); + + if (diff == sextract32(diff, 0, type)) { + tcg_patch32(code_ptr, + deposit32(*(uint32_t *)code_ptr, 32 - type, type, diff)); + return true; + } + return false; +} + +/* converts a TCG register to a wasm variable index */ +static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { + 0, /* TCG_REG_R0 */ + 1, /* TCG_REG_R1 */ + 2, /* TCG_REG_R2 */ + 3, /* TCG_REG_R3 */ + 4, /* TCG_REG_R4 */ + 5, /* TCG_REG_R5 */ + 6, /* TCG_REG_R6 */ + 7, /* TCG_REG_R7 */ + 8, /* TCG_REG_R8 */ + 9, /* TCG_REG_R9 */ + 10, /* TCG_REG_R10 */ + 11, /* TCG_REG_R11 */ + 12, /* TCG_REG_R12 */ + 13, /* TCG_REG_R13 */ + 14, /* TCG_REG_R14 */ + 15, /* TCG_REG_R15 */ +}; + +#define REG_IDX(r) tcg_target_reg_index[r] + +/* Global variable used for storing the current block index */ +#define BLOCK_IDX 16 + +/* Local variable pointing to WasmContext */ +#define CTX_IDX 0 + +/* Temporary local variables */ +#define TMP32_LOCAL_0_IDX 1 +#define TMP64_LOCAL_0_IDX 2 +#define TMP64_LOCAL_1_IDX 3 + +/* Function index */ +#define CHECK_UNWINDING_IDX 0 /* A function to check the Asyncify status */ +#define HELPER_IDX_START 1 /* The first index of helper functions */ + +#define PTR_TYPE 0x7e + +typedef enum { + OPC_UNREACHABLE = 0x00, + OPC_LOOP = 0x03, + OPC_IF = 0x04, + OPC_ELSE = 0x05, + OPC_END = 0x0b, + OPC_BR = 0x0c, + OPC_RETURN = 0x0f, + OPC_CALL = 0x10, + OPC_LOCAL_GET = 0x20, + OPC_LOCAL_SET = 0x21, + OPC_LOCAL_TEE = 0x22, + OPC_GLOBAL_GET = 0x23, + OPC_GLOBAL_SET = 0x24, + + OPC_I32_LOAD = 0x28, + OPC_I64_LOAD = 0x29, + OPC_I64_LOAD8_S = 0x30, + OPC_I64_LOAD8_U = 0x31, + OPC_I64_LOAD16_S = 0x32, + OPC_I64_LOAD16_U = 0x33, + OPC_I64_LOAD32_S = 0x34, + OPC_I64_LOAD32_U = 0x35, + OPC_I32_STORE = 0x36, + OPC_I64_STORE = 0x37, + OPC_I64_STORE8 = 0x3c, + OPC_I64_STORE16 = 0x3d, + OPC_I64_STORE32 = 0x3e, + + OPC_I32_CONST = 0x41, + OPC_I64_CONST = 0x42, + + OPC_I32_EQZ = 0x45, + OPC_I32_EQ = 0x46, + OPC_I32_NE = 0x47, + OPC_I32_LT_S = 0x48, + OPC_I32_LT_U = 0x49, + OPC_I32_GT_S = 0x4a, + OPC_I32_GT_U = 0x4b, + OPC_I32_LE_S = 0x4c, + OPC_I32_LE_U = 0x4d, + OPC_I32_GE_S = 0x4e, + OPC_I32_GE_U = 0x4f, + + OPC_I64_EQZ = 0x50, + OPC_I64_EQ = 0x51, + OPC_I64_NE = 0x52, + OPC_I64_LT_S = 0x53, + OPC_I64_LT_U = 0x54, + OPC_I64_GT_S = 0x55, + OPC_I64_GT_U = 0x56, + 
OPC_I64_LE_S = 0x57, + OPC_I64_LE_U = 0x58, + OPC_I64_GE_S = 0x59, + OPC_I64_GE_U = 0x5a, + + OPC_I32_CLZ = 0x67, + OPC_I32_CTZ = 0x68, + OPC_I32_ADD = 0x6a, + OPC_I32_DIV_S = 0x6d, + OPC_I32_DIV_U = 0x6e, + OPC_I32_REM_S = 0x6f, + OPC_I32_REM_U = 0x70, + OPC_I32_SHR_S = 0x75, + OPC_I32_SHR_U = 0x76, + OPC_I32_ROTL = 0x77, + OPC_I32_ROTR = 0x78, + + OPC_I64_CLZ = 0x79, + OPC_I64_CTZ = 0x7a, + OPC_I64_POPCNT = 0x7b, + OPC_I64_ADD = 0x7c, + OPC_I64_SUB = 0x7d, + OPC_I64_MUL = 0x7e, + OPC_I64_DIV_S = 0x7f, + OPC_I64_DIV_U = 0x80, + OPC_I64_REM_S = 0x81, + OPC_I64_REM_U = 0x82, + OPC_I64_AND = 0x83, + OPC_I64_OR = 0x84, + OPC_I64_XOR = 0x85, + OPC_I64_SHL = 0x86, + OPC_I64_SHR_S = 0x87, + OPC_I64_SHR_U = 0x88, + OPC_I64_ROTL = 0x89, + OPC_I64_ROTR = 0x8a, + + OPC_I32_WRAP_I64 = 0xa7, + OPC_I64_EXTEND_I32_S = 0xac, + OPC_I64_EXTEND_I32_U = 0xad, + OPC_I64_EXTEND8_S = 0xc2, + OPC_I64_EXTEND16_S = 0xc3, +} WasmInsn; + +typedef enum { + BLOCK_NORET = 0x40, + BLOCK_I64 = 0x7e, + BLOCK_I32 = 0x7f, +} WasmBlockType; + +#define BUF_SIZE 1024 +typedef struct LinkedBufEntry { + uint8_t data[BUF_SIZE]; + uint32_t size; + QSIMPLEQ_ENTRY(LinkedBufEntry) entry; +} LinkedBufEntry; + +typedef QSIMPLEQ_HEAD(, LinkedBufEntry) LinkedBuf; + +static void linked_buf_out8(LinkedBuf *linked_buf, uint8_t v) +{ + LinkedBufEntry *buf = QSIMPLEQ_LAST(linked_buf, LinkedBufEntry, entry); + if (!buf || (buf->size == BUF_SIZE)) { + LinkedBufEntry *e = tcg_malloc(sizeof(LinkedBufEntry)); + e->size = 0; + QSIMPLEQ_INSERT_TAIL(linked_buf, e, entry); + buf = e; + } + buf->data[buf->size++] = v; +} + +static void linked_buf_out_leb128(LinkedBuf *p, uint64_t v) +{ + uint8_t b; + do { + b = v & 0x7f; + v >>= 7; + if (v != 0) { + b |= 0x80; + } + linked_buf_out8(p, b); + } while (v != 0); +} + +static void linked_buf_out_sleb128(LinkedBuf *p, int64_t v) +{ + bool more = true; + uint8_t b; + while (more) { + b = v & 0x7f; + v >>= 7; + if (((v == 0) && ((b & 0x40) == 0)) || + ((v == -1) && ((b & 0x40) != 0))) { + more = false; + } else { + b |= 0x80; + } + linked_buf_out8(p, b); + } +} + +static int linked_buf_len(LinkedBuf *p) +{ + int total = 0; + LinkedBufEntry *e; + + QSIMPLEQ_FOREACH(e, p, entry) { + total += e->size; + } + return total; +} + +static int linked_buf_write(LinkedBuf *p, void *dst) +{ + int total = 0; + LinkedBufEntry *e; + + QSIMPLEQ_FOREACH(e, p, entry) { + memcpy(dst, e->data, e->size); + dst += e->size; + total += e->size; + } + return total; +} + +/* + * Wasm code is generated into dynamically allocated buffers, which + * are managed as a linked list. 
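Editorial aside: linked_buf_out_leb128 and linked_buf_out_sleb128 above implement the standard (S)LEB128 encodings used throughout the Wasm binary format. A standalone copy of the same loops, checked against well-known byte sequences (my sketch, not part of the patch):

    /* (S)LEB128 sanity checks against standard encodings. */
    #include <assert.h>
    #include <stdint.h>

    static int uleb(uint64_t v, uint8_t *out)
    {
        int n = 0;
        do {
            uint8_t b = v & 0x7f;
            v >>= 7;
            out[n++] = v ? (b | 0x80) : b;
        } while (v);
        return n;
    }

    static int sleb(int64_t v, uint8_t *out)
    {
        int n = 0;
        for (;;) {
            uint8_t b = v & 0x7f;
            v >>= 7; /* arithmetic shift, as in the code above */
            if ((v == 0 && !(b & 0x40)) || (v == -1 && (b & 0x40))) {
                out[n++] = b;
                return n;
            }
            out[n++] = b | 0x80;
        }
    }

    int main(void)
    {
        uint8_t buf[8];
        assert(uleb(624485, buf) == 3 &&
               buf[0] == 0xe5 && buf[1] == 0x8e && buf[2] == 0x26);
        assert(sleb(-64, buf) == 1 && buf[0] == 0x40); /* sign bit doubles up */
        assert(sleb(64, buf) == 2 &&
               buf[0] == 0xc0 && buf[1] == 0x00); /* extra byte for the sign */
        return 0;
    }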
+ */ +static __thread LinkedBuf sub_buf; + +static void init_sub_buf(void) +{ + QSIMPLEQ_INIT(&sub_buf); +} +static int sub_buf_len(void) +{ + return linked_buf_len(&sub_buf); +} +static void tcg_wasm_out8(TCGContext *s, uint8_t v) +{ + linked_buf_out8(&sub_buf, v); +} +static void tcg_wasm_out_leb128(TCGContext *s, uint64_t v) +{ + linked_buf_out_leb128(&sub_buf, v); +} +static void tcg_wasm_out_sleb128(TCGContext *s, int64_t v) +{ + linked_buf_out_sleb128(&sub_buf, v); +} + +static void tcg_wasm_out_op(TCGContext *s, WasmInsn opc) +{ + tcg_wasm_out8(s, opc); +} +static void tcg_wasm_out_op_idx(TCGContext *s, WasmInsn opc, uint32_t idx) +{ + tcg_wasm_out8(s, opc); + tcg_wasm_out_leb128(s, idx); +} +static void tcg_wasm_out_op_block(TCGContext *s, WasmInsn opc, WasmBlockType t) +{ + tcg_wasm_out8(s, opc); + tcg_wasm_out8(s, t); +} +static void tcg_wasm_out_op_const(TCGContext *s, WasmInsn opc, int64_t v) +{ + tcg_wasm_out8(s, opc); + switch (opc) { + case OPC_I32_CONST: + tcg_wasm_out_sleb128(s, (int32_t)v); + break; + case OPC_I64_CONST: + tcg_wasm_out_sleb128(s, v); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_o1_i2( + TCGContext *s, WasmInsn opc, TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, opc); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} +static void tcg_wasm_out_o1_i2_type( + TCGContext *s, TCGType type, WasmInsn opc32, WasmInsn opc64, + TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, opc32); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + case TCG_TYPE_I64: + tcg_wasm_out_o1_i2(s, opc64, ret, arg1, arg2); + break; + default: + g_assert_not_reached(); + } +} + +static const struct { + WasmInsn i32; + WasmInsn i64; +} tcg_cond_to_inst[] = { + [TCG_COND_EQ] = { OPC_I32_EQ, OPC_I64_EQ }, + [TCG_COND_NE] = { OPC_I32_NE, OPC_I64_NE }, + [TCG_COND_LT] = { OPC_I32_LT_S, OPC_I64_LT_S }, + [TCG_COND_GE] = { OPC_I32_GE_S, OPC_I64_GE_S }, + [TCG_COND_LE] = { OPC_I32_LE_S, OPC_I64_LE_S }, + [TCG_COND_GT] = { OPC_I32_GT_S, OPC_I64_GT_S }, + [TCG_COND_LTU] = { OPC_I32_LT_U, OPC_I64_LT_U }, + [TCG_COND_GEU] = { OPC_I32_GE_U, OPC_I64_GE_U }, + [TCG_COND_LEU] = { OPC_I32_LE_U, OPC_I64_LE_U }, + [TCG_COND_GTU] = { OPC_I32_GT_U, OPC_I64_GT_U } +}; + +static void tcg_wasm_out_cond( + TCGContext *s, TCGType type, TCGCond cond, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i32); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, tcg_cond_to_inst[cond].i64); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_setcond(TCGContext *s, TCGType type, TCGReg ret, + TCGReg arg1, TCGReg arg2, TCGCond cond) +{ + tcg_wasm_out_cond(s, type, cond, arg1, arg2); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, 
OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_wasm_out_negsetcond(TCGContext *s, TCGType type, TCGReg ret,
+                                    TCGReg arg1, TCGReg arg2, TCGCond cond)
+{
+    tcg_wasm_out_op_const(s, OPC_I64_CONST, 0);
+    tcg_wasm_out_cond(s, type, cond, arg1, arg2);
+    tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U);
+    tcg_wasm_out_op(s, OPC_I64_SUB);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_wasm_out_movcond(TCGContext *s, TCGType type, TCGReg ret,
+                                 TCGReg c1, TCGReg c2,
+                                 TCGReg v1, TCGReg v2,
+                                 TCGCond cond)
+{
+    tcg_wasm_out_cond(s, type, cond, c1, c2);
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v1));
+    tcg_wasm_out_op(s, OPC_ELSE);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(v2));
+    tcg_wasm_out_op(s, OPC_END);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret));
+}
+
+static void tcg_wasm_out_sextract(TCGContext *s, TCGReg dest, TCGReg arg1,
+                                  int pos, int len)
+{
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1));
+
+    if (pos == 0) {
+        switch (len) {
+        case 8:
+            tcg_wasm_out_op(s, OPC_I64_EXTEND8_S);
+            break;
+        case 16:
+            tcg_wasm_out_op(s, OPC_I64_EXTEND16_S);
+            break;
+        case 32:
+            tcg_wasm_out_op(s, OPC_I32_WRAP_I64);
+            tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_S);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    } else {
+        g_assert_not_reached();
+    }
+
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest));
+}
+
+static void tcg_wasm_out_extract(TCGContext *s, TCGReg dest, TCGReg arg1,
+                                 int pos, int len)
+{
+    int64_t mask = ~0ULL >> (64 - len);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1));
+    if (pos > 0) {
+        tcg_wasm_out_op_const(s, OPC_I64_CONST, pos);
+        tcg_wasm_out_op(s, OPC_I64_SHR_U);
+    }
+    if ((pos + len) < 64) {
+        tcg_wasm_out_op_const(s, OPC_I64_CONST, mask);
+        tcg_wasm_out_op(s, OPC_I64_AND);
+    }
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(dest));
+}
+
+/*
+ * The size of the offset field of Wasm's load/store instructions differs
+ * depending on the "-sMEMORY64" flag value: 64bit when "-sMEMORY64=1"
+ * and 32bit when "-sMEMORY64=2".
+ */
+#if defined(WASM64_MEMORY64_2)
+typedef uint32_t wasm_ldst_offset_t;
+#else
+typedef uint64_t wasm_ldst_offset_t;
+#endif
+static void tcg_wasm_out_op_ldst(
+    TCGContext *s, WasmInsn instr, uint32_t a, wasm_ldst_offset_t o)
+{
+    tcg_wasm_out_op(s, instr);
+    tcg_wasm_out_leb128(s, a);
+    tcg_wasm_out_leb128(s, (wasm_ldst_offset_t)o);
+}
+
+/*
+ * tcg_wasm_out_norm_ptr emits instructions to adjust the 64bit pointer value
+ * at the top of the stack to satisfy Wasm's memory addressing requirements.
+ */
+static intptr_t tcg_wasm_out_norm_ptr(TCGContext *s, intptr_t offset)
+{
+#if defined(WASM64_MEMORY64_2)
+    /*
+     * If Emscripten's "-sMEMORY64=2" is enabled,
+     * the address size is limited to 32bit.
+     */
+    tcg_wasm_out_op(s, OPC_I32_WRAP_I64);
+#endif
+    /*
+     * Wasm's load/store instructions don't support negative values in
+     * the offset field. So this function calculates the target address
+     * using the base and the offset and makes the offset field 0.
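+     *
+     * (Illustrative note, not in the original patch: a load at base - 16
+     * is thus emitted as "global.get base; i64.const -16; i64.add;
+     * i64.load align=0 offset=0" rather than encoding -16 in the
+     * unsigned offset immediate.)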
+ */ + if (offset < 0) { +#if defined(WASM64_MEMORY64_2) + tcg_wasm_out_op_const(s, OPC_I32_CONST, offset); + tcg_wasm_out_op(s, OPC_I32_ADD); +#else + tcg_wasm_out_op_const(s, OPC_I64_CONST, offset); + tcg_wasm_out_op(s, OPC_I64_ADD); +#endif + offset = 0; + } + return offset; +} + +static void tcg_wasm_out_ld( + TCGContext *s, WasmInsn opc, TCGReg val, TCGReg base, intptr_t offset) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(base)); + offset = tcg_wasm_out_norm_ptr(s, offset); + tcg_wasm_out_op_ldst(s, opc, 0, offset); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(val)); +} + +static void tcg_wasm_out_st( + TCGContext *s, WasmInsn opc, TCGReg val, TCGReg base, intptr_t offset) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(base)); + offset = tcg_wasm_out_norm_ptr(s, offset); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(val)); + tcg_wasm_out_op_ldst(s, opc, 0, offset); +} + +static void tcg_wasm_out_mov(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_const(s, OPC_I64_CONST, (int32_t)arg); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_const(s, OPC_I64_CONST, arg); + break; + default: + g_assert_not_reached(); + } + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_neg(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op(s, OPC_I64_SUB); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_ctpop64(TCGContext *s, TCGReg ret, TCGReg arg) +{ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg)); + tcg_wasm_out_op(s, OPC_I64_POPCNT); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); +} + +static void tcg_wasm_out_cz( + TCGContext *s, TCGType type, WasmInsn opc32, WasmInsn opc64, + TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I32); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, opc32); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I64_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, opc64); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + default: + g_assert_not_reached(); + } +} + +typedef struct LabelInfo { + int label; + int block; + QSIMPLEQ_ENTRY(LabelInfo) entry; +} LabelInfo; + +static __thread QSIMPLEQ_HEAD(, LabelInfo) label_info; + +static void init_label_info(void) +{ + QSIMPLEQ_INIT(&label_info); +} + +static void add_label(int label, int block) +{ + LabelInfo *e = tcg_malloc(sizeof(LabelInfo)); + e->label = 
label; + e->block = block; + QSIMPLEQ_INSERT_TAIL(&label_info, e, entry); +} + +typedef struct BlockPlaceholder { + int label; + int pos; + QSIMPLEQ_ENTRY(BlockPlaceholder) entry; +} BlockPlaceholder; + +static __thread QSIMPLEQ_HEAD(, BlockPlaceholder) block_placeholder; +static __thread int64_t cur_block_idx; + +static void init_blocks(void) +{ + QSIMPLEQ_INIT(&block_placeholder); + cur_block_idx = 0; +} + +static void add_block_placeholder(int label, int pos) +{ + BlockPlaceholder *e = tcg_malloc(sizeof(BlockPlaceholder)); + e->label = label; + e->pos = pos; + QSIMPLEQ_INSERT_TAIL(&block_placeholder, e, entry); +} + +static int get_block_of_label(int label) +{ + LabelInfo *e; + QSIMPLEQ_FOREACH(e, &label_info, entry) { + if (e->label == label) { + return e->block; + } + } + return -1; +} + +static void tcg_wasm_out_new_block(TCGContext *s) +{ + tcg_wasm_out_op(s, OPC_END); /* close this block */ + + /* next block */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); + tcg_wasm_out_op_const(s, OPC_I64_CONST, ++cur_block_idx); + tcg_wasm_out_op(s, OPC_I64_LE_U); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); +} + +static void tcg_out_label_cb(TCGContext *s, TCGLabel *l) +{ + add_label(l->id, cur_block_idx + 1); + tcg_wasm_out_new_block(s); +} + +static void tcg_wasm_out_br_to_label(TCGContext *s, TCGLabel *l, bool br_if) +{ + int toploop_depth = 1; + if (br_if) { + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + toploop_depth++; + } + tcg_wasm_out8(s, OPC_I64_CONST); + + add_block_placeholder(l->id, sub_buf_len()); + + tcg_wasm_out8(s, 0x80); /* placeholder for the target block idx */ + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x80); + tcg_wasm_out8(s, 0x00); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + if (get_block_of_label(l->id) != -1) { + /* + * The label is placed before this br, branch to the top of loop + */ + tcg_wasm_out_op_idx(s, OPC_BR, toploop_depth); + } else { + /* + * The label will be generated after this br, + * branch to the end of the current block + */ + tcg_wasm_out_op_idx(s, OPC_BR, toploop_depth - 1); + } + if (br_if) { + tcg_wasm_out_op(s, OPC_END); + } +} + +static void tcg_wasm_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_wasm_out_br_to_label(s, l, false); +} + +static void tcg_wasm_out_brcond(TCGContext *s, TCGType type, + TCGReg arg1, TCGReg arg2, + TCGCond cond, TCGLabel *l) +{ + tcg_wasm_out_cond(s, type, cond, arg1, arg2); + tcg_wasm_out_br_to_label(s, l, true); +} + +#define CTX_OFFSET(f) offsetof(struct WasmContext, f) + +static intptr_t tcg_wasm_out_get_ctx(TCGContext *s, intptr_t off) +{ + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, CTX_IDX); + return tcg_wasm_out_norm_ptr(s, off); +} + +static void tcg_wasm_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + intptr_t ofs; + + /* Store ctx.tb_ptr = 0 which indicates there is no next TB */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + + /* Return the control to the caller */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, arg); + tcg_wasm_out_op(s, OPC_RETURN); +} + +static void tcg_wasm_out_goto(TCGContext *s, TCGReg target, int block_depth) +{ + intptr_t ofs; + + /* Check if the target TB is the same as the current TB */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(target)); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op(s, OPC_I64_EQ); + + /* + * If the target TB is the same as the current TB, no 
need to return to the + * caller. Just branch to the top of the current TB. + */ + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_op_idx(s, OPC_BR, block_depth); /* br to the top of loop */ + tcg_wasm_out_op(s, OPC_END); + + /* Store the target TB to ctx.tb_ptr and return */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tb_ptr)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(target)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + tcg_wasm_out_op(s, OPC_RETURN); +} + +static void tcg_wasm_out_goto_ptr(TCGContext *s, TCGReg arg) +{ + tcg_wasm_out_goto(s, arg, 2); +} + +static void tcg_wasm_out_goto_tb( + TCGContext *s, int which, uintptr_t cur_reset_ptr) +{ + intptr_t ofs; + + /* Set the target TB in the tmp variable. */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, get_jmp_target_addr(s, which)); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_TMP)); + + /* Goto the target TB if it's registered. */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_TMP)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_reset_ptr); + tcg_wasm_out_op(s, OPC_I64_NE); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_goto(s, TCG_REG_TMP, 3); + tcg_wasm_out_op(s, OPC_END); +} + +static void push_arg_i64(TCGContext *s, int *stack_offset) +{ + intptr_t ofs; + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_CALL_STACK)); + ofs = tcg_wasm_out_norm_ptr(s, *stack_offset); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + *stack_offset = *stack_offset + 8; +} + +static void gen_call(TCGContext *s, + const TCGHelperInfo *info, uint32_t func_idx) +{ + unsigned typemask = info->typemask; + int rettype = typemask & 7; + int stack_offset = 0; + intptr_t ofs; + + if (rettype == dh_typecode_i128) { + /* receive 128bit return value via the buffer */ + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + } + + for (typemask >>= 3; typemask; typemask >>= 3) { + switch (typemask & 7) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + push_arg_i64(s, &stack_offset); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + break; + case dh_typecode_i64: + case dh_typecode_s64: + push_arg_i64(s, &stack_offset); + break; + case dh_typecode_i128: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_REG_CALL_STACK)); + tcg_wasm_out_op_const(s, OPC_I64_CONST, stack_offset); + tcg_wasm_out_op(s, OPC_I64_ADD); + stack_offset += 16; + break; + case dh_typecode_ptr: + push_arg_i64(s, &stack_offset); + break; + default: + g_assert_not_reached(); + } + } + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + + switch (rettype) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_S); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + case dh_typecode_i64: + case dh_typecode_s64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + case dh_typecode_i128: + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(buf128)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 
0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 8); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R1)); + break; + case dh_typecode_ptr: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_R0)); + break; + default: + g_assert_not_reached(); + } +} + +static __thread LinkedBuf types_buf; + +static void init_types_buf(void) +{ + QSIMPLEQ_INIT(&types_buf); +} + +static void types_buf_out8(uint8_t v) +{ + linked_buf_out8(&types_buf, v); +} + +static void gen_func_type_call(TCGContext *s, const TCGHelperInfo *info) +{ + unsigned typemask = info->typemask; + int rettype = typemask & 7; + uint32_t vec_size = 0; + + if (rettype == dh_typecode_i128) { + vec_size++; + } + for (int m = typemask >> 3; m; m >>= 3) { + if ((m & 7) != dh_typecode_void) { + vec_size++; + } + } + + types_buf_out8(0x60); + linked_buf_out_leb128(&types_buf, vec_size); + + if (rettype == dh_typecode_i128) { + types_buf_out8(PTR_TYPE); + } + + for (int m = typemask >> 3; m; m >>= 3) { + switch (m & 7) { + case dh_typecode_void: + break; + case dh_typecode_i32: + case dh_typecode_s32: + types_buf_out8(0x7f); + break; + case dh_typecode_i64: + case dh_typecode_s64: + types_buf_out8(0x7e); + break; + case dh_typecode_i128: + types_buf_out8(PTR_TYPE); + break; + case dh_typecode_ptr: + types_buf_out8(PTR_TYPE); + break; + default: + g_assert_not_reached(); + } + } + + switch (rettype) { + case dh_typecode_void: + case dh_typecode_i128: + types_buf_out8(0x0); + break; + case dh_typecode_i32: + case dh_typecode_s32: + types_buf_out8(0x1); + types_buf_out8(0x7f); + break; + case dh_typecode_i64: + case dh_typecode_s64: + types_buf_out8(0x1); + types_buf_out8(0x7e); + break; + case dh_typecode_ptr: + types_buf_out8(0x1); + types_buf_out8(PTR_TYPE); + break; + default: + g_assert_not_reached(); + } +} + +static __thread LinkedBuf imports_buf; + +static void init_imports_buf(void) +{ + QSIMPLEQ_INIT(&imports_buf); +} + +static void imports_buf_out8(uint8_t v) +{ + linked_buf_out8(&imports_buf, v); +} + +typedef struct HelperInfo { + intptr_t idx_on_qemu; + QSIMPLEQ_ENTRY(HelperInfo) entry; +} HelperInfo; + +static __thread QSIMPLEQ_HEAD(, HelperInfo) helpers; +__thread uint32_t helper_idx; + +static void init_helpers(void) +{ + QSIMPLEQ_INIT(&helpers); + helper_idx = HELPER_IDX_START; +} + +static uint32_t register_helper(TCGContext *s, intptr_t helper_idx_on_qemu) +{ + uint32_t typeidx = helper_idx + 1; + char buf[11]; /* enough for decimal int max + NULL*/ + int n = snprintf(buf, sizeof(buf), "%d", helper_idx - HELPER_IDX_START); + + tcg_debug_assert(helper_idx_on_qemu >= 0); + + HelperInfo *e = tcg_malloc(sizeof(HelperInfo)); + e->idx_on_qemu = helper_idx_on_qemu; + QSIMPLEQ_INSERT_TAIL(&helpers, e, entry); + + tcg_debug_assert(n < sizeof(buf)); + imports_buf_out8(6); /* helper */ + imports_buf_out8(0x68); + imports_buf_out8(0x65); + imports_buf_out8(0x6c); + imports_buf_out8(0x70); + imports_buf_out8(0x65); + imports_buf_out8(0x72); + linked_buf_out_leb128(&imports_buf, (uint32_t)n); + for (int i = 0; i < n; i++) { + imports_buf_out8(buf[i]); + } + imports_buf_out8(0); /* type(0) */ + linked_buf_out_leb128(&imports_buf, typeidx); + + return helper_idx++; +} + +static int helpers_len(void) +{ + int n = 0; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + n++; + } + return n; +} + +static int helpers_write_to_array(intptr_t *dst) +{ + intptr_t *start = dst; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + *dst++ = e->idx_on_qemu; + } + return 
(intptr_t)dst - (intptr_t)start; +} + +static int64_t get_helper_idx(TCGContext *s, intptr_t helper_idx_on_qemu) +{ + uint32_t idx = HELPER_IDX_START; + HelperInfo *e; + + QSIMPLEQ_FOREACH(e, &helpers, entry) { + if (e->idx_on_qemu == helper_idx_on_qemu) { + return idx; + } + idx++; + } + return -1; +} + +static void tcg_wasm_out_handle_unwinding(TCGContext *s) +{ + tcg_wasm_out_op_idx(s, OPC_CALL, CHECK_UNWINDING_IDX); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + /* returns if unwinding */ + tcg_wasm_out_op(s, OPC_RETURN); + tcg_wasm_out_op(s, OPC_END); +} + +static void tcg_wasm_out_call(TCGContext *s, intptr_t func, + const TCGHelperInfo *info) +{ + intptr_t ofs; + int64_t func_idx = get_helper_idx(s, func); + if (func_idx < 0) { + func_idx = register_helper(s, func); + gen_func_type_call(s, info); + } + + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(tci_tb_ptr)); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (uint64_t)s->code_ptr); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + + gen_call(s, info, func_idx); + tcg_wasm_out_handle_unwinding(s); +} + +static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi) +{ + types_buf_out8(0x60); + types_buf_out8(0x4); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x7e); + types_buf_out8(0x7f); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x1); + types_buf_out8(0x7e); +} + +static void gen_func_type_qemu_st(TCGContext *s, uint32_t oi) +{ + MemOp mop = get_memop(oi); + + types_buf_out8(0x60); + types_buf_out8(0x5); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x7e); + switch (mop & MO_SSIZE) { + case MO_UQ: + types_buf_out8(0x7e); + break; + default: + types_buf_out8(0x7f); + break; + } + types_buf_out8(0x7f); + types_buf_out8(PTR_TYPE); + types_buf_out8(0x0); +} + +static void *qemu_ld_helper_ptr(uint32_t oi) +{ + MemOp mop = get_memop(oi); + switch (mop & MO_SSIZE) { + case MO_UB: + return helper_ldub_mmu; + case MO_SB: + return helper_ldsb_mmu; + case MO_UW: + return helper_lduw_mmu; + case MO_SW: + return helper_ldsw_mmu; + case MO_UL: + return helper_ldul_mmu; + case MO_SL: + return helper_ldsl_mmu; + case MO_UQ: + return helper_ldq_mmu; + default: + g_assert_not_reached(); + } +} + +#define MIN_TLB_MASK_TABLE_OFS INT_MIN + +static uint8_t prepare_host_addr_wasm(TCGContext *s, uint8_t *hit_var, + TCGReg addr_reg, MemOpIdx oi, + bool is_ld) +{ + MemOp opc = get_memop(oi); + TCGAtomAlign aa; + unsigned a_mask; + unsigned s_bits = opc & MO_SIZE; + unsigned s_mask = (1u << s_bits) - 1; + int mem_index = get_mmuidx(oi); + int fast_ofs = tlb_mask_table_ofs(s, mem_index); + int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask); + int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table); + int add_off = offsetof(CPUTLBEntry, addend); + tcg_target_long compare_mask; + int offset; + + uint8_t tmp1 = TMP64_LOCAL_0_IDX; + uint8_t tmp2 = TMP64_LOCAL_1_IDX; + + if (!tcg_use_softmmu) { + g_assert_not_reached(); + } + + *hit_var = TMP32_LOCAL_0_IDX; + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, *hit_var); + + aa = atom_and_align_for_opc(s, opc, MO_ATOM_IFALIGN, false); + a_mask = (1u << aa.align) - 1; + + /* Get 
the CPUTLBEntry offset */
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg));
+    tcg_wasm_out_op_const(s, OPC_I64_CONST,
+                          TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+    tcg_wasm_out_op(s, OPC_I64_SHR_U);
+
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0));
+    offset = tcg_wasm_out_norm_ptr(s, mask_ofs);
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset);
+    tcg_wasm_out_op(s, OPC_I64_AND);
+
+    /* Get the pointer to the target CPUTLBEntry */
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0));
+    offset = tcg_wasm_out_norm_ptr(s, table_ofs);
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset);
+    tcg_wasm_out_op(s, OPC_I64_ADD);
+    tcg_wasm_out_op_idx(s, OPC_LOCAL_TEE, tmp1);
+
+    /* Load the tlb comparator */
+    offset = tcg_wasm_out_norm_ptr(s, is_ld ? offsetof(CPUTLBEntry, addr_read)
+                                   : offsetof(CPUTLBEntry, addr_write));
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset);
+
+    /*
+     * For aligned accesses, we check the first byte and include the
+     * alignment bits within the address. For unaligned access, we
+     * check that we don't cross pages using the address of the last
+     * byte of the access.
+     */
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg));
+    if (a_mask < s_mask) {
+        tcg_wasm_out_op_const(s, OPC_I64_CONST, s_mask - a_mask);
+        tcg_wasm_out_op(s, OPC_I64_ADD);
+    }
+    compare_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
+    tcg_wasm_out_op_const(s, OPC_I64_CONST, compare_mask);
+    tcg_wasm_out_op(s, OPC_I64_AND);
+
+    /* Compare masked address with the TLB entry. */
+    tcg_wasm_out_op(s, OPC_I64_EQ);
+
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET);
+
+    /* TLB Hit - translate address using addend. */
+    tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, tmp1);
+    offset = tcg_wasm_out_norm_ptr(s, add_off);
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, offset);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg));
+    tcg_wasm_out_op(s, OPC_I64_ADD);
+    tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, tmp2);
+    tcg_wasm_out_op_const(s, OPC_I32_CONST, 1);
+    tcg_wasm_out_op_idx(s, OPC_LOCAL_SET, *hit_var);
+
+    tcg_wasm_out_op(s, OPC_END);
+
+    return tmp2;
+}
+
+static void tcg_wasm_out_qemu_ld_direct(
+    TCGContext *s, TCGReg r, uint8_t base, MemOp opc)
+{
+    intptr_t ofs;
+    switch (opc & (MO_SSIZE)) {
+    case MO_UB:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD8_U, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_SB:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD8_S, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_UW:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD16_U, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_SW:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD16_S, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_UL:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD32_U, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_SL:
+        tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base);
+        ofs = tcg_wasm_out_norm_ptr(s, 0);
+        tcg_wasm_out_op_ldst(s, OPC_I64_LOAD32_S, 0, ofs);
+        tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r));
+        break;
+    case MO_UQ:
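+        /*
+         * (Clarifying note, not in the original patch: the narrower
+         * cases above use the i64.load8/16/32 _s/_u variants so results
+         * are widened to 64bit, because every TCG register lives in a
+         * 64bit Wasm global; this MO_UQ case alone needs no extension.)
+         */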
tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(r)); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) +{ + intptr_t helper_idx; + int64_t func_idx; + MemOp mop = get_memop(oi); + uint8_t base_var, hit_var; + + helper_idx = (intptr_t)qemu_ld_helper_ptr(oi); + func_idx = get_helper_idx(s, helper_idx); + if (func_idx < 0) { + func_idx = register_helper(s, helper_idx); + gen_func_type_qemu_ld(s, oi); + } + + base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, true); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 1); + tcg_wasm_out_op(s, OPC_I32_EQ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_qemu_ld_direct(s, data_reg, base_var, mop); /* fast path */ + tcg_wasm_out_op(s, OPC_END); + + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + + /* call the target helper */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + tcg_wasm_out_op_const(s, OPC_I32_CONST, oi); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(data_reg)); + tcg_wasm_out_handle_unwinding(s); + + tcg_wasm_out_op(s, OPC_END); +} + +static void *qemu_st_helper_ptr(uint32_t oi) +{ + MemOp mop = get_memop(oi); + switch (mop & MO_SIZE) { + case MO_8: + return helper_stb_mmu; + case MO_16: + return helper_stw_mmu; + case MO_32: + return helper_stl_mmu; + case MO_64: + return helper_stq_mmu; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_qemu_st_direct( + TCGContext *s, TCGReg lo, uint8_t base, MemOp opc) +{ + intptr_t ofs; + switch (opc & (MO_SSIZE)) { + case MO_8: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE8, 0, ofs); + break; + case MO_16: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE16, 0, ofs); + break; + case MO_32: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE32, 0, ofs); + break; + case MO_64: + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, base); + ofs = tcg_wasm_out_norm_ptr(s, 0); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(lo)); + tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + break; + default: + g_assert_not_reached(); + } +} + +static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, + TCGReg addr_reg, MemOpIdx oi) +{ + intptr_t helper_idx; + int64_t func_idx; + MemOp mop = get_memop(oi); + uint8_t base_var, hit_var; + + helper_idx = (intptr_t)qemu_st_helper_ptr(oi); + func_idx = get_helper_idx(s, helper_idx); + if (func_idx < 0) { + func_idx = register_helper(s, helper_idx); + 
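+        /*
+         * (Clarifying note, not in the original patch: helpers are
+         * registered lazily; the first use imports the function into
+         * the module being built and records its type signature.)
+         */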
gen_func_type_qemu_st(s, oi); + } + + base_var = prepare_host_addr_wasm(s, &hit_var, addr_reg, oi, false); + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 1); + tcg_wasm_out_op(s, OPC_I32_EQ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_qemu_st_direct(s, data_reg, base_var, mop); /* fast path */ + tcg_wasm_out_op(s, OPC_END); + + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + + tcg_wasm_out_op_idx(s, OPC_LOCAL_GET, hit_var); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + + /* call the target helper */ + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); + switch (mop & MO_SSIZE) { + case MO_UQ: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(data_reg)); + break; + default: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(data_reg)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + break; + } + tcg_wasm_out_op_const(s, OPC_I32_CONST, oi); + tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); + + tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + tcg_wasm_out_handle_unwinding(s); + + tcg_wasm_out_op(s, OPC_END); +} + +static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) +{ + tcg_insn_unit_tci insn = 0; + + tcg_out_reloc(s, s->code_ptr, 20, l0, 0); + insn = deposit32(insn, 0, 8, op); + tcg_out32(s, insn); +} + +static void tcg_out_op_p(TCGContext *s, TCGOpcode op, void *p0) +{ + tcg_insn_unit_tci insn = 0; + intptr_t diff; + + /* Special case for exit_tb: map null -> 0. */ + if (p0 == NULL) { + diff = 0; + } else { + diff = p0 - (void *)(s->code_ptr + 4); + tcg_debug_assert(diff != 0); + if (diff != sextract32(diff, 0, 20)) { + tcg_raise_tb_overflow(s); + } + } + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 12, 20, diff); + tcg_out32(s, insn); +} + +static void tcg_out_op_r(TCGContext *s, TCGOpcode op, TCGReg r0) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); +} + +static void tcg_out_op_v(TCGContext *s, TCGOpcode op) +{ + tcg_out32(s, (uint8_t)op); +} + +static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(i1 == sextract32(i1, 0, 20)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 20, i1); + tcg_out32(s, insn); +} + +static void tcg_out_op_rl(TCGContext *s, TCGOpcode op, TCGReg r0, TCGLabel *l1) +{ + tcg_insn_unit_tci insn = 0; + + tcg_out_reloc(s, s->code_ptr, 20, l1, 0); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + tcg_out32(s, insn); +} + +static void tcg_out_op_rr(TCGContext *s, TCGOpcode op, TCGReg r0, TCGReg r1) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrm(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGArg m2) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(m2 == extract32(m2, 0, 16)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 16, m2); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrr(TCGContext 
*s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrs(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, intptr_t i2) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(i2 == sextract32(i2, 0, 16)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 16, i2); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrbb(TCGContext *s, TCGOpcode op, TCGReg r0, + TCGReg r1, uint8_t b2, uint8_t b3) +{ + tcg_insn_unit_tci insn = 0; + + tcg_debug_assert(b2 == extract32(b2, 0, 6)); + tcg_debug_assert(b3 == extract32(b3, 0, 6)); + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 6, b2); + insn = deposit32(insn, 22, 6, b3); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrrc(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2, TCGCond c3) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, c3); + tcg_out32(s, insn); +} + +static void tcg_out_op_rrrrrc(TCGContext *s, TCGOpcode op, + TCGReg r0, TCGReg r1, TCGReg r2, + TCGReg r3, TCGReg r4, TCGCond c5) +{ + tcg_insn_unit_tci insn = 0; + + insn = deposit32(insn, 0, 8, op); + insn = deposit32(insn, 8, 4, r0); + insn = deposit32(insn, 12, 4, r1); + insn = deposit32(insn, 16, 4, r2); + insn = deposit32(insn, 20, 4, r3); + insn = deposit32(insn, 24, 4, r4); + insn = deposit32(insn, 28, 4, c5); + tcg_out32(s, insn); +} + +static void tgen_and(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_and, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_AND, a0, a1, a2); +} + +static const TCGOutOpBinary outop_and = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_and, +}; + +static void tgen_or(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_or, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_OR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_or = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_or, +}; + +static void tgen_xor(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_xor, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_XOR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_xor = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_xor, +}; + +static void tgen_add(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_add, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_ADD, a0, a1, a2); +} + +static const TCGOutOpBinary outop_add = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_add, +}; + +static void tgen_sub(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_sub, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_SUB, a0, a1, a2); +} + +static const TCGOutOpSubtract outop_sub = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sub, +}; + +static void tgen_mul(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_mul, a0, a1, a2); + 
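+    /*
+     * (Clarifying note, not in the original patch: as in the other
+     * tgen_* helpers here, each operation is emitted twice, once as TCI
+     * bytecode via tcg_out_op_rrr() and once as Wasm bytes into the
+     * per-TB buffer, so the TB carries both encodings.)
+     */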
tcg_wasm_out_o1_i2(s, OPC_I64_MUL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_mul = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_mul, +}; + +static void tcg_out_sextract(TCGContext *s, TCGType type, TCGReg rd, + TCGReg rs, unsigned pos, unsigned len) +{ + tcg_out_op_rrbb(s, INDEX_op_sextract, rd, rs, pos, len); + tcg_wasm_out_sextract(s, rd, rs, pos, len); +} + +static const TCGOutOpExtract outop_sextract = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tcg_out_sextract, +}; + +static void tgen_shl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + tcg_out_op_rrr(s, INDEX_op_shl, a0, a1, a2); + tcg_wasm_out_o1_i2(s, OPC_I64_SHL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_shl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shl, +}; + +static void tgen_shr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGReg orig_a1 = a1; + if (type < TCG_TYPE_REG) { + tcg_out_op_rrbb(s, INDEX_op_extract, TCG_REG_TMP, a1, 0, 32); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, INDEX_op_shr, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_SHR_U, OPC_I64_SHR_U, + a0, orig_a1, a2); +} + +static const TCGOutOpBinary outop_shr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_shr, +}; + +static void tgen_sar(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGReg orig_a1 = a1; + if (type < TCG_TYPE_REG) { + tcg_out_op_rrbb(s, INDEX_op_sextract, TCG_REG_TMP, a1, 0, 32); + a1 = TCG_REG_TMP; + } + tcg_out_op_rrr(s, INDEX_op_sar, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_SHR_S, OPC_I64_SHR_S, + a0, orig_a1, a2); +} + +static const TCGOutOpBinary outop_sar = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_sar, +}; + +static void tgen_setcond_tci(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_setcond32 + : INDEX_op_setcond); + tcg_out_op_rrrc(s, opc, dest, arg1, arg2, cond); +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_tci(s, type, cond, dest, arg1, arg2); + tcg_wasm_out_setcond(s, type, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_setcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_setcond, +}; + +static void tgen_negsetcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg dest, TCGReg arg1, TCGReg arg2) +{ + tgen_setcond_tci(s, type, cond, dest, arg1, arg2); + tcg_out_op_rr(s, INDEX_op_neg, dest, dest); + tcg_wasm_out_negsetcond(s, type, dest, arg1, arg2, cond); +} + +static const TCGOutOpSetcond outop_negsetcond = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_negsetcond, +}; + +static void tgen_movcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg ret, TCGReg c1, TCGArg c2, bool const_c2, + TCGArg vt, bool const_vt, TCGArg vf, bool consf_vf) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? 
INDEX_op_tci_movcond32 + : INDEX_op_movcond); + tcg_out_op_rrrrrc(s, opc, ret, c1, c2, vt, vf, cond); + tcg_wasm_out_movcond(s, type, ret, c1, c2, vt, vf, cond); +} + +static const TCGOutOpMovcond outop_movcond = { + .base.static_constraint = C_O1_I4(r, r, r, r, r), + .out = tgen_movcond, +}; + +static void tcg_tci_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + switch (type) { + case TCG_TYPE_I32: + arg = (int32_t)arg; + /* fall through */ + case TCG_TYPE_I64: + break; + default: + g_assert_not_reached(); + } + + if (arg == sextract32(arg, 0, 20)) { + tcg_out_op_ri(s, INDEX_op_tci_movi, ret, arg); + } else { + tcg_insn_unit_tci insn = 0; + + new_pool_label(s, arg, 20, s->code_ptr, 0); + insn = deposit32(insn, 0, 8, INDEX_op_tci_movl); + insn = deposit32(insn, 8, 4, ret); + tcg_out32(s, insn); + } +} + +static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) +{ + tcg_out_op_rr(s, INDEX_op_mov, ret, arg); + tcg_wasm_out_mov(s, ret, arg); + return true; +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + tcg_tci_out_movi(s, type, ret, arg); + tcg_wasm_out_movi(s, type, ret, arg); +} + +static void stack_bounds_check(TCGReg base, intptr_t offset) +{ + if (base == TCG_REG_CALL_STACK) { + tcg_debug_assert(offset >= 0); + tcg_debug_assert(offset < (TCG_STATIC_CALL_ARGS_SIZE + + TCG_STATIC_FRAME_SIZE)); + } +} + +static void tcg_out_ldst(TCGContext *s, TCGOpcode op, TCGReg val, + TCGReg base, intptr_t offset) +{ + stack_bounds_check(base, offset); + if (offset != sextract32(offset, 0, 16)) { + tcg_tci_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, offset); + tcg_out_op_rrr(s, INDEX_op_add, TCG_REG_TMP, TCG_REG_TMP, base); + base = TCG_REG_TMP; + offset = 0; + } + tcg_out_op_rrs(s, op, val, base, offset); +} + +static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg val, TCGReg base, + intptr_t offset) +{ + TCGOpcode op = INDEX_op_ld; + WasmInsn wasm_opc = OPC_I64_LOAD; + + if (type == TCG_TYPE_I32) { + op = INDEX_op_ld32u; + wasm_opc = OPC_I64_LOAD32_U; + } + tcg_out_ldst(s, op, val, base, offset); + tcg_wasm_out_ld(s, wasm_opc, val, base, offset); +} + +static void tgen_ld8u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD8_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8u, +}; + +static void tgen_ld8s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld8s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD8_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld8s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld8s, +}; + +static void tgen_ld16u(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD16_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16u, +}; + +static void tgen_ld16s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld16s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD16_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld16s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld16s, +}; + +static void tgen_ld32u(TCGContext *s, TCGType 
type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32u, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD32_U, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32u = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32u, +}; + +static void tgen_ld32s(TCGContext *s, TCGType type, TCGReg dest, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_ld32s, dest, base, offset); + tcg_wasm_out_ld(s, OPC_I64_LOAD32_S, dest, base, offset); +} + +static const TCGOutOpLoad outop_ld32s = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_ld32s, +}; + +static void tgen_st8(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st8, data, base, offset); + tcg_wasm_out_st(s, OPC_I64_STORE8, data, base, offset); +} + +static const TCGOutOpStore outop_st8 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st8, +}; + +static void tgen_st16(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, ptrdiff_t offset) +{ + tcg_out_ldst(s, INDEX_op_st16, data, base, offset); + tcg_wasm_out_st(s, OPC_I64_STORE16, data, base, offset); +} + +static const TCGOutOpStore outop_st16 = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tgen_st16, +}; + +static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val, TCGReg base, + intptr_t offset) +{ + TCGOpcode op = INDEX_op_st; + WasmInsn wasm_opc = OPC_I64_STORE; + + if (type == TCG_TYPE_I32) { + op = INDEX_op_st32; + wasm_opc = OPC_I64_STORE32; + } + tcg_out_ldst(s, op, val, base, offset); + tcg_wasm_out_st(s, wasm_opc, val, base, offset); +} + +static const TCGOutOpStore outop_st = { + .base.static_constraint = C_O0_I2(r, r), + .out_r = tcg_out_st, +}; + +static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, + TCGReg base, intptr_t ofs) +{ + return false; +} + +static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, type, rd, rs, 0, 8); + tcg_wasm_out_sextract(s, rd, rs, 0, 8); +} + +static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 8); + tcg_wasm_out_extract(s, rd, rs, 0, 8); +} + +static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, type, rd, rs, 0, 16); + tcg_wasm_out_sextract(s, rd, rs, 0, 16); +} + +static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 16); + tcg_wasm_out_extract(s, rd, rs, 0, 16); +} + +static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_sextract(s, TCG_TYPE_I64, rd, rs, 0, 32); + tcg_wasm_out_sextract(s, rd, rs, 0, 32); +} + +static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, rd, rs, 0, 32); + tcg_wasm_out_extract(s, rd, rs, 0, 32); +} + +static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_ext32s(s, rd, rs); +} + +static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_ext32u(s, rd, rs); +} + +static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rs) +{ + tcg_out_op_rr(s, INDEX_op_mov, rd, rs); + tcg_wasm_out_extract(s, rd, rs, 0, 32); +} + +static void tgen_extrh_i64_i32(TCGContext *s, TCGType t, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rrbb(s, INDEX_op_extract, a0, a1, 32, 32); + tcg_wasm_out_extract(s, a0, a1, 32, 32); +} + +static const TCGOutOpUnary outop_extrh_i64_i32 = { + .base.static_constraint = C_O1_I1(r, 
r), + .out_rr = tgen_extrh_i64_i32, +}; + +static void tgen_divs(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divs32 + : INDEX_op_divs); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_DIV_S, OPC_I64_DIV_S, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divs = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divs, +}; + +static void tgen_divu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_divu32 + : INDEX_op_divu); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_DIV_U, OPC_I64_DIV_U, a0, a1, a2); +} + +static const TCGOutOpBinary outop_divu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_divu, +}; + +static void tgen_rems(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rems32 + : INDEX_op_rems); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_REM_S, OPC_I64_REM_S, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rems = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rems, +}; + +static void tgen_remu(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_remu32 + : INDEX_op_remu); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_REM_U, OPC_I64_REM_U, a0, a1, a2); +} + +static const TCGOutOpBinary outop_remu = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_remu, +}; + +static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_neg, a0, a1); + tcg_wasm_out_neg(s, a0, a1); +} + +static const TCGOutOpUnary outop_neg = { + .base.static_constraint = C_O1_I1(r, r), + .out_rr = tgen_neg, +}; + +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1) +{ + tcg_out_op_rr(s, INDEX_op_ctpop, a0, a1); + tcg_wasm_out_ctpop64(s, a0, a1); +} + +static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags) +{ + return type == TCG_TYPE_REG ? C_O1_I1(r, r) : C_NotImplemented; +} + +static const TCGOutOpUnary outop_ctpop = { + .base.static_constraint = C_Dynamic, + .base.dynamic_constraint = cset_ctpop, + .out_rr = tgen_ctpop, +}; + +static void tgen_rotl(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotl32 + : INDEX_op_rotl); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTL, OPC_I64_ROTL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotl, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotr32 + : INDEX_op_rotr); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTR, OPC_I64_ROTR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotr, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? 
INDEX_op_tci_clz32 + : INDEX_op_clz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CLZ, OPC_I64_CLZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_clz, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_ctz32 + : INDEX_op_ctz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CTZ, OPC_I64_CTZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_ctz, +}; + +static void tgen_brcond(TCGContext *s, TCGType type, TCGCond cond, + TCGReg arg0, TCGReg arg1, TCGLabel *l) +{ + tgen_setcond_tci(s, type, cond, TCG_REG_TMP, arg0, arg1); + tcg_out_op_rl(s, INDEX_op_brcond, TCG_REG_TMP, l); + tcg_wasm_out_brcond(s, type, arg0, arg1, cond, l); +} + +static const TCGOutOpBrcond outop_brcond = { + .base.static_constraint = C_O0_I2(r, r), + .out_rr = tgen_brcond, +}; + +static void tcg_out_br(TCGContext *s, TCGLabel *l) +{ + tcg_out_op_l(s, INDEX_op_br, l); + tcg_wasm_out_br(s, l); +} + +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); + tcg_wasm_out_exit_tb(s, arg); +} + +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* indirect jump method. */ + tcg_out_op_p(s, INDEX_op_goto_tb, (void *)get_jmp_target_addr(s, which)); + set_jmp_reset_offset(s, which); + tcg_wasm_out_goto_tb(s, which, (intptr_t)s->code_ptr); +} + +static void tcg_out_goto_ptr(TCGContext *s, TCGReg a0) +{ + tcg_out_op_r(s, INDEX_op_goto_ptr, a0); + tcg_wasm_out_goto_ptr(s, a0); +} + +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + +static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs, + tcg_target_long imm) +{ + /* This function is only used for passing structs by reference. 
*/ + g_assert_not_reached(); +} + +static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, + const TCGHelperInfo *info) +{ + ffi_cif *cif = info->cif; + tcg_insn_unit_tci insn = 0; + uint8_t which; + + if (cif->rtype == &ffi_type_void) { + which = 0; + } else { + tcg_debug_assert(cif->rtype->size == 4 || + cif->rtype->size == 8 || + cif->rtype->size == 16); + which = ctz32(cif->rtype->size) - 1; + } + new_pool_l2(s, 20, s->code_ptr, 0, (uintptr_t)func, (uintptr_t)cif); + insn = deposit32(insn, 0, 8, INDEX_op_call); + insn = deposit32(insn, 8, 4, which); + tcg_out32(s, insn); + tcg_wasm_out_call(s, (intptr_t)func, info); +} + +static void tgen_qemu_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_ld, data, addr, oi); + tcg_wasm_out_qemu_ld(s, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_ld = { + .base.static_constraint = C_O1_I1(r, r), + .out = tgen_qemu_ld, +}; + +static void tgen_qemu_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg addr, MemOpIdx oi) +{ + tcg_out_op_rrm(s, INDEX_op_qemu_st, data, addr, oi); + tcg_wasm_out_qemu_st(s, data, addr, oi); +} + +static const TCGOutOpQemuLdSt outop_qemu_st = { + .base.static_constraint = C_O0_I2(r, r), + .out = tgen_qemu_st, +}; + +bool tcg_target_has_memory_bswap(MemOp memop) +{ + return false; +} + +static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + g_assert_not_reached(); +} + +static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) +{ + g_assert_not_reached(); +} + +static void tcg_out_mb(TCGContext *s, unsigned a0) +{ + tcg_out_op_v(s, INDEX_op_mb); + + /* + * Wasm's threading proposal provides atomic.fence instruction as the fence + * operator. + * https://webassembly.github.io/threads/core/syntax/instructions.html#atomic-memory-instructions + */ + tcg_wasm_out8(s, 0xfe); + tcg_wasm_out8(s, 0x03); + tcg_wasm_out8(s, 0x00); +} + +static const TCGOutOpDeposit outop_deposit = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpExtract outop_extract = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpExtract2 outop_extract2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addco = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_addci = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_addcio = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbo = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbi = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpAddSubCarry outop_subbio = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpUnary outop_not = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_andc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_eqv = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nand = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_nor = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpBinary outop_orc = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpDivRem outop_divs2 = { + .base.static_constraint = C_NotImplemented, +}; + +static const TCGOutOpDivRem 
+
+static const TCGOutOpDeposit outop_deposit = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpExtract outop_extract = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpExtract2 outop_extract2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_addco = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_addci = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_addcio = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_subbo = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_subbi = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpAddSubCarry outop_subbio = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpUnary outop_not = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_andc = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_eqv = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_nand = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_nor = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_orc = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpDivRem outop_divs2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpDivRem outop_divu2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpMul2 outop_muls2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_mulsh = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpMul2 outop_mulu2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBinary outop_muluh = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBswap outop_bswap16 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpBswap outop_bswap32 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpUnary outop_bswap64 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpQemuLdSt2 outop_qemu_ld2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static const TCGOutOpQemuLdSt2 outop_qemu_st2 = {
+    .base.static_constraint = C_NotImplemented,
+};
+
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+    return false;
+}
+
+static void tcg_out_set_borrow(TCGContext *s)
+{
+    g_assert_not_reached();
+}
+
+static void tcg_out_set_carry(TCGContext *s)
+{
+    g_assert_not_reached();
+}
+
+/* Generate global QEMU prologue and epilogue code. */
+static inline void tcg_target_qemu_prologue(TCGContext *s)
+{
+}
+
+static const uint8_t mod_1[] = {
+    0x0, 0x61, 0x73, 0x6d,        /* magic */
+    0x01, 0x0, 0x0, 0x0,          /* version */
+
+    0x01,                         /* type section */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for size */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for num of types vec */
+    0x60,                         /* 0: Type of "start" function */
+    0x01, PTR_TYPE,               /* arg: ctx pointer */
+    0x01, PTR_TYPE,               /* return: res */
+    0x60,                         /* 1: Type of the asyncify helper */
+    0x0,                          /* no argument */
+    0x01, 0x7f,                   /* return: res (i32) */
+};
+
+#define MOD_1_PH_TYPE_SECTION_SIZE_OFF 9
+#define MOD_1_PH_TYPE_VEC_NUM_OFF 14
+
+static const uint8_t mod_2[] = {
+    0x02,                         /* import section */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for size */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for imports num */
+    0x03, 0x65, 0x6e, 0x76,       /* module: "env" */
+    0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, /* name: "memory" */
+#if defined(WASM64_MEMORY64_2)
+    /* 32bit memory is used for Emscripten's "-sMEMORY64=2" configuration. */
+    0x02, 0x03,             /* shared mem */
+    0x00, 0x80, 0x80, 0x04, /* min: 0, max: 65536 pages */
+#else
+    /*
+     * 64bit memory is used for Emscripten's "-sMEMORY64=1" configuration.
+     * Note: the maximum 64bit memory size of the engine implementations is
+     * limited to 262144 pages (16GiB).
+     * https://webassembly.github.io/memory64/js-api/#limits
+     */
+    0x02, 0x07,             /* shared mem (64bit) */
+    0x00, 0x80, 0x80, 0x10, /* min: 0, max: 262144 pages */
+#endif
+    0x06, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, /* module: "helper" */
+    0x01, 0x75,             /* name: "u" */
+    0x00, 0x01,             /* func type 1 */
+};
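+
+/*
+ * Sketch of how the memory limits above decode as plain LEB128: the flags
+ * byte 0x03 marks the memory shared with a maximum, 0x07 additionally
+ * marks 64bit addressing; the maximum 0x80 0x80 0x04 decodes to
+ * 4 << 14 = 65536 pages (4GiB) and 0x80 0x80 0x10 to 16 << 14 = 262144
+ * pages (16GiB), at 64KiB per page.
+ */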
+
+#define MOD_2_PH_IMPORT_SECTION_SIZE_OFF 1
+#define MOD_2_PH_IMPORT_VEC_NUM_OFF 6
+
+static const uint8_t mod_3[] = {
+    0x03,       /* function section */
+    2, 1, 0x00, /* function type 0 */
+
+    0x06,       /* global section */
+    86,         /* section size */
+    17,         /* num of global vars */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+    0x7e, 0x01, 0x42, 0x00, 0x0b, /* 0-cleared 64bit var */
+
+    0x07,       /* export section */
+    13,         /* size of section */
+    1,          /* num of funcs */
+    0x05, 0x73, 0x74, 0x61, 0x72, 0x74, /* "start" function */
+    0x00, 0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for func index */
+
+    0x0a,                         /* code section */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for section size */
+    1,                            /* num of codes */
+    0x80, 0x80, 0x80, 0x80, 0x00, /* placeholder for code size */
+    0x2, 0x1, 0x7f, 0x2, 0x7e,    /* local variables (32bit*1, 64bit*2) */
+};
+
+#define MOD_3_PH_EXPORT_START_FUNC_IDX 102
+#define MOD_3_PH_CODE_SECTION_SIZE_OFF 108
+#define MOD_3_PH_CODE_SIZE_OFF 114
+#define MOD_3_VARIABLES_SIZE 5
+#define MOD_3_CODE_SECTION_SIZE_ADD 11
+
+static void fill_uint32_leb128(uint8_t *b, uint32_t v)
+{
+    do {
+        *b |= v & 0x7f;
+        v >>= 7;
+        b++;
+    } while (v != 0);
+}
+
+typedef struct FillValueU32 {
+    int64_t offset;
+    uint32_t value;
+} FillValueU32;
+
+static int write_mod(TCGContext *s, const uint8_t mod[], int len,
+                     FillValueU32 values[], int values_len)
+{
+    void *base = s->code_ptr;
+
+    if (unlikely(((void *)s->code_ptr + len)
+                 > s->code_gen_highwater)) {
+        return -1;
+    }
+
+    memcpy(s->code_ptr, mod, len);
+    s->code_ptr += len;
+
+    for (int i = 0; i < values_len; i++) {
+        fill_uint32_leb128(base + values[i].offset, values[i].value);
+    }
+
+    return 0;
+}
+
+static int write_mod_code(TCGContext *s)
+{
+    void *base = s->code_ptr;
+    int code_size = sub_buf_len();
+    BlockPlaceholder *e;
+
+    if (unlikely(((void *)s->code_ptr + code_size) > s->code_gen_highwater)) {
+        return -1;
+    }
+    linked_buf_write(&sub_buf, s->code_ptr);
+    s->code_ptr += code_size;
+
+    QSIMPLEQ_FOREACH(e, &block_placeholder, entry) {
+        uint8_t *ph = e->pos + base;
+        int blk = get_block_of_label(e->label);
+        tcg_debug_assert(blk >= 0);
+        fill_uint32_leb128(ph, blk);
+    }
+
+    return 0;
+}
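+
+/*
+ * Worked example of the patching done by fill_uint32_leb128() above: a
+ * size of 300 (0x2c | 0x02 << 7) ORed into the padded placeholder
+ * 0x80 0x80 0x80 0x80 0x00 yields 0xac 0x82 0x80 0x80 0x00, which decodes
+ * back to 300. Keeping every placeholder five bytes wide (the maximum
+ * LEB128 length of a uint32) lets the module templates be emitted before
+ * the final section sizes are known.
+ */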
+
+static void tcg_out_tb_start(TCGContext *s)
+{
+    int size;
+    intptr_t ofs;
+    struct WasmTBHeader *h;
+
+    init_sub_buf();
+    init_blocks();
+    init_label_info();
+    init_helpers();
+    init_types_buf();
+    init_imports_buf();
+
+    /* The TB starts with a header. */
+    h = (struct WasmTBHeader *)(s->code_ptr);
+    s->code_ptr += sizeof(struct WasmTBHeader);
+
+    /* Locate the counters. */
+    h->counter_ptr = (int32_t *)s->code_ptr;
+    size = tcg_max_ctxs * sizeof(int32_t);
+    memset(s->code_ptr, 0, size);
+    s->code_ptr += size;
+
+    /* Locate the instance information. */
+    h->info_ptr = (struct WasmInstanceInfo **)s->code_ptr;
+    size = tcg_max_ctxs * sizeof(void *);
+    memset(s->code_ptr, 0, size);
+    s->code_ptr += size;
+
+    /* Followed by the TCI code. */
+    h->tci_ptr = s->code_ptr;
+
+    /* Initialize the fundamental registers. */
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0));
+    tcg_wasm_out_op(s, OPC_I64_EQZ);
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET);
+
+    ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(env));
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_AREG0));
+
+    ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(stack));
+    tcg_wasm_out_op_ldst(s, OPC_I64_LOAD, 0, ofs);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_CALL_STACK));
+    tcg_wasm_out_op(s, OPC_END);
+
+    ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init));
+    tcg_wasm_out_op_ldst(s, OPC_I32_LOAD, 0, ofs);
+    tcg_wasm_out_op_const(s, OPC_I32_CONST, 0);
+    tcg_wasm_out_op(s, OPC_I32_NE);
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET);
+    tcg_wasm_out_op_const(s, OPC_I64_CONST, 0);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX);
+    ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init));
+    tcg_wasm_out_op_const(s, OPC_I32_CONST, 0);
+    tcg_wasm_out_op_ldst(s, OPC_I32_STORE, 0, ofs);
+    tcg_wasm_out_op(s, OPC_END);
+
+    tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET);
+    tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX);
+    tcg_wasm_out_op(s, OPC_I64_EQZ);
+    tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET);
+}
+
+static int tcg_out_tb_end(TCGContext *s)
+{
+    int res;
+    struct WasmTBHeader *h = (struct WasmTBHeader *)(s->code_buf);
+
+    tcg_wasm_out_op(s, OPC_END); /* end if */
+    tcg_wasm_out_op(s, OPC_END); /* end loop */
+    tcg_wasm_out_op(s, OPC_UNREACHABLE);
+    tcg_wasm_out_op(s, OPC_END); /* end func */
+
+    /* Write the wasm blob. */
+    h->wasm_ptr = s->code_ptr;
+
+    res = write_mod(s, mod_1, sizeof(mod_1), (FillValueU32[]) {
+        {
+            MOD_1_PH_TYPE_SECTION_SIZE_OFF,
+            linked_buf_len(&types_buf) +
+                sizeof(mod_1) - MOD_1_PH_TYPE_VEC_NUM_OFF
+        },
+        {
+            MOD_1_PH_TYPE_VEC_NUM_OFF,
+            HELPER_IDX_START + helpers_len() + 1 /* start */
+        },
+    }, 2);
+    if (res < 0) {
+        return res;
+    }
+    s->code_ptr += linked_buf_write(&types_buf, s->code_ptr);
+
+    res = write_mod(s, mod_2, sizeof(mod_2), (FillValueU32[]) {
+        {
+            MOD_2_PH_IMPORT_SECTION_SIZE_OFF,
+            linked_buf_len(&imports_buf) +
+                sizeof(mod_2) - MOD_2_PH_IMPORT_VEC_NUM_OFF
+        },
+        {
+            MOD_2_PH_IMPORT_VEC_NUM_OFF,
+            HELPER_IDX_START + helpers_len() + 1 /* memory */
+        },
+    }, 2);
+    if (res < 0) {
+        return res;
+    }
+    s->code_ptr += linked_buf_write(&imports_buf, s->code_ptr);
+
+    res = write_mod(s, mod_3, sizeof(mod_3), (FillValueU32[]) {
+        {
+            MOD_3_PH_EXPORT_START_FUNC_IDX,
+            HELPER_IDX_START + helpers_len()
+        },
+        {
+            MOD_3_PH_CODE_SECTION_SIZE_OFF,
+            sub_buf_len() + MOD_3_CODE_SECTION_SIZE_ADD
+        },
+        {
+            MOD_3_PH_CODE_SIZE_OFF,
+            sub_buf_len() + MOD_3_VARIABLES_SIZE
+        },
+    }, 3);
+    if (res < 0) {
+        return res;
+    }
+
+    res = write_mod_code(s);
+    if (res < 0) {
+        return res;
+    }
+    h->wasm_size = (intptr_t)s->code_ptr - (intptr_t)h->wasm_ptr;
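+
+    /*
+     * Sketch of the TB buffer layout at this point: the WasmTBHeader, the
+     * per-context counters and instance-info slots, the TCI code, and the
+     * just-written wasm module (mod_1..mod_3 plus the code body);
+     * wasm_ptr/wasm_size cover only the wasm module. The import table
+     * recorded next is the final piece.
+     */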
+    /* record imported helper functions */
+    if (unlikely(((void *)s->code_ptr + helpers_len() * sizeof(intptr_t))
+                 > s->code_gen_highwater)) {
+        return -1;
+    }
+    h->import_ptr = s->code_ptr;
+    s->code_ptr += helpers_write_to_array((intptr_t *)s->code_ptr);
+    h->import_size = (intptr_t)s->code_ptr - (intptr_t)h->import_ptr;
+
+    return 0;
+}
+
+static void tcg_target_init(TCGContext *s)
+{
+    /* The current code uses uint8_t for tcg operations. */
+    tcg_debug_assert(tcg_op_defs_max <= UINT8_MAX);
+
+    /* Registers available for 32 bit operations. */
+    tcg_target_available_regs[TCG_TYPE_I32] = BIT(TCG_TARGET_NB_REGS) - 1;
+    /* Registers available for 64 bit operations. */
+    tcg_target_available_regs[TCG_TYPE_I64] = BIT(TCG_TARGET_NB_REGS) - 1;
+    /*
+     * The TCI "registers" are in the local stack frame and cannot be
+     * clobbered by the called helper functions. Additionally, the Wasm
+     * module of a TB is separate from QEMU itself (i.e. the helpers), so
+     * those variables cannot be clobbered by the called helper functions
+     * either. However, the TB assumes a 128-bit return value and assigns
+     * to the return value registers.
+     */
+    tcg_target_call_clobber_regs =
+        MAKE_64BIT_MASK(TCG_REG_R0, 128 / TCG_TARGET_REG_BITS);
+
+    s->reserved_regs = 0;
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
+
+    /* The call arguments come first, followed by the temp storage. */
+    tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+                  TCG_STATIC_FRAME_SIZE);
+}
diff --git a/tcg/wasm/tcg-target.h b/tcg/wasm/tcg-target.h
new file mode 100644
index 0000000000000..f00761d19fc01
--- /dev/null
+++ b/tcg/wasm/tcg-target.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Based on tci/tcg-target.h
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef TCG_TARGET_H
+#define TCG_TARGET_H
+
+#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
+
+/* Number of registers available. */
+#define TCG_TARGET_NB_REGS 16
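+
+/*
+ * Note: these "registers" are not machine registers. At runtime each one
+ * appears to be backed by one of the 64bit globals declared in the Wasm
+ * module template (mod_3 declares 17 globals: 16 register globals plus
+ * the block-index global), accessed via global.get/global.set.
+ */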
+
+/* List of registers which are used by TCG. */
+typedef enum {
+    TCG_REG_R0 = 0,
+    TCG_REG_R1,
+    TCG_REG_R2,
+    TCG_REG_R3,
+    TCG_REG_R4,
+    TCG_REG_R5,
+    TCG_REG_R6,
+    TCG_REG_R7,
+    TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
+    TCG_REG_R11,
+    TCG_REG_R12,
+    TCG_REG_R13,
+    TCG_REG_R14,
+    TCG_REG_R15,
+
+    TCG_REG_TMP = TCG_REG_R13,
+    TCG_AREG0 = TCG_REG_R14,
+    TCG_REG_CALL_STACK = TCG_REG_R15,
+} TCGReg;
+
+#endif /* TCG_TARGET_H */
diff --git a/tests/docker/dockerfiles/emsdk-wasm32-cross.docker b/tests/docker/dockerfiles/emsdk-wasm-cross.docker
similarity index 85%
rename from tests/docker/dockerfiles/emsdk-wasm32-cross.docker
rename to tests/docker/dockerfiles/emsdk-wasm-cross.docker
index 60a7d02f5613e..4b41be62ab864 100644
--- a/tests/docker/dockerfiles/emsdk-wasm32-cross.docker
+++ b/tests/docker/dockerfiles/emsdk-wasm-cross.docker
@@ -1,14 +1,17 @@
 # syntax = docker/dockerfile:1.5
-ARG EMSDK_VERSION_QEMU=3.1.50
+ARG EMSDK_VERSION_QEMU=4.0.10
 ARG ZLIB_VERSION=1.3.1
 ARG GLIB_MINOR_VERSION=2.84
 ARG GLIB_VERSION=${GLIB_MINOR_VERSION}.0
 ARG PIXMAN_VERSION=0.44.2
-ARG FFI_VERSION=v3.4.7
+ARG FFI_VERSION=v3.5.2
 ARG MESON_VERSION=1.5.0
+ARG TARGET_CPU=wasm32
+ARG WASM64_MEMORY64=0
 
-FROM emscripten/emsdk:$EMSDK_VERSION_QEMU AS build-base
+FROM emscripten/emsdk:$EMSDK_VERSION_QEMU AS build-base-common
+ARG TARGET_CPU
 ARG MESON_VERSION
 ENV TARGET=/builddeps/target
 ENV CPATH="$TARGET/include"
@@ -33,8 +36,8 @@ RUN <<EOF
 cat <<EOT > /cross.meson
 [host_machine]
 system = 'emscripten'
-cpu_family = 'wasm32'
-cpu = 'wasm32'
+cpu_family = '${TARGET_CPU}'
+cpu = '${TARGET_CPU}'
 endian = 'little'
 
 [binaries]
@@ -46,6 +49,16 @@ pkgconfig = ['pkg-config', '--static']
 EOT
 EOF
 
+FROM build-base-common AS build-base-wasm32
+
+FROM build-base-common AS build-base-wasm64
+ARG WASM64_MEMORY64
+ENV CFLAGS="$CFLAGS -sMEMORY64=${WASM64_MEMORY64}"
+ENV CXXFLAGS="$CXXFLAGS -sMEMORY64=${WASM64_MEMORY64}"
+ENV LDFLAGS="$LDFLAGS -sMEMORY64=${WASM64_MEMORY64}"
+
+FROM build-base-${TARGET_CPU} AS build-base
+
 FROM build-base AS zlib-dev
 ARG ZLIB_VERSION
 RUN mkdir -p /zlib
@@ -56,17 +69,19 @@ RUN emconfigure ./configure --prefix=$TARGET --static
 RUN emmake make install -j$(nproc)
 
 FROM build-base AS libffi-dev
+ARG TARGET_CPU
+ARG WASM64_MEMORY64
 ARG FFI_VERSION
 RUN mkdir -p /libffi
 RUN git clone https://github.com/libffi/libffi /libffi
 WORKDIR /libffi
 RUN git checkout $FFI_VERSION
 RUN autoreconf -fiv
-RUN emconfigure ./configure --host=wasm32-unknown-linux \
+RUN emconfigure ./configure --host=${TARGET_CPU}-unknown-linux \
     --prefix=$TARGET --enable-static \
     --disable-shared --disable-dependency-tracking \
     --disable-builddir --disable-multi-os-directory \
-    --disable-raw-api --disable-docs
+    --disable-raw-api --disable-docs WASM64_MEMORY64=${WASM64_MEMORY64}
 RUN emmake make install SUBDIRS='include' -j$(nproc)
 
 FROM build-base AS pixman-dev