From 3aa56c51a6b5cec5e492b6ce2119c5db44f308e4 Mon Sep 17 00:00:00 2001
From: Yuta Saito
Date: Thu, 6 Nov 2025 01:04:56 +0000
Subject: [PATCH] [clang][WebAssembly] Return aggregate values indirectly in swiftcc by default

The Swift calling convention on Wasm has historically returned aggregate
values directly at the LLVM IR level due to the use of the generic
`SwiftABIInfo` implementation. The direct return at LLVM IR level will
cause unnecessary stack allocation and memory copies for each aggregate
return value at ISel/MI time, which misses lots of optimization
opportunities.

This change provides a SwiftABIInfo hook implementation for Wasm targets
to limit the number of scalar return values up to 1 by default, which
will use sret for aggregate return values. Also adds basic support for
the experimental multivalue ABI.
---
 clang/lib/CodeGen/ABIInfo.cpp                 | 30 ++++---
 clang/lib/CodeGen/ABIInfo.h                   |  3 +
 clang/lib/CodeGen/Targets/WebAssembly.cpp     | 35 +++++++-
 .../CodeGen/WebAssembly/wasm-return-swiftcc.c | 82 +++++++++++++++++++
 4 files changed, 137 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c

diff --git a/clang/lib/CodeGen/ABIInfo.cpp b/clang/lib/CodeGen/ABIInfo.cpp
index 59b2b17788380..e1788edff2bd2 100644
--- a/clang/lib/CodeGen/ABIInfo.cpp
+++ b/clang/lib/CodeGen/ABIInfo.cpp
@@ -247,6 +247,22 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
 // Pin the vtable to this file.
 SwiftABIInfo::~SwiftABIInfo() = default;
 
+void SwiftABIInfo::countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+                                          unsigned &intCount, unsigned &fpCount,
+                                          unsigned maxIntRegisterBitWidth) {
+  for (llvm::Type *type : scalarTypes) {
+    if (type->isPointerTy()) {
+      intCount++;
+    } else if (auto *intTy = dyn_cast<llvm::IntegerType>(type)) {
+      intCount += (intTy->getBitWidth() + maxIntRegisterBitWidth - 1) /
+                  maxIntRegisterBitWidth;
+    } else {
+      assert(type->isVectorTy() || type->isFloatingPointTy());
+      fpCount++;
+    }
+  }
+}
+
 /// Does the given lowering require more than the given number of
 /// registers when expanded?
 ///
@@ -262,18 +278,10 @@ SwiftABIInfo::~SwiftABIInfo() = default;
 /// return registers.
 bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
                                     unsigned maxAllRegisters) const {
+  // Use the pointer width as the maximum integer register bit width by default.
+  unsigned ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
   unsigned intCount = 0, fpCount = 0;
-  for (llvm::Type *type : scalarTypes) {
-    if (type->isPointerTy()) {
-      intCount++;
-    } else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
-      auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
-      intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
-    } else {
-      assert(type->isVectorTy() || type->isFloatingPointTy());
-      fpCount++;
-    }
-  }
+  countOccupiedRegisters(scalarTypes, intCount, fpCount, ptrWidth);
   return (intCount + fpCount > maxAllRegisters);
 }
 
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index b253696f502d8..b3e86b37b6dcb 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -141,6 +141,9 @@ class SwiftABIInfo {
   CodeGenTypes &CGT;
   bool SwiftErrorInRegister;
 
+  static void countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
+                                     unsigned &intCount, unsigned &fpCount,
+                                     unsigned maxIntRegisterBitWidth);
   bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
                         unsigned maxAllRegisters) const;
 
diff --git a/clang/lib/CodeGen/Targets/WebAssembly.cpp b/clang/lib/CodeGen/Targets/WebAssembly.cpp
index 9217c78a540a3..da1e1d9960ad0 100644
--- a/clang/lib/CodeGen/Targets/WebAssembly.cpp
+++ b/clang/lib/CodeGen/Targets/WebAssembly.cpp
@@ -45,13 +45,44 @@ class WebAssemblyABIInfo final : public ABIInfo {
                    AggValueSlot Slot) const override;
 };
 
+class WebAssemblySwiftABIInfo final : public SwiftABIInfo {
+  WebAssemblyABIKind Kind;
+
+public:
+  explicit WebAssemblySwiftABIInfo(CodeGen::CodeGenTypes &CGT,
+                                   WebAssemblyABIKind K)
+      : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false), Kind(K) {}
+
+  bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
+                            bool AsReturnValue) const override {
+    unsigned maxIntRegisterBitWidth = 64;
+    unsigned intCount = 0, fpCount = 0;
+    countOccupiedRegisters(ComponentTys, intCount, fpCount,
+                           maxIntRegisterBitWidth);
+
+    if (AsReturnValue) {
+      if (Kind == WebAssemblyABIKind::ExperimentalMV) {
+        // If the experimental multivalue ABI is enabled, try to return up to 2
+        // values for each of int and fp, which is a very conservative value
+        // based on the number of available physical gp return registers used in
+        // the major engines to minimize stack spills at JIT time.
+        return intCount > 2 || fpCount > 2;
+      }
+      // By default, limit to 1 total register.
+      return (intCount + fpCount > 1);
+    }
+    // For an argument, limit to 4 total registers, which is the default limit
+    // used by the default SwiftABIInfo implementation.
+    return (intCount + fpCount > 4);
+  }
+};
+
 class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
 public:
   explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
                                         WebAssemblyABIKind K)
       : TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
-    SwiftInfo =
-        std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
+    SwiftInfo = std::make_unique<WebAssemblySwiftABIInfo>(CGT, K);
   }
 
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
diff --git a/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
new file mode 100644
index 0000000000000..d1fb9add8e7bf
--- /dev/null
+++ b/clang/test/CodeGen/WebAssembly/wasm-return-swiftcc.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -target-abi experimental-mv -o - | FileCheck %s -check-prefix=EXPERIMENTAL-MV
+
+typedef struct {
+  int aa;
+  int bb;
+} s1;
+
+// Multiple-element structs should be returned through sret.
+// CHECK: define swiftcc void @return_s1(ptr dead_on_unwind noalias writable sret(%struct.s1) align 4 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s1()
+__attribute__((swiftcall))
+s1 return_s1(void) {
+  s1 foo;
+  return foo;
+}
+
+typedef struct {
+  int cc;
+} s2;
+
+// Single-element structs should be returned directly.
+// CHECK: define swiftcc i32 @return_s2()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s2()
+__attribute__((swiftcall))
+s2 return_s2(void) {
+  s2 foo;
+  return foo;
+}
+
+typedef struct {
+  char c1[4];
+} s3;
+
+// CHECK: define swiftcc i32 @return_s3()
+// EXPERIMENTAL-MV: define swiftcc i32 @return_s3()
+__attribute__((swiftcall))
+s3 return_s3(void) {
+  s3 foo;
+  return foo;
+}
+
+typedef struct {
+  int bf1 : 4;
+  int bf2 : 3;
+  int bf3 : 8;
+} s4;
+
+// CHECK: define swiftcc i16 @return_s4()
+// EXPERIMENTAL-MV: define swiftcc i16 @return_s4()
+__attribute__((swiftcall))
+s4 return_s4(void) {
+  s4 foo;
+  return foo;
+}
+
+// Single-element structs fitting in a i64 should be returned directly.
+typedef struct {
+  long long v;
+} s5;
+
+// CHECK: define swiftcc i64 @return_s5()
+// EXPERIMENTAL-MV: define swiftcc i64 @return_s5()
+__attribute__((swiftcall))
+s5 return_s5(void) {
+  s5 foo;
+  return foo;
+}
+
+// Multiple-element structs not fitting in a i64
+typedef struct {
+  long long v1;
+  long long v2;
+} s6;
+
+// CHECK: define swiftcc void @return_s6(ptr dead_on_unwind noalias writable sret(%struct.s6) align 8 %agg.result)
+// EXPERIMENTAL-MV: define swiftcc { i64, i64 } @return_s6()
+__attribute__((swiftcall))
+s6 return_s6(void) {
+  s6 foo;
+  return foo;
+}