Skip to content

Commit 3aa56c5

Browse files
[clang][WebAssembly] Return aggregate values indirectly in swiftcc by default
The Swift calling convention on Wasm has historically returned aggregate values directly at the LLVM IR level because it used the generic `SwiftABIInfo` implementation. Returning aggregates directly at the LLVM IR level causes unnecessary stack allocations and memory copies for each aggregate return value at ISel/MI time, which misses many optimization opportunities. This change provides a `SwiftABIInfo` hook implementation for Wasm targets that limits the number of scalar return values to 1 by default, so that aggregates needing more than one scalar are returned via sret. It also adds basic support for the experimental multivalue ABI.
1 parent 18ffb87 commit 3aa56c5

File tree

4 files changed

+137
-13
lines changed

4 files changed

+137
-13
lines changed

clang/lib/CodeGen/ABIInfo.cpp

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,22 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
247247
// Pin the vtable to this file.
248248
SwiftABIInfo::~SwiftABIInfo() = default;
249249

250+
/// Tally how many registers the given scalar types occupy.
///
/// The results are *added* to \p intCount and \p fpCount; the counters are
/// not reset here, so callers are expected to initialize them.
///
/// \param scalarTypes the scalar types to classify.
/// \param intCount incremented once per pointer, and once per
///   \p maxIntRegisterBitWidth-sized chunk (rounded up) of each integer.
/// \param fpCount incremented once per remaining type, which is asserted to
///   be a vector or floating-point type.
/// \param maxIntRegisterBitWidth widest integer that fits in one integer
///   register; used as a divisor, so it must be non-zero.
void SwiftABIInfo::countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
251+
unsigned &intCount, unsigned &fpCount,
252+
unsigned maxIntRegisterBitWidth) {
253+
for (llvm::Type *type : scalarTypes) {
254+
if (type->isPointerTy()) {
255+
// A pointer always takes exactly one integer register.
intCount++;
256+
} else if (auto *intTy = dyn_cast<llvm::IntegerType>(type)) {
257+
// Ceiling division: an integer wider than one register is charged for
// every register-sized chunk it spans.
intCount += (intTy->getBitWidth() + maxIntRegisterBitWidth - 1) /
258+
maxIntRegisterBitWidth;
259+
} else {
260+
// Anything else must be a vector or floating-point value; each one is
// charged as a single fp register regardless of its width.
assert(type->isVectorTy() || type->isFloatingPointTy());
261+
fpCount++;
262+
}
263+
}
264+
}
265+
250266
/// Does the given lowering require more than the given number of
251267
/// registers when expanded?
252268
///
@@ -262,18 +278,10 @@ SwiftABIInfo::~SwiftABIInfo() = default;
262278
/// return registers.
263279
bool SwiftABIInfo::occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
264280
unsigned maxAllRegisters) const {
281+
// Use the pointer width as the maximum integer register bit width by default.
282+
unsigned ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
265283
unsigned intCount = 0, fpCount = 0;
266-
for (llvm::Type *type : scalarTypes) {
267-
if (type->isPointerTy()) {
268-
intCount++;
269-
} else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
270-
auto ptrWidth = CGT.getTarget().getPointerWidth(LangAS::Default);
271-
intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
272-
} else {
273-
assert(type->isVectorTy() || type->isFloatingPointTy());
274-
fpCount++;
275-
}
276-
}
284+
countOccupiedRegisters(scalarTypes, intCount, fpCount, ptrWidth);
277285

278286
// Integer and fp/vector registers are charged against one shared limit.
return (intCount + fpCount > maxAllRegisters);
279287
}

clang/lib/CodeGen/ABIInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,9 @@ class SwiftABIInfo {
141141
CodeGenTypes &CGT;
142142
bool SwiftErrorInRegister;
143143

144+
/// Add the number of integer and floating-point/vector registers needed for
/// \p scalarTypes to \p intCount and \p fpCount respectively. The counters
/// are accumulated into, not reset. Integers wider than
/// \p maxIntRegisterBitWidth (which must be non-zero) are counted as
/// multiple integer registers.
static void countOccupiedRegisters(ArrayRef<llvm::Type *> scalarTypes,
145+
unsigned &intCount, unsigned &fpCount,
146+
unsigned maxIntRegisterBitWidth);
144147
bool occupiesMoreThan(ArrayRef<llvm::Type *> scalarTypes,
145148
unsigned maxAllRegisters) const;
146149

clang/lib/CodeGen/Targets/WebAssembly.cpp

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,44 @@ class WebAssemblyABIInfo final : public ABIInfo {
4545
AggValueSlot Slot) const override;
4646
};
4747

48+
/// Swift calling-convention hooks for WebAssembly.
///
/// Overrides shouldPassIndirectly so that the decision depends on the
/// selected WebAssemblyABIKind and on whether the value is a return value
/// or an argument.
class WebAssemblySwiftABIInfo final : public SwiftABIInfo {
49+
// ABI variant selected for this target; consulted when classifying
// return values.
WebAssemblyABIKind Kind;
50+
51+
public:
52+
explicit WebAssemblySwiftABIInfo(CodeGen::CodeGenTypes &CGT,
53+
WebAssemblyABIKind K)
54+
: SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/false), Kind(K) {}
55+
56+
/// Decide whether a value lowered to \p ComponentTys must be passed
/// indirectly.
///
/// \param ComponentTys the scalar types the value was expanded into.
/// \param AsReturnValue true when classifying a return value, false for an
///   argument.
/// \returns true if the value exceeds the register budget for its position.
bool shouldPassIndirectly(ArrayRef<llvm::Type *> ComponentTys,
57+
bool AsReturnValue) const override {
58+
// Fixed at 64 bits rather than the pointer width used by
// SwiftABIInfo::occupiesMoreThan, so an integer of up to 64 bits is
// charged as one register independent of the target's pointer width.
unsigned maxIntRegisterBitWidth = 64;
59+
unsigned intCount = 0, fpCount = 0;
60+
countOccupiedRegisters(ComponentTys, intCount, fpCount,
61+
maxIntRegisterBitWidth);
62+
63+
if (AsReturnValue) {
64+
if (Kind == WebAssemblyABIKind::ExperimentalMV) {
65+
// If the experimental multivalue ABI is enabled, try to return up to 2
66+
// values for each of int and fp, which is a very conservative value
67+
// based on the number of available physical gp return registers used in
68+
// the major engines to minimize stack spills at JIT time.
69+
// Note the int and fp budgets are checked independently here, unlike
// the combined totals used below.
return intCount > 2 || fpCount > 2;
70+
}
71+
// By default, limit to 1 total register.
72+
return (intCount + fpCount > 1);
73+
}
74+
// For an argument, limit to 4 total registers, which is the default limit
75+
// used by the default SwiftABIInfo implementation.
76+
return (intCount + fpCount > 4);
77+
}
78+
};
79+
4880
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
4981
public:
5082
/// Construct the WebAssembly target info, forwarding \p CGT and the ABI
/// kind \p K to the WebAssemblyABIInfo it owns.
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
5183
WebAssemblyABIKind K)
5284
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {
53-
SwiftInfo =
54-
std::make_unique<SwiftABIInfo>(CGT, /*SwiftErrorInRegister=*/false);
85+
// Install the WebAssembly-specific Swift ABI hooks, passing the ABI kind
// through so swiftcc classification can depend on it.
SwiftInfo = std::make_unique<WebAssemblySwiftABIInfo>(CGT, K);
5586
}
5687

5788
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// RUN: %clang_cc1 -triple wasm32-unknown-unknown %s -emit-llvm -o - | FileCheck %s
2+
// RUN: %clang_cc1 -triple wasm64-unknown-unknown %s -emit-llvm -target-abi experimental-mv -o - | FileCheck %s -check-prefix=EXPERIMENTAL-MV
3+
4+
typedef struct {
5+
int aa;
6+
int bb;
7+
} s1;
8+
9+
// Multiple-element structs should be returned through sret.
10+
// CHECK: define swiftcc void @return_s1(ptr dead_on_unwind noalias writable sret(%struct.s1) align 4 %agg.result)
11+
// EXPERIMENTAL-MV: define swiftcc i64 @return_s1()
12+
// Fixture: returns an s1 by value under the swiftcall convention.
__attribute__((swiftcall))
13+
s1 return_s1(void) {
14+
// Left uninitialized; the FileCheck patterns above match only the signature.
s1 foo;
15+
return foo;
16+
}
17+
18+
typedef struct {
19+
int cc;
20+
} s2;
21+
22+
// Single-element structs should be returned directly.
23+
// CHECK: define swiftcc i32 @return_s2()
24+
// EXPERIMENTAL-MV: define swiftcc i32 @return_s2()
25+
// Fixture: returns an s2 by value under the swiftcall convention.
__attribute__((swiftcall))
26+
s2 return_s2(void) {
27+
// Left uninitialized; the FileCheck patterns above match only the signature.
s2 foo;
28+
return foo;
29+
}
30+
31+
typedef struct {
32+
char c1[4];
33+
} s3;
34+
35+
// CHECK: define swiftcc i32 @return_s3()
36+
// EXPERIMENTAL-MV: define swiftcc i32 @return_s3()
37+
// Fixture: returns an s3 by value under the swiftcall convention.
__attribute__((swiftcall))
38+
s3 return_s3(void) {
39+
// Left uninitialized; the FileCheck patterns above match only the signature.
s3 foo;
40+
return foo;
41+
}
42+
43+
typedef struct {
44+
int bf1 : 4;
45+
int bf2 : 3;
46+
int bf3 : 8;
47+
} s4;
48+
49+
// CHECK: define swiftcc i16 @return_s4()
50+
// EXPERIMENTAL-MV: define swiftcc i16 @return_s4()
51+
// Fixture: returns an s4 by value under the swiftcall convention.
__attribute__((swiftcall))
52+
s4 return_s4(void) {
53+
// Left uninitialized; the FileCheck patterns above match only the signature.
s4 foo;
54+
return foo;
55+
}
56+
57+
// Single-element structs fitting in an i64 should be returned directly.
58+
typedef struct {
59+
long long v;
60+
} s5;
61+
62+
// CHECK: define swiftcc i64 @return_s5()
63+
// EXPERIMENTAL-MV: define swiftcc i64 @return_s5()
64+
// Fixture: returns an s5 by value under the swiftcall convention.
__attribute__((swiftcall))
65+
s5 return_s5(void) {
66+
// Left uninitialized; the FileCheck patterns above match only the signature.
s5 foo;
67+
return foo;
68+
}
69+
70+
// Multiple-element structs not fitting in an i64 should be returned through sret by default.
71+
typedef struct {
72+
long long v1;
73+
long long v2;
74+
} s6;
75+
76+
// CHECK: define swiftcc void @return_s6(ptr dead_on_unwind noalias writable sret(%struct.s6) align 8 %agg.result)
77+
// EXPERIMENTAL-MV: define swiftcc { i64, i64 } @return_s6()
78+
// Fixture: returns an s6 by value under the swiftcall convention.
__attribute__((swiftcall))
79+
s6 return_s6(void) {
80+
// Left uninitialized; the FileCheck patterns above match only the signature.
s6 foo;
81+
return foo;
82+
}

0 commit comments

Comments
 (0)