diff --git a/doc/running.html b/doc/running.html
index f71eee42f6..56d4c7bfbe 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -299,9 +299,9 @@
-O[level]
| recunroll | 2 | Min. unroll factor for true recursion |
-| sizemcode | 32 | Size of each machine code area in KBytes (Windows: 64K) |
+| sizemcode | 64 | Size of each machine code area in KBytes |
-| maxmcode | 512 | Max. total size of all machine code areas in KBytes |
+| maxmcode | 2048 | Max. total size of all machine code areas in KBytes |
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
index 8b27e9625c..db3adb4845 100644
--- a/dynasm/dasm_arm64.lua
+++ b/dynasm/dasm_arm64.lua
@@ -244,6 +244,10 @@ local map_cond = {
hs = 2, lo = 3,
}
+local map_bti = {
+ c = 0x40, j = 0x80, jc = 0xc0,
+}
+
------------------------------------------------------------------------------
local parse_reg_type
@@ -475,6 +479,12 @@ local function parse_cond(expr, inv)
return shl(bit.bxor(c, inv), 12)
end
+local function parse_map(expr, map)
+ local x = map[expr]
+ if not x then werror("bad operand") end
+ return x
+end
+
local function parse_load(params, nparams, n, op)
if params[n+2] then werror("too many operands") end
local scale = shr(op, 30)
@@ -823,11 +833,21 @@ map_op = {
tbz_3 = "36000000DTBw|36000000DTBx",
tbnz_3 = "37000000DTBw|37000000DTBx",
+ -- Branch Target Identification.
+ bti_1 = "d503241ft",
+
-- ARM64e: Pointer authentication codes (PAC).
blraaz_1 = "d63f081fNx",
+ blrabz_1 = "d63f0c1fNx",
braa_2 = "d71f0800NDx",
+ brab_2 = "d71f0c00NDx",
braaz_1 = "d61f081fNx",
+ brabz_1 = "d61f0c1fNx",
+ paciasp_0 = "d503233f",
pacibsp_0 = "d503237f",
+ autiasp_0 = "d50323bf",
+ autibsp_0 = "d50323ff",
+ retaa_0 = "d65f0bff",
retab_0 = "d65f0fff",
-- Miscellaneous instructions.
@@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos)
op = op + parse_cond(q, 0); n = n + 1
elseif p == "c" then
op = op + parse_cond(q, 1); n = n + 1
+ elseif p == "t" then
+ op = op + parse_map(q, map_bti); n = n + 1
else
assert(false)
diff --git a/src/Makefile b/src/Makefile
index 4a56d1e8e5..e657af1343 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -302,6 +302,9 @@ endif
ifneq (,$(INSTALL_LJLIBD))
TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\"
endif
+ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1))
+ TARGET_XCFLAGS+= -fno-strict-float-cast-overflow
+endif
##############################################################################
# Target system detection.
@@ -354,6 +357,9 @@ else
ifeq (GNU/kFreeBSD,$(TARGET_SYS))
TARGET_XLIBS+= -ldl
endif
+ ifeq (GNU,$(TARGET_SYS))
+ TARGET_XLIBS+= -ldl
+ endif
endif
endif
endif
@@ -440,6 +446,14 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D PAUTH
TARGET_ARCH+= -DLJ_ABI_PAUTH=1
endif
+ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D BRANCH_TRACK
+ TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1
+endif
+ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D SHADOW_STACK
+ TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1
+endif
DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH))))
ifeq (Windows,$(TARGET_SYS))
DASM_AFLAGS+= -D WIN
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index a30a34b6be..e4ca19779d 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -465,9 +465,11 @@ typedef struct {
mach_segment_command_64 seg;
mach_section_64 sec;
mach_symtab_command sym;
+} mach_obj_64;
+typedef struct {
mach_nlist_64 sym_entry;
uint8_t space[4096];
-} mach_obj_64;
+} mach_obj_64_tail;
]]
local symname = '_'..LJBC_PREFIX..ctx.modname
local cputype, cpusubtype = 0x01000007, 3
@@ -479,7 +481,10 @@ typedef struct {
-- Create Mach-O object and fill in header.
local o = ffi.new("mach_obj_64")
- local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8)
+ local t = ffi.new("mach_obj_64_tail")
+ local ofs_bc = ffi.sizeof(o)
+ local sz_bc = aligned(#s, 8)
+ local ofs_sym = ofs_bc + sz_bc
-- Fill in sections and symbols.
o.hdr.magic = 0xfeedfacf
@@ -491,7 +496,7 @@ typedef struct {
o.seg.cmd = 0x19
o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
o.seg.vmsize = #s
- o.seg.fileoff = mach_size
+ o.seg.fileoff = ofs_bc
o.seg.filesize = #s
o.seg.maxprot = 1
o.seg.initprot = 1
@@ -499,22 +504,23 @@ typedef struct {
ffi.copy(o.sec.sectname, "__data")
ffi.copy(o.sec.segname, "__DATA")
o.sec.size = #s
- o.sec.offset = mach_size
+ o.sec.offset = ofs_bc
o.sym.cmd = 2
o.sym.cmdsize = ffi.sizeof(o.sym)
- o.sym.symoff = ffi.offsetof(o, "sym_entry")
+ o.sym.symoff = ofs_sym
o.sym.nsyms = 1
- o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)
+ o.sym.stroff = ofs_sym + ffi.offsetof(t, "space")
o.sym.strsize = aligned(#symname+2, 8)
- o.sym_entry.type = 0xf
- o.sym_entry.sect = 1
- o.sym_entry.strx = 1
- ffi.copy(o.space+1, symname)
+ t.sym_entry.type = 0xf
+ t.sym_entry.sect = 1
+ t.sym_entry.strx = 1
+ ffi.copy(t.space+1, symname)
-- Write Mach-O object file.
local fp = savefile(output, "wb")
- fp:write(ffi.string(o, mach_size))
- bcsave_tail(fp, output, s)
+ fp:write(ffi.string(o, ofs_bc))
+ fp:write(s, ("\0"):rep(sz_bc - #s))
+ bcsave_tail(fp, output, ffi.string(t, ffi.offsetof(t, "space") + o.sym.strsize))
end
local function bcsave_obj(ctx, output, s)
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
index 4457aac080..facc6e4a1e 100644
--- a/src/jit/dis_arm64.lua
+++ b/src/jit/dis_arm64.lua
@@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions.
},
{ -- System instructions.
shift = 0, mask = 0x3fffff,
- [0x03201f] = "nop"
+ [0x03201f] = "nop",
+ [0x03245f] = "bti c",
+ [0x03249f] = "bti j",
+ [0x0324df] = "bti jc",
},
{ -- Unconditional branch, register.
shift = 0, mask = 0xfffc1f,
@@ -920,7 +923,7 @@ local function disass_ins(ctx)
elseif p == "B" then
local addr = ctx.addr + pos + parse_immpc(op, name)
ctx.rel = addr
- x = "0x"..tohex(addr)
+ x = format("0x%08x", addr)
elseif p == "T" then
x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
elseif p == "V" then
@@ -1171,6 +1174,9 @@ local function disass_ins(ctx)
end
end
second0 = true
+ elseif p == " " then
+ operands[#operands+1] = pat:match(" (.*)")
+ break
else
assert(false)
end
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index b1de0eeae1..6b04ee8495 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -122,7 +122,7 @@ local map_opc2 = {
"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
"movhpsXmr||movhpdXmr",
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
-"hintnopVm","hintnopVm","hintnopVm","hintnopVm",
+"hintnopVm","hintnopVm","endbr*hintnopVm","hintnopVm",
--2x
"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
"movapsXrm||movapdXrm",
@@ -804,6 +804,24 @@ map_act = {
return dispatch(ctx, map_opcvm[ctx.mrm])
end,
+ -- Special NOP for endbr64/endbr32.
+ endbr = function(ctx, name, pat)
+ if ctx.rep then
+ local pos = ctx.pos
+ local b = byte(ctx.code, pos)
+ local text
+ if b == 0xfa then text = "endbr64"
      elseif b == 0xfb then text = "endbr32"
+ end
+ if text then
+ ctx.pos = pos + 1
+ ctx.rep = nil
+ return putop(ctx, text)
+ end
+ end
+ return dispatch(ctx, pat)
+ end,
+
-- Floating point opcode dispatch.
fp = function(ctx, name, pat)
local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
diff --git a/src/lib_jit.c b/src/lib_jit.c
index fd8e585b83..0f75c5ac64 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str)
size_t len = *(const uint8_t *)lst;
lj_assertJ(len != 0, "bad JIT_P_STRING");
if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
- int32_t n = 0;
+ uint32_t n = 0;
const char *p = &str[len+1];
while (*p >= '0' && *p <= '9')
n = n*10 + (*p++ - '0');
- if (*p) return 0; /* Malformed number. */
- J->param[i] = n;
+ if (*p || (int32_t)n < 0) return 0; /* Malformed number. */
+ if (i == JIT_P_sizemcode) { /* Adjust to required range here. */
+#if LJ_TARGET_JUMPRANGE
+ uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64);
+#else
+ uint32_t maxkb = ((1 << (31 - 10)) - 64);
+#endif
+ n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1);
+ if (n > maxkb) n = maxkb;
+ }
+ J->param[i] = (int32_t)n;
if (i == JIT_P_hotloop)
lj_dispatch_init_hotcount(J2G(J));
return 1; /* Ok. */
@@ -714,7 +723,16 @@ static void jit_init(lua_State *L)
jit_State *J = L2J(L);
J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
memcpy(J->param, jit_param_default, sizeof(J->param));
+#if LJ_TARGET_UNALIGNED
+ G(L)->tmptv.u64 = U64x(0000504d,4d500000);
+#endif
lj_dispatch_update(G(L));
+#if LJ_TARGET_UNALIGNED
+ /* If you get a crash below then your toolchain indicates unaligned
+ ** accesses are OK, but your kernel disagrees. I.e. fix your toolchain.
+ */
+ if (*(uint32_t *)((char *)&G(L)->tmptv + 2) != 0x504d4d50u) L->top = NULL;
+#endif
}
#endif
diff --git a/src/lj_arch.h b/src/lj_arch.h
index a4eecf27e0..5f3880680b 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -96,6 +96,9 @@
#elif defined(__QNX__)
#define LJ_TARGET_QNX 1
#define LUAJIT_OS LUAJIT_OS_POSIX
+#elif defined(__GNU__)
+#define LJ_TARGET_HURD 1
+#define LUAJIT_OS LUAJIT_OS_POSIX
#else
#define LUAJIT_OS LUAJIT_OS_OTHER
#endif
@@ -216,6 +219,29 @@
#error "macOS requires GC64 -- don't disable it"
#endif
+#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \
+ LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR)
+/*
+** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT).
+** This is not enabled by default because it causes a notable slowdown of
+** the interpreter on all x64 CPUs, whether they have CET enabled or not.
+** If your toolchain enables -fcf-protection=branch by default, you need
+** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR
+*/
+#define LJ_ABI_BRANCH_TRACK 1
+#endif
+
+#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2)
+/*
+** Control-Flow Enforcement Technique (CET) shadow stack (CET-SS).
+** It has no code overhead and doesn't cause any slowdowns when unused.
+** It can also be unconditionally enabled since all code already follows
+** a strict CALL to RET correspondence for performance reasons (all modern
+** CPUs use a (non-enforcing) shadow stack for return branch prediction).
+*/
+#define LJ_ABI_SHADOW_STACK 1
+#endif
+
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
#define LJ_ARCH_NAME "arm"
@@ -262,6 +288,11 @@
#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
#define LJ_ABI_PAUTH 1
#endif
+#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \
+ defined(LUAJIT_ENABLE_CET_BR)
+/* See comments about LUAJIT_ENABLE_CET_BR above. */
+#define LJ_ABI_BRANCH_TRACK 1
+#endif
#define LJ_TARGET_ARM64 1
#define LJ_TARGET_EHRETREG 0
#define LJ_TARGET_EHRAREG 30
@@ -270,8 +301,13 @@
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_TARGET_GC64 1
+#define LJ_PAGESIZE 16384
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#if __ARM_FEATURE_UNALIGNED
+#define LJ_TARGET_UNALIGNED 1
+#endif
+
#define LJ_ARCH_VERSION 80
#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
@@ -425,7 +461,7 @@
#define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4
#define LJ_TARGET_EHRAREG 31
-#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
+#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
diff --git a/src/lj_asm.c b/src/lj_asm.c
index fec4351251..0e888c294a 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -93,6 +93,10 @@ typedef struct ASMState {
MCode *invmcp; /* Points to invertible loop branch (or NULL). */
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
+ MCode *mctail; /* Tail of trace before stack adjust + jmp. */
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
+ MCode *mcexit; /* Pointer to exit stubs. */
+#endif
#ifdef LUAJIT_RANDOM_RA
/* Randomize register allocation. OK for fuzz testing, not for production. */
@@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
RA_DBGX((as, "===== STOP ====="));
/* General trace setup. Emit tail of trace. */
- asm_tail_prep(as);
+ asm_tail_prep(as, T->link);
as->mcloop = NULL;
as->flagmcp = NULL;
as->topslot = 0;
@@ -2586,6 +2590,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
asm_head_side(as);
else
asm_head_root(as);
+#if LJ_ABI_BRANCH_TRACK
+ emit_branch_track(as);
+#endif
asm_phi_fixup(as);
if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index de435057e1..406360d26a 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
/* Generate an exit stub group at the bottom of the reserved MCode memory. */
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
+ ExitNo i;
+ int ind = 0;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
- int i;
- if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop)
+ if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop)
asm_mclimit(as);
- /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */
- *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP);
- *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu);
- mxp++;
+ if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) {
+ /* str lr, [sp]; bl ->vm_exit_handler;
+ ** .long DISPATCH_address, group.
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ /*
+ ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr;
+ ** .long DISPATCH_address, group;
+ */
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 2;
+ } else {
+ /* .long vm_exit_handler;
+ ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr;
+ ** .long DISPATCH_address, group;
+ */
+ *mxp++ = (MCode)target;
+ *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP);
+ *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16;
+ *mxp++ = ARMI_BLXr | ARMF_M(RID_LR);
+ ind = 1;
+ }
*mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */
*mxp++ = group*EXITSTUBS_PER_GROUP;
for (i = 0; i < EXITSTUBS_PER_GROUP; i++)
- *mxp++ = ARMI_B|((-6-i)&0x00ffffffu);
+ *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu);
lj_mcode_sync(as->mcbot, mxp);
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
@@ -1927,7 +1952,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi)
- asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
+ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS);
return;
#elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) {
@@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- as->mctop = --p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(ARMI_ADD, spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
+ }
+ if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) {
+ *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++;
+ } else if ((as->flags & JIT_F_ARMV6T2)) {
+ *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR);
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ } else {
+ *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0;
+ *mcp++ = ARMI_BX | ARMF_M(RID_LR);
+ *mcp++ = (MCode)target;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu);
+ while (as->mctop > mcp) *--as->mctop = ARMI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if ((((target - p - 2) + 0x00800000u) >> 24) ||
+ (((target - p - 1) + 0x00800000u) >> 24)) p -= 2;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
*p = 0; /* Prevent load/store merging. */
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
index 4feaa3b0c2..fdcff1db24 100644
--- a/src/lj_asm_arm64.h
+++ b/src/lj_asm_arm64.h
@@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
asm_mclimit(as);
- /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ ind = !A64F_S_OK(target - (mxp - nexits - 2), 26);
+ /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno;
+ ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i));
+ as->mcexit = mxp;
*--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
- mxp--;
- *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+ if (ind) {
+ *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR));
+ *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3));
+ } else {
+ mxp--;
+ *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp));
+ }
*--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
as->mctop = mxp;
}
@@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
@@ -1044,7 +1056,8 @@ static void asm_fstore(ASMState *as, IRIns *ir)
static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
+ "unaligned XLOAD");
asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
}
@@ -1917,34 +1930,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
+ MCode *mcp = as->mctail;
MCode *target;
/* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
- if (spadj == 0) {
- *--p = A64I_LE(A64I_NOP);
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
uint32_t k = emit_isk12(spadj);
lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
- p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+ *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = A64I_B | A64F_S26((target-p)+1);
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || A64F_S_OK(target - mcp, 26)) {
+ *mcp = A64I_B | A64F_S26(target - mcp); mcp++;
+ } else {
+ *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3);
+ *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR);
+ }
+ while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP);
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ MCode *target = (MCode *)(void *)lj_vm_exit_interp;
+ if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--;
+ }
+ p--; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
*p = 0; /* Prevent load/store merging. */
}
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index af0e714f15..8dadabe4a0 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
/* Setup exit stub after the end of each trace. */
static void asm_exitstub_setup(ASMState *as)
{
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
- *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
- *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
- lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
- "branch target out of range");
- *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
+ *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno;
+ if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) {
+ /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
+ *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */
+ *--mxp = MIPSI_JR | MIPSF_S(RID_TMP);
+ *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+#endif
+ }
+ *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0;
as->mctop = mxp;
}
@@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
+ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)|
+ RID2RSET(RID_CFUNCADDR)
#if LJ_TARGET_MIPSR6
|RID2RSET(RID_F21)
#endif
@@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
- RID2RSET(RID_R1)|RID2RSET(RID_R12);
+ RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR);
if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
ra_evictset(as, drop);
/* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
@@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- MCode *p = as->mctop-1;
- *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
- p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
+ if (((uintptr_t)mcp ^ target) >> 28 == 0) {
+ *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu);
+ } else {
+ *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) |
+#if LJ_64
+ jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]);
+#else
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+#endif
+ *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP);
+ }
+ *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
- as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */
- as->invmcp = as->loopref ? as->mcp : NULL;
+ as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. */
+ if (as->loopref) {
+ as->invmcp = as->mcp;
+ } else {
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--;
+ }
+ as->invmcp = NULL;
+ }
+ as->mctail = as->mcp;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index df1ac42f7a..d77c45ce9b 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
{
ExitNo i;
+ int ind;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler;
MCode *mxp = as->mctop;
- if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim)
asm_mclimit(as);
- /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */
+ ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0;
+ /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno;
+ ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno;
+ ** bl <1; bl <1; ...
+ */
for (i = nexits-1; (int32_t)i >= 0; i--)
- *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2);
+ *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2);
+ as->mcexit = mxp;
*--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */
- mxp--;
- *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2);
- *--mxp = PPCI_MFLR|PPCF_T(RID_TMP);
+ if (ind) {
+ *--mxp = PPCI_BCTRL;
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]);
+ } else {
+ mxp--;
+ *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu);
+ *--mxp = PPCI_MFLR | PPCF_T(RID_TMP);
+ }
as->mctop = mxp;
}
static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
{
/* Keep this in-sync with exitstub_trace_addr(). */
- return as->mctop + exitno + 3;
+ return as->mcexit + exitno;
}
/* Emit conditional branch to exit for guard. */
@@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
/* Fixup the tail code. */
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
- MCode *p = as->mctop;
- MCode *target;
+ uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp;
+ MCode *mcp = as->mctail;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- *--p = PPCI_NOP;
- *--p = PPCI_NOP;
- as->mctop = p;
- } else {
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
- p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
- p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
+ *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
+ *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
+ /* Emit exit branch. */
+ if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) {
+ *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++;
+ } else {
+ *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) |
+ jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]);
+ *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP);
+ *mcp++ = PPCI_BCTR;
+ }
+ while (as->mctop > mcp) *--as->mctop = PPCI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop - 1; /* Leave room for exit branch. */
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- as->mcp = p-2; /* Leave room for stack pointer adjustment. */
+ if (!lnk) {
+ uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp;
+ if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) ||
+ (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2;
+ }
+ p -= 2; /* Leave room for stack pointer adjustment. */
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 936ff438fd..f3c2238a2f 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -9,9 +9,12 @@
static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
{
ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
+ MCode *target = (MCode *)(void *)lj_vm_exit_handler;
MCode *mxp = as->mcbot;
MCode *mxpstart = mxp;
- if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
+ if (mxp + ((2+2)*EXITSTUBS_PER_GROUP +
+ (LJ_GC64 ? 0 : 8) +
+ (LJ_64 ? 6 : 5)) >= as->mctop)
asm_mclimit(as);
/* Push low byte of exitno for each exit stub. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
@@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
#endif
/* Jump to exit handler which fills in the ExitState. */
- *mxp++ = XI_JMP; mxp += 4;
- *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
+ if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */
+ *mxp++ = XI_JMP; mxp += 4;
+ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4;
+ *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp);
+ }
/* Commit the code for this group (even if assembly fails later on). */
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
@@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
ExitNo i;
if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+#if LJ_64
+ if (as->J->exitstubgroup[0] == NULL) {
+ /* Store the two potentially out-of-range targets below group 0. */
+ MCode *mxp = as->mcbot;
+ while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3;
+ *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8;
+ *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8;
+ as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */
+ }
+#endif
for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
if (as->J->exitstubgroup[i] == NULL)
as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
@@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
"bad interned 64 bit constant");
} else {
while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
- *(uint64_t*)as->mcbot = *k;
+ *(uint64_t *)as->mcbot = *k;
ir->i = (int32_t)(as->mctop - as->mcbot);
as->mcbot += 8;
as->mclim = as->mcbot + MCLIM_REDZONE;
@@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
p = (MCode *)(void *)ir_k64(irf)->u64;
else
p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i;
- if (p - as->mcp == (int32_t)(p - as->mcp))
+ if (jmprel_ok(p, as->mcp))
return p; /* Call target is still in +-2GB range. */
/* Avoid the indirect case of emit_call(). Try to hoist func addr. */
}
@@ -2084,7 +2102,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
RegSet allow = RSET_GPR;
Reg dest, right;
int32_t k = 0;
- if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */
+ if (as->flagmcp == as->mcp && xa != XOg_X_IMUL) {
+ /* Drop test r,r instruction. */
MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2);
MCode *q = p[0] == 0x0f ? p+1 : p;
if ((*q & 15) < 14) {
@@ -2805,6 +2824,8 @@ static void asm_gc_check(ASMState *as)
emit_rr(as, XO_TEST, RID_RET, RID_RET);
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ASMREF_TMP2; /* MSize steps */
+ /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */
+ if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP;
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
#if LJ_GC64
@@ -2918,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp)
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
/* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
- MCode *p = as->mctop;
- MCode *target, *q;
+ MCode *mcp = as->mctail;
+ MCode *target;
int32_t spadj = as->T->spadjust;
- if (spadj == 0) {
- p -= LJ_64 ? 7 : 6;
- } else {
- MCode *p1;
- /* Patch stack adjustment. */
+ if (spadj) { /* Emit stack adjustment. */
+ if (LJ_64) *mcp++ = 0x48;
if (checki8(spadj)) {
- p -= 3;
- p1 = p-6;
- *p1 = (MCode)spadj;
+ *mcp++ = XI_ARITHi8;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *mcp++ = (MCode)spadj;
} else {
- p1 = p-9;
- *(int32_t *)p1 = spadj;
+ *mcp++ = XI_ARITHi;
+ *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP);
+ *(int32_t *)mcp = spadj; mcp += 4;
}
-#if LJ_64
- p1[-3] = 0x48;
-#endif
- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
}
- /* Patch exit branch. */
- target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- *(int32_t *)(p-4) = jmprel(as->J, p, target);
- p[-5] = XI_JMP;
+ /* Emit exit branch. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp;
+ if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */
+ *mcp++ = XI_JMP; mcp += 4;
+ *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target);
+ } else { /* RIP-relative indirect jump. */
+ *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4;
+ *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp);
+ }
/* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
- for (q = as->mctop-1; q >= p; q--)
- *q = XI_NOP;
- as->mctop = p;
+ while (as->mctop > mcp) *--as->mctop = XI_NOP;
}
/* Prepare tail of code. */
-static void asm_tail_prep(ASMState *as)
+static void asm_tail_prep(ASMState *as, TraceNo lnk)
{
MCode *p = as->mctop;
/* Realign and leave room for backwards loop branch or exit branch. */
@@ -2963,15 +2980,17 @@ static void asm_tail_prep(ASMState *as)
as->mctop = p;
p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
} else {
- p -= 5; /* Space for exit branch (near jmp). */
+ p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */
}
if (as->loopref) {
as->invmcp = as->mcp = p;
} else {
- /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
- as->mcp = p - (LJ_64 ? 7 : 6);
+ /* Leave room for ESP adjustment: add esp, imm */
+ p -= LJ_64 ? 7 : 6;
+ as->mcp = p;
as->invmcp = NULL;
}
+ as->mctail = p;
}
/* -- Trace setup --------------------------------------------------------- */
@@ -3131,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
} else if (*p == XI_CALL &&
(void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
pgc = p+7; /* Do not patch GC check exit. */
+ } else if (LJ_64 && *p == 0xff &&
+ p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) &&
+ p[2] == XI_NOP) {
+ pgc = p+5; /* Do not patch GC check exit. */
}
}
lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
diff --git a/src/lj_bc.h b/src/lj_bc.h
index a94ea4e4a8..53b3e50123 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -259,6 +259,11 @@ static LJ_AINLINE int bc_isret(BCOp op)
return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1);
}
+static LJ_AINLINE int bc_isret_or_tail(BCOp op)
+{
+ return (op == BC_CALLMT || op == BC_CALLT || bc_isret(op));
+}
+
LJ_DATA const uint16_t lj_bc_mode[];
LJ_DATA const uint16_t lj_bc_ofs[];
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 37e909b391..5570952208 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -191,7 +191,7 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t)
} else if (tp == BCDUMP_KTAB_NUM) {
o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls);
- } else if (tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
+ } else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */
settabV(ls->L, o, t);
} else {
lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
@@ -209,13 +209,13 @@ static GCtab *bcread_ktab(LexState *ls)
MSize i;
TValue *o = tvref(t->array);
for (i = 0; i < narray; i++, o++)
- bcread_ktabk(ls, o, t);
+ bcread_ktabk(ls, o, NULL);
}
if (nhash) { /* Read hash entries. */
MSize i;
for (i = 0; i < nhash; i++) {
TValue key;
- bcread_ktabk(ls, &key, t);
+ bcread_ktabk(ls, &key, NULL);
lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t);
}
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index ae69cd28d1..d5f092ea5e 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -781,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
{
CTSize sz = ct->size;
unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
- while (ct->sib) {
+ while (ct->sib && n <= 4) {
+ unsigned int m = 1;
CType *sct;
ct = ctype_get(cts, ct->sib);
if (ctype_isfield(ct->info)) {
sct = ctype_rawchild(cts, ct);
+ if (ctype_isarray(sct->info)) {
+ CType *cct = ctype_rawchild(cts, sct);
+ if (!cct->size) continue;
+ m = sct->size / cct->size;
+ sct = cct;
+ }
if (ctype_isfp(sct->info)) {
r |= sct->size;
- if (!isu) n++; else if (n == 0) n = 1;
+ if (!isu) n += m; else if (n < m) n = m;
} else if (ctype_iscomplex(sct->info)) {
r |= (sct->size >> 1);
- if (!isu) n += 2; else if (n < 2) n = 2;
+ if (!isu) n += 2*m; else if (n < 2*m) n = 2*m;
} else if (ctype_isstruct(sct->info)) {
goto substruct;
} else {
@@ -803,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
sct = ctype_rawchild(cts, ct);
substruct:
if (sct->size > 0) {
- unsigned int s = ccall_classify_struct(cts, sct);
+ unsigned int s = ccall_classify_struct(cts, sct), sn;
if (s <= 1) goto noth;
r |= (s & 255);
- if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+ sn = (s >> 8) * m;
+ if (!isu) n += sn; else if (n < sn) n = sn;
}
}
}
@@ -893,7 +901,9 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
/* -- Common C call handling ---------------------------------------------- */
-/* Infer the destination CTypeID for a vararg argument. */
+/* Infer the destination CTypeID for a vararg argument.
+** Note: may reallocate cts->tab and invalidate CType pointers.
+*/
CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o)
{
if (tvisnumber(o)) {
@@ -921,13 +931,16 @@ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o)
}
}
-/* Setup arguments for C call. */
+/* Setup arguments for C call.
+** Note: may reallocate cts->tab and invalidate CType pointers.
+*/
static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
CCallState *cc)
{
int gcsteps = 0;
TValue *o, *top = L->top;
CTypeID fid;
+ CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. */
CType *ctr;
MSize maxgpr, ngpr = 0, nsp = 0, narg;
#if CCALL_NARG_FPR
@@ -946,7 +959,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
#if LJ_TARGET_X86
/* x86 has several different calling conventions. */
cc->resx87 = 0;
- switch (ctype_cconv(ct->info)) {
+ switch (ctype_cconv(info)) {
case CTCC_FASTCALL: maxgpr = 2; break;
case CTCC_THISCALL: maxgpr = 1; break;
default: maxgpr = 0; break;
@@ -963,7 +976,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
} else if (ctype_iscomplex(ctr->info) || ctype_isstruct(ctr->info)) {
/* Preallocate cdata object and anchor it after arguments. */
CTSize sz = ctr->size;
- GCcdata *cd = lj_cdata_new(cts, ctype_cid(ct->info), sz);
+ GCcdata *cd = lj_cdata_new(cts, ctype_cid(info), sz);
void *dp = cdataptr(cd);
setcdataV(L, L->top++, cd);
if (ctype_isstruct(ctr->info)) {
@@ -986,7 +999,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
}
#if LJ_TARGET_ARM64 && LJ_ABI_WIN
- if ((ct->info & CTF_VARARG)) {
+ if ((info & CTF_VARARG)) {
nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */
ngpr = maxgpr;
nfpr = CCALL_NARG_FPR;
@@ -1007,7 +1020,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
lj_assertL(ctype_isfield(ctf->info), "field expected");
did = ctype_cid(ctf->info);
} else {
- if (!(ct->info & CTF_VARARG))
+ if (!(info & CTF_VARARG))
lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. */
did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */
isva = 1;
@@ -1178,11 +1191,11 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
ct = ctype_rawchild(cts, ct);
}
if (ctype_isfunc(ct->info)) {
+ CTypeID id = ctype_typeid(cts, ct);
CCallState cc;
int gcsteps, ret;
cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz);
gcsteps = ccall_set_args(L, cts, ct, &cc);
- ct = (CType *)((intptr_t)ct-(intptr_t)cts->tab);
cts->cb.slot = ~0u;
lj_vm_ffi_call(&cc);
if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */
@@ -1190,7 +1203,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000);
setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
}
- ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */
+ ct = ctype_get(cts, id); /* Table may have been reallocated. */
gcsteps += ccall_get_results(L, cts, ct, &cc, &ret);
#if LJ_TARGET_X86 && LJ_ABI_WIN
/* Automatically detect __stdcall and fix up C function declaration. */
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index d93dbc6457..c4b25cd7d1 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -34,22 +34,29 @@
#elif LJ_TARGET_X86ORX64
+#if LJ_ABI_BRANCH_TRACK
+#define CALLBACK_MCODE_SLOTSZ 8
+#else
+#define CALLBACK_MCODE_SLOTSZ 4
+#endif
+#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ)
+
#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
#define CALLBACK_SLOT2OFS(slot) \
- (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
+ (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot))
static MSize CALLBACK_OFS2SLOT(MSize ofs)
{
MSize group;
ofs -= CALLBACK_MCODE_HEAD;
- group = ofs / (32*4 + CALLBACK_MCODE_GROUP);
- return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32;
+ group = ofs / (128 + CALLBACK_MCODE_GROUP);
+ return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT;
}
#define CALLBACK_MAX_SLOT \
- (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32)
+ (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT)
#elif LJ_TARGET_ARM
@@ -57,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_ARM64
+#if LJ_ABI_BRANCH_TRACK
+#define CALLBACK_MCODE_SLOTSZ 12
+#endif
+
#define CALLBACK_MCODE_HEAD 32
#elif LJ_TARGET_PPC
@@ -81,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#endif
#ifndef CALLBACK_SLOT2OFS
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
+#ifndef CALLBACK_MCODE_SLOTSZ
+#define CALLBACK_MCODE_SLOTSZ 8
+#endif
+#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot))
+#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ)
#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#endif
@@ -118,9 +132,13 @@ static void *callback_mcode_init(global_State *g, uint8_t *page)
*(void **)p = target; p += 8;
#endif
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+#if LJ_ABI_BRANCH_TRACK
+ *(uint32_t *)p = XI_ENDBR64; p += 4;
+#endif
/* mov al, slot; jmp group */
*p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot;
- if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) {
+ if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) ||
+ slot == CALLBACK_MAX_SLOT-1) {
/* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
*p++ = XI_PUSH + RID_EBP;
*p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
@@ -140,7 +158,8 @@ static void *callback_mcode_init(global_State *g, uint8_t *page)
*p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4;
#endif
} else {
- *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
+ *p++ = XI_JMPs;
+ *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2);
}
}
return p;
@@ -181,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
((void **)p)[1] = g;
p += 4;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+#if LJ_ABI_BRANCH_TRACK
+ *p++ = A64I_BTI_C;
+#endif
*p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
*p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
p++;
@@ -263,7 +285,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
#endif
/* Check for macOS hardened runtime. */
-#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000
+#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000
+#include <pthread.h>
#define CCMAP_CREATE MAP_JIT
#else
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index f88cddfd95..27f2c1dd99 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -1101,12 +1101,15 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
crec_finalizer(J, trcd, 0, fin);
}
-/* Record argument conversions. */
+/* Record argument conversions.
+** Note: may reallocate cts->tab and invalidate CType pointers.
+*/
static TRef crec_call_args(jit_State *J, RecordFFData *rd,
CTState *cts, CType *ct)
{
TRef args[CCI_NARGS_MAX];
CTypeID fid;
+ CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. */
MSize i, n;
TRef tr, *base;
cTValue *o;
@@ -1115,9 +1118,9 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
TRef *arg0 = NULL, *arg1 = NULL;
#endif
int ngpr = 0;
- if (ctype_cconv(ct->info) == CTCC_THISCALL)
+ if (ctype_cconv(info) == CTCC_THISCALL)
ngpr = 1;
- else if (ctype_cconv(ct->info) == CTCC_FASTCALL)
+ else if (ctype_cconv(info) == CTCC_FASTCALL)
ngpr = 2;
#elif LJ_TARGET_ARM64 && LJ_TARGET_OSX
int ngpr = CCALL_NARG_GPR;
@@ -1144,7 +1147,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
lj_assertJ(ctype_isfield(ctf->info), "field expected");
did = ctype_cid(ctf->info);
} else {
- if (!(ct->info & CTF_VARARG))
+ if (!(info & CTF_VARARG))
lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */
#if LJ_TARGET_ARM64 && LJ_TARGET_OSX
if (ngpr >= 0) {
@@ -1248,14 +1251,17 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
{
CTState *cts = ctype_ctsG(J2G(J));
CType *ct = ctype_raw(cts, cd->ctypeid);
+ CTInfo info;
IRType tp = IRT_PTR;
if (ctype_isptr(ct->info)) {
tp = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32;
ct = ctype_rawchild(cts, ct);
}
- if (ctype_isfunc(ct->info)) {
+ info = ct->info; /* crec_call_args may invalidate ct pointer. */
+ if (ctype_isfunc(info)) {
TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR);
CType *ctr = ctype_rawchild(cts, ct);
+ CTInfo ctr_info = ctr->info; /* crec_call_args may invalidate ctr. */
IRType t = crec_ct2irt(cts, ctr);
TRef tr;
TValue tv;
@@ -1263,22 +1269,22 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000);
if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
lj_trace_err(J, LJ_TRERR_BLACKL);
- if (ctype_isvoid(ctr->info)) {
+ if (ctype_isvoid(ctr_info)) {
t = IRT_NIL;
rd->nres = 0;
- } else if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) ||
- ctype_isenum(ctr->info)) || t == IRT_CDATA) {
+ } else if (!(ctype_isnum(ctr_info) || ctype_isptr(ctr_info) ||
+ ctype_isenum(ctr_info)) || t == IRT_CDATA) {
lj_trace_err(J, LJ_TRERR_NYICALL);
}
- if ((ct->info & CTF_VARARG)
+ if ((info & CTF_VARARG)
#if LJ_TARGET_X86
- || ctype_cconv(ct->info) != CTCC_CDECL
+ || ctype_cconv(info) != CTCC_CDECL
#endif
)
func = emitir(IRT(IR_CARG, IRT_NIL), func,
lj_ir_kint(J, ctype_typeid(cts, ct)));
tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func);
- if (ctype_isbool(ctr->info)) {
+ if (ctype_isbool(ctr_info)) {
if (frame_islua(J->L->base-1) && bc_b(frame_pc(J->L->base-1)[-1]) == 1) {
/* Don't check result if ignored. */
tr = TREF_NIL;
@@ -1294,8 +1300,8 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
tr = TREF_TRUE;
}
} else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) ||
- t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) {
- TRef trid = lj_ir_kint(J, ctype_cid(ct->info));
+ t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr_info)) {
+ TRef trid = lj_ir_kint(J, ctype_cid(info));
tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr);
if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J);
} else if (t == IRT_FLOAT || t == IRT_U32) {
diff --git a/src/lj_debug.c b/src/lj_debug.c
index b3d52afc8f..f9392d8e6b 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -101,6 +101,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
pt = funcproto(fn);
pos = proto_bcpos(pt, ins) - 1;
#if LJ_HASJIT
+ if (pos == NO_BCPOS) return 1; /* Pretend it's the first bytecode. */
if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */
if (bc_isret(bc_op(ins[-1]))) {
GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index c60e7d7560..3e1eb64bfc 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
return 0; /* Failed. */
}
+#define emit_movw_k(k) \
+ (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4))
+#define emit_movt_k(k) \
+ (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4))
+
/* Load a 32 bit constant into a GPR. */
static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
@@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i)
emit_d(as, ARMI_MOV^k, rd);
} else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
/* 16 bit loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta1(as, rd, i)) {
/* One step delta relative to another constant. */
} else if ((as->flags & JIT_F_ARMV6T2)) {
/* 32 bit hiword/loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ emit_d(as, emit_movt_k(i), rd);
+ emit_d(as, emit_movw_k(i), rd);
} else if (emit_kdelta2(as, rd, i)) {
/* Two step delta relative to another constant. */
} else {
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
index ca1269b7c3..a8be741562 100644
--- a/src/lj_emit_arm64.h
+++ b/src/lj_emit_arm64.h
@@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target)
}
}
+#if LJ_ABI_BRANCH_TRACK
+static void emit_branch_track(ASMState *as)
+{
+ *--as->mcp = A64I_BTI_J;
+}
+#endif
+
/* -- Emit generic operations --------------------------------------------- */
/* Generic move between two regs. */
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index d8104959aa..d65b1c5777 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index b13f00fe5b..56928e4235 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
/* -- Emit loads/stores --------------------------------------------------- */
+#define jglofs(as, k) \
+ (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff)
+
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index f477301162..858fe753be 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -70,6 +70,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
return p;
}
+#if LJ_ABI_BRANCH_TRACK
+static void emit_branch_track(ASMState *as)
+{
+ emit_u32(as, XI_ENDBR64);
+}
+#endif
+
/* op + modrm */
#define emit_opm(xo, mode, rr, rb, p, delta) \
(p[(delta)-1] = MODRM((mode), (rr), (rb)), \
@@ -471,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source)
/* Return label pointing to current PC. */
#define emit_label(as) ((as)->mcp)
+/* Check if two adresses are in relative jump range. */
+static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b)
+{
+#if LJ_64
+ return a - b == (int32_t)(a - b);
+#else
+ UNUSED(a); UNUSED(b);
+ return 1;
+#endif
+}
+
/* Compute relative 32 bit offset for jump and call instructions. */
static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
@@ -504,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target)
{
MCode *p = as->mcp;
#if LJ_64
- if (target-p != (int32_t)(target-p)) {
+ if (!jmprel_ok(target, p)) {
/* Assumes RID_RET is never an argument to calls and always clobbered. */
emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET);
emit_loadu64(as, RID_RET, (uint64_t)target);
diff --git a/src/lj_err.c b/src/lj_err.c
index 03b5030be6..e8e1875805 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -803,9 +803,17 @@ LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em)
return lj_str_newz(L, err2msg(em));
}
+LJ_NORET LJ_NOINLINE static void lj_err_err(lua_State *L)
+{
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRERR));
+ lj_err_throw(L, LUA_ERRERR);
+}
+
/* Out-of-memory error. */
LJ_NOINLINE void lj_err_mem(lua_State *L)
{
+ if (L->status == LUA_ERRERR)
+ lj_err_err(L);
if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */
lj_vm_unwind_c(L->cframe, LUA_ERRMEM);
if (LJ_HASJIT) {
@@ -902,6 +910,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
/* Stack overflow error. */
void LJ_FASTCALL lj_err_stkov(lua_State *L)
{
+ if (L->status == LUA_ERRERR)
+ lj_err_err(L);
lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL);
lj_err_run(L);
}
diff --git a/src/lj_gc.c b/src/lj_gc.c
index d9581d20d3..c779d583e9 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -106,6 +106,7 @@ static void gc_mark_start(global_State *g)
setgcrefnull(g->gc.weak);
gc_markobj(g, mainthread(g));
gc_markobj(g, tabref(mainthread(g)->env));
+ gc_markobj(g, vmthread(g));
gc_marktv(g, &g->registrytv);
gc_mark_gcroot(g);
g->gc.state = GCSpropagate;
@@ -507,24 +508,25 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
uint8_t oldh = hook_save(g);
GCSize oldt = g->gc.threshold;
int errcode;
+ lua_State *VL = vmthread(g);
TValue *top;
lj_trace_abort(g);
hook_entergc(g); /* Disable hooks and new traces during __gc. */
if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
- top = L->top;
- copyTV(L, top++, mo);
+ top = VL->top;
+ copyTV(VL, top++, mo);
if (LJ_FR2) setnilV(top++);
- setgcV(L, top, o, ~o->gch.gct);
- L->top = top+1;
- errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
+ setgcV(VL, top, o, ~o->gch.gct);
+ VL->top = top+1;
+ errcode = lj_vm_pcall(VL, top, 1+0, -1); /* Stack: |mo|o| -> | */
+ setgcref(g->cur_L, obj2gco(L));
hook_restore(g, oldh);
if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
g->gc.threshold = oldt; /* Restore GC threshold. */
if (errcode) {
- ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */
- lj_vmevent_send(L, ERRFIN,
- copyTV(L, L->top++, restorestack(L, errobj));
+ lj_vmevent_send(L, ERRFIN,
+ copyTV(L, L->top++, VL->top-1);
);
L->top--;
}
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 102ba0b4b7..05a8e9bbe9 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -104,14 +104,6 @@
/* -- JIT engine parameters ----------------------------------------------- */
-#if LJ_TARGET_WINDOWS || LJ_64
-/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */
-#define JIT_P_sizemcode_DEFAULT 64
-#else
-/* Could go as low as 4K, but the mmap() overhead would be rather high. */
-#define JIT_P_sizemcode_DEFAULT 32
-#endif
-
/* Optimization parameters and their defaults. Length is a char in octal! */
#define JIT_PARAMDEF(_) \
_(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
@@ -131,9 +123,9 @@
_(\011, recunroll, 2) /* Min. unroll for true recursion. */ \
\
/* Size of each machine code area (in KBytes). */ \
- _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
+ _(\011, sizemcode, 64) \
/* Max. total size of all machine code areas (in KBytes). */ \
- _(\010, maxmcode, 512) \
+ _(\010, maxmcode, 2048) \
/* End of list. */
enum {
@@ -374,10 +366,14 @@ enum {
LJ_K64_2P63, /* 2^63 */
LJ_K64_M2P64, /* -2^64 */
#endif
+#endif
+#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
+ LJ_K64_VM_EXIT_HANDLER,
+ LJ_K64_VM_EXIT_INTERP,
#endif
LJ_K64__MAX,
};
-#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
+#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
enum {
#if LJ_TARGET_X86ORX64
@@ -393,6 +389,10 @@ enum {
#if LJ_TARGET_MIPS64
LJ_K32_2P63, /* 2^63 */
LJ_K32_M2P64, /* -2^64 */
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
+ LJ_K32_VM_EXIT_HANDLER,
+ LJ_K32_VM_EXIT_INTERP,
#endif
LJ_K32__MAX
};
@@ -513,6 +513,7 @@ typedef struct jit_State {
MCode *mcbot; /* Bottom of current mcode area. */
size_t szmcarea; /* Size of current mcode area. */
size_t szallmcarea; /* Total size of all allocated mcode areas. */
+ uintptr_t mcmin, mcmax; /* Mcode allocation range. */
TValue errinfo; /* Additional info element for trace errors. */
diff --git a/src/lj_load.c b/src/lj_load.c
index 828bf8aea0..24b660a8b1 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -122,8 +122,9 @@ LUALIB_API int luaL_loadfilex(lua_State *L, const char *filename,
copyTV(L, L->top-1, L->top);
}
if (err) {
+ const char *fname = filename ? filename : "stdin";
L->top--;
- lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(err));
+ lua_pushfstring(L, "cannot read %s: %s", fname, strerror(err));
return LUA_ERRFILE;
}
return status;
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 43694226a6..c3032f4e2d 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end)
#if LJ_HASJIT
+#if LUAJIT_SECURITY_MCODE != 0
+/* Protection twiddling failed. Probably due to kernel security. */
+static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
+{
+ lua_CFunction panic = J2G(J)->panic;
+ if (panic) {
+ lua_State *L = J->L;
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
+ panic(L);
+ }
+ exit(EXIT_FAILURE);
+}
+#endif
+
#if LJ_TARGET_WINDOWS
#define MCPROT_RW PAGE_READWRITE
#define MCPROT_RX PAGE_EXECUTE_READ
#define MCPROT_RWX PAGE_EXECUTE_READWRITE
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot)
{
- void *p = LJ_WIN_VALLOC((void *)hint, sz,
- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
- if (!p && !hint)
- lj_trace_err(J, LJ_TRERR_MCODEAL);
- return p;
+ return LJ_WIN_VALLOC((void *)hint, sz,
+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J); UNUSED(sz);
+ UNUSED(sz);
VirtualFree(p, 0, MEM_RELEASE);
}
-static int mcode_setprot(void *p, size_t sz, DWORD prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
DWORD oprot;
- return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
+ if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J);
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
+#endif
}
#elif LJ_TARGET_POSIX
@@ -99,7 +114,7 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
#endif
/* Check for macOS hardened runtime. */
-#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000
+#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000
+#include <pthread.h>
#define MCMAP_CREATE MAP_JIT
#else
@@ -111,37 +126,39 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
#ifdef PROT_MPROTECT
#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX))
+#elif MCMAP_CREATE
+#define MCPROT_CREATE PROT_EXEC
#else
#define MCPROT_CREATE 0
#endif
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
+static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot)
{
void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0);
- if (p == MAP_FAILED) {
- if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
- p = NULL;
+ if (p == MAP_FAILED) return NULL;
#if MCMAP_CREATE
- } else {
- pthread_jit_write_protect_np(0);
+ pthread_jit_write_protect_np(0);
#endif
- }
return p;
}
-static void mcode_free(jit_State *J, void *p, size_t sz)
+static void mcode_free(void *p, size_t sz)
{
- UNUSED(J);
munmap(p, sz);
}
-static int mcode_setprot(void *p, size_t sz, int prot)
+static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot)
{
+#if LUAJIT_SECURITY_MCODE != 0
#if MCMAP_CREATE
- pthread_jit_write_protect_np((prot & PROC_EXEC));
+ UNUSED(J); UNUSED(p); UNUSED(sz);
+ pthread_jit_write_protect_np((prot & PROT_EXEC));
- return 0;
#else
- return mprotect(p, sz, prot);
+ if (mprotect(p, sz, prot)) mcode_protfail(J);
+#endif
+#else
+ UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot);
#endif
}
@@ -151,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot)
#endif
+#ifdef LUAJIT_MCODE_TEST
+/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try:
+** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua
+** LUAJIT_MCODE_TEST=F luajit -jv main.lua
+*/
+static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot)
+{
+ static int test_ofs = 0;
+ static const char *test_str;
+ if (!test_str) {
+ test_str = getenv("LUAJIT_MCODE_TEST");
+ if (!test_str) test_str = "";
+ }
+ switch (test_str[test_ofs]) {
+ case 'a': /* OK for one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ case '\0': /* EOS: OK for any further allocations. */
+ break;
+ case 'h': /* Ignore one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'H': /* Ignore any further hints. */
+ hint = 0u;
+ break;
+ case 'r': /* Randomize one hint. */
+ test_ofs++;
+ /* fallthrough */
+ case 'R': /* Randomize any further hints. */
+ hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu;
+ hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1;
+ break;
+ case 'f': /* Fail one allocation. */
+ test_ofs++;
+ /* fallthrough */
+ default: /* 'F' or unknown: Fail any further allocations. */
+ return NULL;
+ }
+ return mcode_alloc_at(hint, sz, prot);
+}
+#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot)
+#endif
+
/* -- MCode area protection ----------------------------------------------- */
#if LUAJIT_SECURITY_MCODE == 0
@@ -172,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot)
static void mcode_protect(jit_State *J, int prot)
{
- UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
+ UNUSED(J); UNUSED(prot);
}
#else
@@ -188,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot)
#define MCPROT_GEN MCPROT_RW
#define MCPROT_RUN MCPROT_RX
-/* Protection twiddling failed. Probably due to kernel security. */
-static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J)
-{
- lua_CFunction panic = J2G(J)->panic;
- if (panic) {
- lua_State *L = J->L;
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT));
- panic(L);
- }
- exit(EXIT_FAILURE);
-}
-
/* Change protection of MCode area. */
static void mcode_protect(jit_State *J, int prot)
{
if (J->mcprot != prot) {
- if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot)))
- mcode_protfail(J);
+ mcode_setprot(J, J->mcarea, J->szmcarea, prot);
J->mcprot = prot;
}
}
@@ -214,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot)
/* -- MCode area allocation ----------------------------------------------- */
-#if LJ_64
-#define mcode_validptr(p) (p)
-#else
-#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000)
-#endif
-
#ifdef LJ_TARGET_JUMPRANGE
-/* Get memory within relative jump distance of our code in 64 bit mode. */
-static void *mcode_alloc(jit_State *J, size_t sz)
+#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u)
+
+/* Set a memory range for mcode allocation with addr in the middle. */
+static void mcode_setrange(jit_State *J, uintptr_t addr)
{
- /* Target an address in the static assembler code (64K aligned).
- ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB.
- ** Use half the jump range so every address in the range can reach any other.
- */
#if LJ_TARGET_MIPS
- /* Use the middle of the 256MB-aligned region. */
- uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
- ~(uintptr_t)0x0fffffffu) + 0x08000000u;
+ /* Use the whole 256MB-aligned region. */
+ J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1);
+ J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE);
#else
- uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
+ /* Every address in the 64KB-aligned range should be able to reach
+ ** any other, so MCODE_RANGE64 is only half the (signed) branch range.
+ */
+ J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu;
+ J->mcmax = J->mcmin + MCODE_RANGE64;
#endif
- const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21);
- /* First try a contiguous area below the last one. */
- uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0;
- int i;
- /* Limit probing iterations, depending on the available pool size. */
- for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) {
- if (mcode_validptr(hint)) {
- void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN);
-
- if (mcode_validptr(p) &&
- ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range))
- return p;
- if (p) mcode_free(J, p, sz); /* Free badly placed area. */
- }
- /* Next try probing 64K-aligned pseudo-random addresses. */
+ /* Avoid wrap-around and the 64KB corners. */
+ if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u;
+ if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu;
+}
+
+/* Check if an address is in range of the mcode allocation range. */
+static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz)
+{
+ /* Take care of unsigned wrap-around of addr + sz, too. */
+ return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax;
+}
+
+/* Get memory within a specific jump range in 64 bit mode. */
+static void *mcode_alloc(jit_State *J, size_t sz)
+{
+ uintptr_t hint;
+ int i = 0, j;
+ if (!J->mcmin) /* Place initial range near the interpreter code. */
+ mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler);
+ else if (!J->mcmax) /* Switch to a new range (already flushed). */
+ goto newrange;
+ /* First try a contiguous area below the last one (if in range). */
+ hint = (uintptr_t)J->mcarea - sz;
+ if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */
+ goto probe;
+ for (; i < 16; i++) {
+ void *p = mcode_alloc_at(hint, sz, MCPROT_GEN);
+ if (mcode_inrange(J, (uintptr_t)p, sz))
+ return p; /* Success. */
+ else if (p)
+ mcode_free(p, sz); /* Free badly placed area. */
+ probe:
+ /* Next try probing 64KB-aligned pseudo-random addresses. */
+ j = 0;
do {
- hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
+ hint = J->mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64);
+ if (++j > 15) goto fail;
+ } while (!mcode_inrange(J, hint, sz));
+ }
+fail:
+ if (!J->mcarea) { /* Switch to a new range now. */
+ void *p;
+ newrange:
+ p = mcode_alloc_at(0, sz, MCPROT_GEN);
+ if (p) {
+ mcode_setrange(J, (uintptr_t)p + (sz >> 1));
+ return p; /* Success. */
+ }
+ } else {
+ J->mcmax = 0; /* Switch to a new range after the flush. */
}
lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */
return NULL;
@@ -267,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz)
{
#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
/* Allow better executable memory allocation for OpenBSD W^X mode. */
- void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
- if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
- mcode_free(J, p, sz);
- return NULL;
- }
- return p;
+ void *p = mcode_alloc_at(0, sz, MCPROT_RUN);
+ if (p) mcode_setprot(J, p, sz, MCPROT_GEN);
#else
- return mcode_alloc_at(J, 0, sz, MCPROT_GEN);
+ void *p = mcode_alloc_at(0, sz, MCPROT_GEN);
#endif
+ if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL);
+ return p;
}
#endif
@@ -287,7 +359,6 @@ static void mcode_allocarea(jit_State *J)
{
MCode *oldarea = J->mcarea;
size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10;
- sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
J->mcarea = (MCode *)mcode_alloc(J, sz);
J->szmcarea = sz;
J->mcprot = MCPROT_GEN;
@@ -309,7 +380,7 @@ void lj_mcode_free(jit_State *J)
MCode *next = ((MCLink *)mc)->next;
size_t sz = ((MCLink *)mc)->size;
lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
- mcode_free(J, mc, sz);
+ mcode_free(mc, sz);
mc = next;
}
}
@@ -345,32 +416,25 @@ void lj_mcode_abort(jit_State *J)
MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
{
if (finish) {
-#if LUAJIT_SECURITY_MCODE
if (J->mcarea == ptr)
mcode_protect(J, MCPROT_RUN);
- else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
- mcode_protfail(J);
-#endif
+ else
+ mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN);
return NULL;
} else {
- MCode *mc = J->mcarea;
+ uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr;
/* Try current area first to use the protection cache. */
- if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
-#if LUAJIT_SECURITY_MCODE
+ if (addr >= base && addr < base + J->szmcarea) {
mcode_protect(J, MCPROT_GEN);
-#endif
- return mc;
+ return (MCode *)base;
}
/* Otherwise search through the list of MCode areas. */
for (;;) {
- mc = ((MCLink *)mc)->next;
- lj_assertJ(mc != NULL, "broken MCode area chain");
- if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
-#if LUAJIT_SECURITY_MCODE
- if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
- mcode_protfail(J);
-#endif
- return mc;
+ base = (uintptr_t)(((MCLink *)base)->next);
+ lj_assertJ(base != 0, "broken MCode area chain");
+ if (addr >= base && addr < base + ((MCLink *)base)->size) {
+ mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN);
+ return (MCode *)base;
}
}
}
@@ -382,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need)
size_t sizemcode, maxmcode;
lj_mcode_abort(J);
sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
- sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
if (need * sizeof(MCode) > sizemcode)
lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
diff --git a/src/lj_obj.h b/src/lj_obj.h
index 855727bfab..73b186e256 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -647,6 +647,7 @@ typedef struct global_State {
TValue tmptv, tmptv2; /* Temporary TValues. */
Node nilnode; /* Fallback 1-element hash part (nil key and value). */
TValue registrytv; /* Anchor for registry. */
+ GCRef vmthref; /* Link to VM thread. */
GCupval uvhead; /* Head of double-linked list of all open upvalues. */
int32_t hookcount; /* Instruction hook countdown. */
int32_t hookcstart; /* Start count for instruction hook counter. */
@@ -663,6 +664,7 @@ typedef struct global_State {
} global_State;
#define mainthread(g) (&gcref(g->mainthref)->th)
+#define vmthread(g) (&gcref(g->vmthref)->th)
#define niltv(L) \
check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val)
#define niltvg(g) \
diff --git a/src/lj_parse.c b/src/lj_parse.c
index f41163804a..181ce4d7e2 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -1517,23 +1517,11 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
#endif
-/* Check if bytecode op returns. */
-static int bcopisret(BCOp op)
-{
- switch (op) {
- case BC_CALLMT: case BC_CALLT:
- case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1:
- return 1;
- default:
- return 0;
- }
-}
-
/* Fixup return instruction for prototype. */
static void fs_fixup_ret(FuncState *fs)
{
BCPos lastpc = fs->pc;
- if (lastpc <= fs->lasttarget || !bcopisret(bc_op(fs->bcbase[lastpc-1].ins))) {
+ if (lastpc <= fs->lasttarget || !bc_isret_or_tail(bc_op(fs->bcbase[lastpc-1].ins))) {
if ((fs->bl->flags & FSCOPE_UPVAL))
bcemit_AJ(fs, BC_UCLO, 0, 0);
bcemit_AD(fs, BC_RET0, 0, 1); /* Need final return. */
@@ -1605,8 +1593,8 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
fs_fixup_line(fs, pt, (void *)((char *)pt + ofsli), numline);
fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar);
- lj_vmevent_send(L, BC,
- setprotoV(L, L->top++, pt);
+ lj_vmevent_send(G(L), BC,
+ setprotoV(V, V->top++, pt);
);
L->top--; /* Pop table of constants. */
diff --git a/src/lj_prng.c b/src/lj_prng.c
index 02146b273a..1bbb7eaba3 100644
--- a/src/lj_prng.c
+++ b/src/lj_prng.c
@@ -125,7 +125,7 @@ static PRGR libfunc_rgr;
#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
#define LJ_TARGET_HAS_GETENTROPY 1
#endif
-#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX
+#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX || LJ_TARGET_HURD
#define LJ_TARGET_HAS_GETENTROPY 1
#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index c6a082d438..6543f2745c 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -973,7 +973,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_trace_err(J, LJ_TRERR_LLEAVE);
} else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */
- } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) {
+ } else if (1 + pt->framesize >= LJ_MAX_JSLOTS ||
+ J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) {
lj_trace_err(J, LJ_TRERR_STACKOV);
} else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
@@ -1107,7 +1108,10 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
return 0; /* No metamethod. */
}
/* The cdata metatable is treated as immutable. */
- if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
+ if (LJ_HASFFI && tref_iscdata(ix->tab)) {
+ mix.tab = TREF_NIL;
+ goto immutable_mt;
+ }
ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
goto nocheck;
diff --git a/src/lj_snap.c b/src/lj_snap.c
index cb1044392f..d0d28c817e 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -956,8 +956,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
const BCIns *pc = snap_pc(&map[nent]);
lua_State *L = J->L;
- /* Set interpreter PC to the next PC to get correct error messages. */
- setcframe_pc(L->cframe, pc+1);
+ /* Set interpreter PC to the next PC to get correct error messages.
+ ** But not for returns or tail calls, since pc+1 may be out-of-range.
+ */
+ setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1);
setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc);
/* Make sure the stack is big enough for the slots from the snapshot. */
diff --git a/src/lj_state.c b/src/lj_state.c
index d8fc545a0d..68e78faad1 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -195,12 +195,14 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
lj_meta_init(L);
lj_lex_init(L);
fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
+ fixstring(lj_err_str(L, LJ_ERR_ERRERR)); /* Preallocate err in err msg. */
g->gc.threshold = 4*g->gc.total;
#if LJ_HASFFI
lj_ctype_initfin(L);
#endif
lj_trace_initstate(g);
lj_err_verify();
+ setgcref(g->vmthref, obj2gco(lj_state_new(L)));
return NULL;
}
@@ -260,7 +262,11 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
}
#endif
GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State));
- if (GG == NULL || !checkptrGC(GG)) return NULL;
+ if (GG == NULL) return NULL;
+ if (!checkptrGC(GG)) {
+ allocf(allocd, GG, sizeof(GG_State), 0);
+ return NULL;
+ }
memset(GG, 0, sizeof(GG_State));
L = &GG->L;
g = &GG->g;
@@ -371,6 +377,10 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
lj_assertG(L != mainthread(g), "free of main thread");
if (obj2gco(L) == gcref(g->cur_L))
setgcrefnull(g->cur_L);
+#if LJ_HASFFI
+ if (ctype_ctsG(g) && ctype_ctsG(g)->L == L) /* Avoid dangling cts->L. */
+ ctype_ctsG(g)->L = mainthread(g);
+#endif
if (gcref(L->openupval) != NULL) {
lj_func_closeuv(L, tvref(L->stack));
lj_trace_abort(g); /* For aa_uref soundness. */
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
index 04aebaa472..bb649fc840 100644
--- a/src/lj_strfmt.c
+++ b/src/lj_strfmt.c
@@ -170,7 +170,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
} else if (tvisbuf(o)) {
SBufExt *sbx = bufV(o);
*lenp = sbufxlen(sbx);
- return sbx->r;
+ return sbx->r ? sbx->r : "";
} else if (tvisint(o)) {
sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
} else if (tvisnum(o)) {
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index d0bbc5a5fb..947545f821 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -190,6 +190,7 @@ typedef enum ARMIns {
ARMI_LDRSB = 0xe01000d0,
ARMI_LDRSH = 0xe01000f0,
ARMI_LDRD = 0xe00000d0,
+ ARMI_LDRL = 0xe51f0000,
ARMI_STR = 0xe4000000,
ARMI_STRB = 0xe4400000,
ARMI_STRH = 0xe00000b0,
@@ -200,6 +201,7 @@ typedef enum ARMIns {
ARMI_BL = 0xeb000000,
ARMI_BLX = 0xfa000000,
ARMI_BLXr = 0xe12fff30,
+ ARMI_BX = 0xe12fff10,
/* ARMv6 */
ARMI_REV = 0xe6bf0f30,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
index 9274187117..3113d1410a 100644
--- a/src/lj_target_arm64.h
+++ b/src/lj_target_arm64.h
@@ -110,6 +110,7 @@ typedef struct {
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
+ if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
@@ -265,6 +266,10 @@ typedef enum A64Ins {
A64I_BRAAZ = 0xd61f081f,
A64I_BLRAAZ = 0xd63f081f,
+ A64I_BTI_C = 0xd503245f,
+ A64I_BTI_J = 0xd503249f,
+ A64I_BTI_JC = 0xd50324df,
+
A64I_NOP = 0xd503201f,
/* FP */
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index 5a1b5a7cca..58f311884f 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -115,6 +115,7 @@ typedef struct {
static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
{
while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */
+ if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */
return p + 3 + exitno;
}
/* Avoid dependence on lj_jit.h if only including lj_target.h. */
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 6a528e8288..fa32a5d46f 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -242,6 +242,9 @@ typedef enum {
XV_SHLX = XV_660f38(f7),
XV_SHRX = XV_f20f38(f7),
+ /* Special NOP instructions. */
+ XI_ENDBR64 = 0xfa1e0ff3,
+
/* Variable-length opcodes. XO_* prefix. */
XO_OR = XO_(0b),
XO_MOV = XO_(8b),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 0e948e8d08..47d7faa5c9 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -296,8 +296,8 @@ int lj_trace_flushall(lua_State *L)
/* Free the whole machine code and invalidate all exit stub groups. */
lj_mcode_free(J);
memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
- lj_vmevent_send(L, TRACE,
- setstrV(L, L->top++, lj_str_newlit(L, "flush"));
+ lj_vmevent_send(J2G(J), TRACE,
+ setstrV(V, V->top++, lj_str_newlit(V, "flush"));
);
return 0;
}
@@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g)
J->k32[LJ_K32_M2P64] = 0xdf800000;
#endif
#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS32
+ J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
+ J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;
+#endif
+#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
+ J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0);
+ J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0);
+#endif
}
/* Free everything associated with the JIT compiler state. */
@@ -408,7 +416,6 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
/* Start tracing. */
static void trace_start(jit_State *J)
{
- lua_State *L;
TraceNo traceno;
if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
@@ -458,20 +465,19 @@ static void trace_start(jit_State *J)
J->ktrace = 0;
setgcref(J->cur.startpt, obj2gco(J->pt));
- L = J->L;
- lj_vmevent_send(L, TRACE,
- setstrV(L, L->top++, lj_str_newlit(L, "start"));
- setintV(L->top++, traceno);
- setfuncV(L, L->top++, J->fn);
- setintV(L->top++, proto_bcpos(J->pt, J->pc));
+ lj_vmevent_send(J2G(J), TRACE,
+ setstrV(V, V->top++, lj_str_newlit(V, "start"));
+ setintV(V->top++, traceno);
+ setfuncV(V, V->top++, J->fn);
+ setintV(V->top++, proto_bcpos(J->pt, J->pc));
if (J->parent) {
- setintV(L->top++, J->parent);
- setintV(L->top++, J->exitno);
+ setintV(V->top++, J->parent);
+ setintV(V->top++, J->exitno);
} else {
BCOp op = bc_op(*J->pc);
if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) {
- setintV(L->top++, J->exitno); /* Parent of stitched trace. */
- setintV(L->top++, -1);
+ setintV(V->top++, J->exitno); /* Parent of stitched trace. */
+ setintV(V->top++, -1);
}
}
);
@@ -486,7 +492,6 @@ static void trace_stop(jit_State *J)
GCproto *pt = &gcref(J->cur.startpt)->pt;
TraceNo traceno = J->cur.traceno;
GCtrace *T = J->curfinal;
- lua_State *L;
switch (op) {
case BC_FORL:
@@ -543,11 +548,10 @@ static void trace_stop(jit_State *J)
J->postproc = LJ_POST_NONE;
trace_save(J, T);
- L = J->L;
- lj_vmevent_send(L, TRACE,
- setstrV(L, L->top++, lj_str_newlit(L, "stop"));
- setintV(L->top++, traceno);
- setfuncV(L, L->top++, J->fn);
+ lj_vmevent_send(J2G(J), TRACE,
+ setstrV(V, V->top++, lj_str_newlit(V, "stop"));
+ setintV(V->top++, traceno);
+ setfuncV(V, V->top++, J->fn);
);
}
@@ -602,18 +606,17 @@ static int trace_abort(jit_State *J)
/* Is there anything to abort? */
traceno = J->cur.traceno;
if (traceno) {
- ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */
J->cur.link = 0;
J->cur.linktype = LJ_TRLINK_NONE;
- lj_vmevent_send(L, TRACE,
+ lj_vmevent_send(J2G(J), TRACE,
cTValue *bot = tvref(L->stack)+LJ_FR2;
cTValue *frame;
const BCIns *pc;
BCPos pos = 0;
- setstrV(L, L->top++, lj_str_newlit(L, "abort"));
- setintV(L->top++, traceno);
+ setstrV(V, V->top++, lj_str_newlit(V, "abort"));
+ setintV(V->top++, traceno);
/* Find original Lua function call to generate a better error message. */
- for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
+ for (frame = L->base-1, pc = J->pc; ; frame = frame_prev(frame)) {
if (isluafunc(frame_func(frame))) {
pos = proto_bcpos(funcproto(frame_func(frame)), pc);
break;
@@ -625,10 +628,10 @@ static int trace_abort(jit_State *J)
pc = frame_pc(frame) - 1;
}
}
- setfuncV(L, L->top++, frame_func(frame));
- setintV(L->top++, pos);
- copyTV(L, L->top++, restorestack(L, errobj));
- copyTV(L, L->top++, &J->errinfo);
+ setfuncV(V, V->top++, frame_func(frame));
+ setintV(V->top++, pos);
+ copyTV(V, V->top++, L->top-1);
+ copyTV(V, V->top++, &J->errinfo);
);
/* Drop aborted trace after the vmevent (which may still access it). */
setgcrefnull(J->trace[traceno]);
@@ -637,10 +640,15 @@ static int trace_abort(jit_State *J)
J->cur.traceno = 0;
}
L->top--; /* Remove error object */
- if (e == LJ_TRERR_DOWNREC)
+ if (e == LJ_TRERR_DOWNREC) {
return trace_downrec(J);
- else if (e == LJ_TRERR_MCODEAL)
+ } else if (e == LJ_TRERR_MCODEAL) {
+ if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */
+ J->flags &= ~JIT_F_ON;
+ lj_dispatch_update(J2G(J));
+ }
lj_trace_flushall(L);
+ }
return 0;
}
@@ -679,16 +687,16 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
case LJ_TRACE_RECORD:
trace_pendpatch(J, 0);
setvmstate(J2G(J), RECORD);
- lj_vmevent_send_(L, RECORD,
+ lj_vmevent_send_(J2G(J), RECORD,
/* Save/restore state for trace recorder. */
TValue savetv = J2G(J)->tmptv;
TValue savetv2 = J2G(J)->tmptv2;
TraceNo parent = J->parent;
ExitNo exitno = J->exitno;
- setintV(L->top++, J->cur.traceno);
- setfuncV(L, L->top++, J->fn);
- setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1);
- setintV(L->top++, J->framedepth);
+ setintV(V->top++, J->cur.traceno);
+ setfuncV(V, V->top++, J->fn);
+ setintV(V->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1);
+ setintV(V->top++, J->framedepth);
,
J2G(J)->tmptv = savetv;
J2G(J)->tmptv2 = savetv2;
@@ -826,23 +834,23 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud)
#ifndef LUAJIT_DISABLE_VMEVENT
/* Push all registers from exit state. */
-static void trace_exit_regs(lua_State *L, ExitState *ex)
+static void trace_exit_regs(lua_State *V, ExitState *ex)
{
int32_t i;
- setintV(L->top++, RID_NUM_GPR);
- setintV(L->top++, RID_NUM_FPR);
+ setintV(V->top++, RID_NUM_GPR);
+ setintV(V->top++, RID_NUM_FPR);
for (i = 0; i < RID_NUM_GPR; i++) {
if (sizeof(ex->gpr[i]) == sizeof(int32_t))
- setintV(L->top++, (int32_t)ex->gpr[i]);
+ setintV(V->top++, (int32_t)ex->gpr[i]);
else
- setnumV(L->top++, (lua_Number)ex->gpr[i]);
+ setnumV(V->top++, (lua_Number)ex->gpr[i]);
}
#if !LJ_SOFTFP
for (i = 0; i < RID_NUM_FPR; i++) {
- setnumV(L->top, ex->fpr[i]);
- if (LJ_UNLIKELY(tvisnan(L->top)))
- setnanV(L->top);
- L->top++;
+ setnumV(V->top, ex->fpr[i]);
+ if (LJ_UNLIKELY(tvisnan(V->top)))
+ setnanV(V->top);
+ V->top++;
}
#endif
}
@@ -884,6 +892,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
#ifdef EXITSTATE_PCREG
J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
+#else
+ UNUSED(ex);
#endif
T = traceref(J, J->parent); UNUSED(T);
#ifdef EXITSTATE_CHECKEXIT
@@ -904,11 +914,11 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */
if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
- lj_vmevent_send(L, TEXIT,
- lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
- setintV(L->top++, J->parent);
- setintV(L->top++, J->exitno);
- trace_exit_regs(L, ex);
+ lj_vmevent_send(G(L), TEXIT,
+ lj_state_checkstack(V, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+ setintV(V->top++, J->parent);
+ setintV(V->top++, J->exitno);
+ trace_exit_regs(V, ex);
);
pc = exd.pc;
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index 070c6144aa..8913ead946 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -38,6 +38,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
void lj_vmevent_call(lua_State *L, ptrdiff_t argbase)
{
global_State *g = G(L);
+ lua_State *oldL = gco2th(gcref(g->cur_L));
uint8_t oldmask = g->vmevmask;
uint8_t oldh = hook_save(g);
int status;
@@ -51,6 +52,10 @@ void lj_vmevent_call(lua_State *L, ptrdiff_t argbase)
fputs(tvisstr(L->top) ? strVdata(L->top) : "?", stderr);
fputc('\n', stderr);
}
+ setgcref(g->cur_L, obj2gco(oldL));
+#if LJ_HASJIT
+ G2J(g)->L = oldL;
+#endif
hook_restore(g, oldh);
if (g->vmevmask != VMEVENT_NOCACHE)
g->vmevmask = oldmask; /* Restore event mask, but not if not modified. */
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
index 8a99536068..cdd4f75825 100644
--- a/src/lj_vmevent.h
+++ b/src/lj_vmevent.h
@@ -32,23 +32,25 @@ typedef enum {
} VMEvent;
#ifdef LUAJIT_DISABLE_VMEVENT
-#define lj_vmevent_send(L, ev, args) UNUSED(L)
-#define lj_vmevent_send_(L, ev, args, post) UNUSED(L)
+#define lj_vmevent_send(g, ev, args) UNUSED(g)
+#define lj_vmevent_send_(g, ev, args, post) UNUSED(g)
#else
-#define lj_vmevent_send(L, ev, args) \
- if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
- ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \
+#define lj_vmevent_send(g, ev, args) \
+ if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
+ lua_State *V = vmthread(g); \
+ ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \
if (argbase) { \
args \
- lj_vmevent_call(L, argbase); \
+ lj_vmevent_call(V, argbase); \
} \
}
-#define lj_vmevent_send_(L, ev, args, post) \
- if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
- ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \
+#define lj_vmevent_send_(g, ev, args, post) \
+ if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
+ lua_State *V = vmthread(g); \
+ ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \
if (argbase) { \
args \
- lj_vmevent_call(L, argbase); \
+ lj_vmevent_call(V, argbase); \
post \
} \
}
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 69c0c61a9f..3f32e1a0e5 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -5,11 +5,12 @@
@rem Then cd to this directory and run this script. Use the following
@rem options (in order), if needed. The default is a dynamic release build.
@rem
-@rem nogc64 disable LJ_GC64 mode for x64
-@rem debug emit debug symbols
-@rem amalg amalgamated build
-@rem static create static lib to statically link into your project
-@rem mixed create static lib to build a DLL in your project
+@rem nogc64 disable LJ_GC64 mode for x64
+@rem lua52compat enable extra Lua 5.2 extensions
+@rem debug emit debug symbols
+@rem amalg amalgamated build
+@rem static create static lib to statically link into your project
+@rem mixed create static lib to build a DLL in your project
@if not defined INCLUDE goto :FAIL
@@ -18,7 +19,7 @@
@set DEBUGCFLAGS=
@set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline
@set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD
-@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd
+@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd
@set LJCOMPILETARGET=/Zi
@set LJLINKTYPE=/DEBUG /RELEASE
@set LJLINKTYPE_DEBUG=/DEBUG
@@ -64,6 +65,10 @@ if exist minilua.exe.manifest^
@set DASC=vm_x86.dasc
@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
:DA
+@if "%1" neq "lua52compat" goto :NOLUA52COMPAT
+@shift
+@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT
+:NOLUA52COMPAT
minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index ca08fc117e..86bef0cfbc 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -1717,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|.endmacro
|
- | math_minmax math_min, gt, pl
- | math_minmax math_max, lt, le
+ | math_minmax math_min, gt, hs
+ | math_minmax math_max, lt, ls
|
|//-- String library -----------------------------------------------------
|
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
index 58efe400e4..a437b65766 100644
--- a/src/vm_arm64.dasc
+++ b/src/vm_arm64.dasc
@@ -92,6 +92,17 @@
|.macro ret_auth; ret; .endmacro
|.endif
|
+|// ARM64 branch target identification (BTI).
+|.if BRANCH_TRACK
+|.macro bti_jump; bti j; .endmacro
+|.macro bti_call; bti c; .endmacro
+|.macro bti_tailcall; bti jc; .endmacro
+|.else
+|.macro bti_jump; .endmacro
+|.macro bti_call; .endmacro
+|.macro bti_tailcall; .endmacro
+|.endif
+|
|//-----------------------------------------------------------------------
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
| // (void *cframe, int errcode)
+ | bti_tailcall
| add fp, CARG1, # SAVE_FP_LR_
| mov sp, CARG1
| mov CRET1, CARG2
| ldr L, SAVE_L
| ldr GL, L->glref
|->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | bti_tailcall
| mv_vmstate TMP0w, C
| st_vmstate TMP0w
| b ->vm_leave_unw
|
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe)
+ | bti_tailcall
| add fp, CARG1, # SAVE_FP_LR_
| mov sp, CARG1
| ldr L, SAVE_L
| init_constants
| ldr GL, L->glref // Setup pointer to global state.
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | bti_tailcall
| mov RC, #16 // 2 results: false + error message.
| ldr BASE, L->base
| mov_false TMP0
@@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->cont_cat: // RA = resultptr, CARG4 = meta base
+ | bti_jump
| ldr INSw, [PC, #-4]
| sub CARG2, CARG4, #32
| ldr TMP0, [RA]
@@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx)
| sub RB, RB, #0x20000
| csel PC, PC, RB, lo
|->cont_nop:
+ | bti_jump
| ins_next
|
|->cont_ra: // RA = resultptr
+ | bti_jump
| ldr INSw, [PC, #-4]
| ldr TMP0, [RA]
| decode_RA TMP1, INS
@@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx)
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
+ | bti_jump
| ldr TMP0, [RA]
| mov_true TMP1
| cmp TMP1, TMP0 // Branch if result is true.
| b <4
|
|->cont_condf: // RA = resultptr
+ | bti_jump
| ldr TMP0, [RA]
| mov_false TMP1
| cmp TMP0, TMP1 // Branch if result is false.
@@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc, name
|->ff_ .. name:
+ | bti_jump
|.endmacro
|
|.macro .ffunc_1, name
|->ff_ .. name:
+ | bti_jump
| ldr CARG1, [BASE]
| cmp NARGS8:RC, #8
| blo ->fff_fallback
@@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc_2, name
|->ff_ .. name:
+ | bti_jump
| ldp CARG1, CARG2, [BASE]
| cmp NARGS8:RC, #16
| blo ->fff_fallback
@@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_record: // Dispatch target for recording phase.
|.if JIT
+ | bti_jump
| ldrb CARG1w, GL->hookmask
| tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
| bne >5
@@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vm_rethook: // Dispatch target for return hooks.
+ | bti_jump
| ldrb TMP2w, GL->hookmask
| tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
|5: // Re-dispatch to static ins.
@@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx)
| br_auth TMP0
|
|->vm_inshook: // Dispatch target for instr/line hooks.
+ | bti_jump
| ldrb TMP2w, GL->hookmask
| ldr TMP3w, GL->hookcount
| tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
@@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx)
| br_auth TMP0
|
|->cont_hook: // Continue from hook yield.
+ | bti_jump
| ldr CARG1, [CARG4, #-40]
| add PC, PC, #4
| str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
@@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vm_callhook: // Dispatch target for call hooks.
+ | bti_jump
| mov CARG2, PC
|.if JIT
| b >1
@@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching.
|.if JIT
| // RA = resultptr, CARG4 = meta base
+ | bti_jump
| ldr RBw, SAVE_MULTRES
| ldr INSw, [PC, #-4]
| ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
@@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
+ | bti_jump
| mov CARG1, L
| str BASE, L->base
| mov CARG2, PC
@@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_exit_handler:
|.if JIT
+ | bti_call
| sub sp, sp, #(64*8)
| savex_, 0, 1
| savex_, 2, 3
@@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vm_exit_interp:
+ | bti_jump
| // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
|.if JIT
| ldr L, SAVE_L
@@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx)
|
| // int lj_vm_modi(int dividend, int divisor);
|->vm_modi:
+ | bti_call
| eor CARG4w, CARG1w, CARG2w
| cmp CARG4w, #0
| eor CARG3w, CARG1w, CARG1w, asr #31
@@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx)
|// Next idx returned in CRET2w.
|->vm_next:
|.if JIT
+ | bti_call
| ldr NEXT_LIM, NEXT_TAB->asize
| ldr NEXT_TMP1, NEXT_TAB->array
|1: // Traverse array part.
@@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|=>defop:
switch (op) {
+#if !LJ_HASJIT
+ case BC_FORL:
+ case BC_JFORI:
+ case BC_JFORL:
+ case BC_ITERL:
+ case BC_JITERL:
+ case BC_LOOP:
+ case BC_JLOOP:
+ case BC_FUNCF:
+ case BC_JFUNCF:
+ case BC_JFUNCV:
+#endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ break; /* Avoid redundant bti instructions. */
+ default:
+ | bti_jump
+ break;
+ }
+
+ switch (op) {
/* -- Comparison ops ---------------------------------------------------- */
@@ -3443,6 +3497,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
|->vm_IITERN:
| // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ |.if JIT
+ | bti_jump
+ |.endif
| add RA, BASE, RA, lsl #3
| ldr TAB:RB, [RA, #-16]
| ldrh TMP3w, [PC, # OFS_RD]
@@ -4122,6 +4179,19 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.align 3\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif
+#endif
+#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK
+ fprintf(ctx->fp,
+ "\t.section .note.gnu.property,\"a\"\n"
+ "\t.align 3\n"
+ "\t.long 4\n"
+ "\t.long 16\n"
+ "\t.long 5\n"
+ "\t.long 0x00554e47\n"
+ "\t.long 0xc0000000\n"
+ "\t.long 4\n"
+ "\t.long 1\n"
+ "\t.long 0\n");
#endif
break;
#if !LJ_NO_UNWIND
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
index f501495b11..4cfb7b6ad2 100644
--- a/src/vm_x64.dasc
+++ b/src/vm_x64.dasc
@@ -189,6 +189,14 @@
|
|.endif
|
+|//-- Control-flow Enforcement Technology (CET) -------------------------
+|
+|.if BRANCH_TRACK
+|.macro endbr; endbr64; .endmacro
+|.else
+|.macro endbr; .endmacro
+|.endif
+|
|//-----------------------------------------------------------------------
|
|// Instruction headers.
@@ -484,15 +492,18 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_unwind_c: // Unwind C stack, return from vm_pcall.
| // (void *cframe, int errcode)
+ | endbr
| mov eax, CARG2d // Error return status for vm_pcall.
| mov rsp, CARG1
|->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | endbr
| mov L:RB, SAVE_L
| mov GL:RB, L:RB->glref
| mov dword GL:RB->vmstate, ~LJ_VMST_C
| jmp ->vm_leave_unw
|
|->vm_unwind_rethrow:
+ | endbr
|.if not X64WIN
| mov CARG1, SAVE_L
| mov CARG2d, eax
@@ -502,9 +513,11 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe)
+ | endbr
| and CARG1, CFRAME_RAWMASK
| mov rsp, CARG1
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | endbr
| mov L:RB, SAVE_L
| mov RDd, 1+1 // Really 1+2 results, incr. later.
| mov BASE, L:RB->base
@@ -706,6 +719,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->cont_cat: // BASE = base, RC = result, RB = mbase
+ | endbr
| movzx RAd, PC_RB
| sub RB, 32
| lea RA, [BASE+RA*8]
@@ -774,6 +788,7 @@ static void build_subroutines(BuildCtx *ctx)
| test RC, RC
| jz >3
|->cont_ra: // BASE = base, RC = result
+ | endbr
| movzx RAd, PC_RA
| mov RB, [RC]
| mov [BASE+RA*8], RB
@@ -851,6 +866,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov RB, [BASE+RA*8]
| mov [RC], RB
|->cont_nop: // BASE = base, (RC = result)
+ | endbr
| ins_next
|
|3: // Call __newindex metamethod.
@@ -921,6 +937,7 @@ static void build_subroutines(BuildCtx *ctx)
| ins_next
|
|->cont_condt: // BASE = base, RC = result
+ | endbr
| add PC, 4
| mov ITYPE, [RC]
| sar ITYPE, 47
@@ -929,6 +946,7 @@ static void build_subroutines(BuildCtx *ctx)
| jmp <6
|
|->cont_condf: // BASE = base, RC = result
+ | endbr
| mov ITYPE, [RC]
| sar ITYPE, 47
| cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
@@ -1132,16 +1150,17 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc, name
|->ff_ .. name:
+ | endbr
|.endmacro
|
|.macro .ffunc_1, name
|->ff_ .. name:
- | cmp NARGS:RDd, 1+1; jb ->fff_fallback
+ | endbr; cmp NARGS:RDd, 1+1; jb ->fff_fallback
|.endmacro
|
|.macro .ffunc_2, name
|->ff_ .. name:
- | cmp NARGS:RDd, 2+1; jb ->fff_fallback
+ | endbr; cmp NARGS:RDd, 2+1; jb ->fff_fallback
|.endmacro
|
|.macro .ffunc_n, name, op
@@ -2207,6 +2226,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_record: // Dispatch target for recording phase.
|.if JIT
+ | endbr
| movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
| test RDL, HOOK_VMEVENT // No recording while in vmevent.
| jnz >5
@@ -2220,12 +2240,14 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vm_rethook: // Dispatch target for return hooks.
+ | endbr
| movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
| test RDL, HOOK_ACTIVE // Hook already active?
| jnz >5
| jmp >1
|
|->vm_inshook: // Dispatch target for instr/line hooks.
+ | endbr
| movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
| test RDL, HOOK_ACTIVE // Hook already active?
| jnz >5
@@ -2253,6 +2275,7 @@ static void build_subroutines(BuildCtx *ctx)
| jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
|
|->cont_hook: // Continue from hook yield.
+ | endbr
| add PC, 4
| mov RA, [RB-40]
| mov MULTRES, RAd // Restore MULTRES for *M ins.
@@ -2277,6 +2300,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|
|->vm_callhook: // Dispatch target for call hooks.
+ | endbr
| mov SAVE_PC, PC
|.if JIT
| jmp >1
@@ -2313,6 +2337,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching.
|.if JIT
| // BASE = base, RC = result, RB = mbase
+ | endbr
| mov TRACE:ITYPE, [RB-40] // Save previous trace.
| cleartp TRACE:ITYPE
| mov TMPRd, MULTRES
@@ -2364,6 +2389,7 @@ static void build_subroutines(BuildCtx *ctx)
|
|->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE
+ | endbr
| mov L:RB, SAVE_L
| mov L:RB->base, BASE
| mov CARG2, PC // Caveat: CARG2 == BASE
@@ -2383,6 +2409,7 @@ static void build_subroutines(BuildCtx *ctx)
|// The 16 bit exit number is stored with two (sign-extended) push imm8.
|->vm_exit_handler:
|.if JIT
+ | endbr
| push r13; push r12
| push r11; push r10; push r9; push r8
| push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
@@ -2432,6 +2459,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|->vm_exit_interp:
| // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+ | endbr
|.if JIT
| // Restore additional callee-save registers only used in compiled code.
|.if X64WIN
@@ -2524,6 +2552,7 @@ static void build_subroutines(BuildCtx *ctx)
|.macro vm_round, name, mode, cond
|->name:
|->name .. _sse:
+ | endbr
| sseconst_abs xmm2, RD
| sseconst_2p52 xmm3, RD
| movaps xmm1, xmm0
@@ -2634,6 +2663,7 @@ static void build_subroutines(BuildCtx *ctx)
|// Next idx returned in edx.
|->vm_next:
|.if JIT
+ | endbr
| mov NEXT_ASIZE, NEXT_TAB->asize
|1: // Traverse array part.
| cmp NEXT_IDX, NEXT_ASIZE; jae >5
@@ -2817,6 +2847,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|=>defop:
switch (op) {
+#if !LJ_HASJIT
+ case BC_FORL:
+ case BC_JFORI:
+ case BC_JFORL:
+ case BC_ITERL:
+ case BC_JITERL:
+ case BC_LOOP:
+ case BC_JLOOP:
+ case BC_FUNCF:
+ case BC_JFUNCF:
+ case BC_JFUNCV:
+#endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ break; /* Avoid redundant endbr instructions. */
+ default:
+ | endbr
+ break;
+ }
+
+ switch (op) {
/* -- Comparison ops ---------------------------------------------------- */
@@ -4090,6 +4140,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| hotloop RBd
|.endif
|->vm_IITERN:
+ |.if JIT
+ | endbr
+ |.endif
| ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
| mov TAB:RB, [BASE+RA*8-16]
| cleartp TAB:RB
@@ -4847,6 +4900,30 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.align 8\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif
+#endif
+#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK)
+ fprintf(ctx->fp,
+ "\t.section .note.gnu.property,\"a\"\n"
+ "\t.align 8\n"
+ "\t.long 4\n"
+ "\t.long 16\n"
+ "\t.long 5\n"
+ "\t.long 0x00554e47\n"
+ "\t.long 0xc0000002\n"
+ "\t.long 4\n"
+ "\t.long %d\n"
+ "\t.long 0\n",
+#if LJ_ABI_BRANCH_TRACK
+ 1|
+#else
+ 0|
+#endif
+#if LJ_ABI_SHADOW_STACK
+ 2
+#else
+ 0
+#endif
+ );
#endif
break;
#if !LJ_NO_UNWIND