diff --git a/doc/running.html b/doc/running.html index f71eee42f6..56d4c7bfbe 100644 --- a/doc/running.html +++ b/doc/running.html @@ -299,9 +299,9 @@

-O[level]
recunroll2Min. unroll factor for true recursion -sizemcode32Size of each machine code area in KBytes (Windows: 64K) +sizemcode64Size of each machine code area in KBytes -maxmcode512Max. total size of all machine code areas in KBytes +maxmcode2048Max. total size of all machine code areas in KBytes
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index 8b27e9625c..db3adb4845 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -244,6 +244,10 @@ local map_cond = { hs = 2, lo = 3, } +local map_bti = { + c = 0x40, j = 0x80, jc = 0xc0, +} + ------------------------------------------------------------------------------ local parse_reg_type @@ -475,6 +479,12 @@ local function parse_cond(expr, inv) return shl(bit.bxor(c, inv), 12) end +local function parse_map(expr, map) + local x = map[expr] + if not x then werror("bad operand") end + return x +end + local function parse_load(params, nparams, n, op) if params[n+2] then werror("too many operands") end local scale = shr(op, 30) @@ -823,11 +833,21 @@ map_op = { tbz_3 = "36000000DTBw|36000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx", + -- Branch Target Identification. + bti_1 = "d503241ft", + -- ARM64e: Pointer authentication codes (PAC). blraaz_1 = "d63f081fNx", + blrabz_1 = "d63f0c1fNx", braa_2 = "d71f0800NDx", + brab_2 = "d71f0c00NDx", braaz_1 = "d61f081fNx", + brabz_1 = "d61f0c1fNx", + paciasp_0 = "d503233f", pacibsp_0 = "d503237f", + autiasp_0 = "d50323bf", + autibsp_0 = "d50323ff", + retaa_0 = "d65f0bff", retab_0 = "d65f0fff", -- Miscellaneous instructions. 
@@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos) op = op + parse_cond(q, 0); n = n + 1 elseif p == "c" then op = op + parse_cond(q, 1); n = n + 1 + elseif p == "t" then + op = op + parse_map(q, map_bti); n = n + 1 else assert(false) diff --git a/src/Makefile b/src/Makefile index 4a56d1e8e5..e657af1343 100644 --- a/src/Makefile +++ b/src/Makefile @@ -302,6 +302,9 @@ endif ifneq (,$(INSTALL_LJLIBD)) TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" endif +ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-strict-float-cast-overflow +endif ############################################################################## # Target system detection. @@ -354,6 +357,9 @@ else ifeq (GNU/kFreeBSD,$(TARGET_SYS)) TARGET_XLIBS+= -ldl endif + ifeq (GNU,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif endif endif endif @@ -440,6 +446,14 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif +ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D BRANCH_TRACK + TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1 +endif +ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SHADOW_STACK + TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1 +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index a30a34b6be..e4ca19779d 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -465,9 +465,11 @@ typedef struct { mach_segment_command_64 seg; mach_section_64 sec; mach_symtab_command sym; +} mach_obj_64; +typedef struct { mach_nlist_64 sym_entry; uint8_t space[4096]; -} mach_obj_64; +} mach_obj_64_tail; ]] local symname = '_'..LJBC_PREFIX..ctx.modname local cputype, cpusubtype = 
0x01000007, 3 @@ -479,7 +481,10 @@ typedef struct { -- Create Mach-O object and fill in header. local o = ffi.new("mach_obj_64") - local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) + local t = ffi.new("mach_obj_64_tail") + local ofs_bc = ffi.sizeof(o) + local sz_bc = aligned(#s, 8) + local ofs_sym = ofs_bc + sz_bc -- Fill in sections and symbols. o.hdr.magic = 0xfeedfacf @@ -491,7 +496,7 @@ typedef struct { o.seg.cmd = 0x19 o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) o.seg.vmsize = #s - o.seg.fileoff = mach_size + o.seg.fileoff = ofs_bc o.seg.filesize = #s o.seg.maxprot = 1 o.seg.initprot = 1 @@ -499,22 +504,23 @@ typedef struct { ffi.copy(o.sec.sectname, "__data") ffi.copy(o.sec.segname, "__DATA") o.sec.size = #s - o.sec.offset = mach_size + o.sec.offset = ofs_bc o.sym.cmd = 2 o.sym.cmdsize = ffi.sizeof(o.sym) - o.sym.symoff = ffi.offsetof(o, "sym_entry") + o.sym.symoff = ofs_sym o.sym.nsyms = 1 - o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) + o.sym.stroff = ofs_sym + ffi.offsetof(t, "space") o.sym.strsize = aligned(#symname+2, 8) - o.sym_entry.type = 0xf - o.sym_entry.sect = 1 - o.sym_entry.strx = 1 - ffi.copy(o.space+1, symname) + t.sym_entry.type = 0xf + t.sym_entry.sect = 1 + t.sym_entry.strx = 1 + ffi.copy(t.space+1, symname) -- Write Mach-O object file. local fp = savefile(output, "wb") - fp:write(ffi.string(o, mach_size)) - bcsave_tail(fp, output, s) + fp:write(ffi.string(o, ofs_bc)) + fp:write(s, ("\0"):rep(sz_bc - #s)) + bcsave_tail(fp, output, ffi.string(t, ffi.offsetof(t, "space") + o.sym.strsize)) end local function bcsave_obj(ctx, output, s) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 4457aac080..facc6e4a1e 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions. }, { -- System instructions. 
shift = 0, mask = 0x3fffff, - [0x03201f] = "nop" + [0x03201f] = "nop", + [0x03245f] = "bti c", + [0x03249f] = "bti j", + [0x0324df] = "bti jc", }, { -- Unconditional branch, register. shift = 0, mask = 0xfffc1f, @@ -920,7 +923,7 @@ local function disass_ins(ctx) elseif p == "B" then local addr = ctx.addr + pos + parse_immpc(op, name) ctx.rel = addr - x = "0x"..tohex(addr) + x = format("0x%08x", addr) elseif p == "T" then x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) elseif p == "V" then @@ -1171,6 +1174,9 @@ local function disass_ins(ctx) end end second0 = true + elseif p == " " then + operands[#operands+1] = pat:match(" (.*)") + break else assert(false) end diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index b1de0eeae1..6b04ee8495 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -122,7 +122,7 @@ local map_opc2 = { "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", "movhpsXmr||movhpdXmr", "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", -"hintnopVm","hintnopVm","hintnopVm","hintnopVm", +"hintnopVm","hintnopVm","endbr*hintnopVm","hintnopVm", --2x "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, "movapsXrm||movapdXrm", @@ -804,6 +804,24 @@ map_act = { return dispatch(ctx, map_opcvm[ctx.mrm]) end, + -- Special NOP for endbr64/endbr32. + endbr = function(ctx, name, pat) + if ctx.rep then + local pos = ctx.pos + local b = byte(ctx.code, pos) + local text + if b == 0xfa then text = "endbr64" + elseif b == 0xfb then text = "endbr32" + end + if text then + ctx.pos = pos + 1 + ctx.rep = nil + return putop(ctx, text) + end + end + return dispatch(ctx, pat) + end, + -- Floating point opcode dispatch. 
fp = function(ctx, name, pat) local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end diff --git a/src/lib_jit.c b/src/lib_jit.c index fd8e585b83..0f75c5ac64 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str) size_t len = *(const uint8_t *)lst; lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { - int32_t n = 0; + uint32_t n = 0; const char *p = &str[len+1]; while (*p >= '0' && *p <= '9') n = n*10 + (*p++ - '0'); - if (*p) return 0; /* Malformed number. */ - J->param[i] = n; + if (*p || (int32_t)n < 0) return 0; /* Malformed number. */ + if (i == JIT_P_sizemcode) { /* Adjust to required range here. */ +#if LJ_TARGET_JUMPRANGE + uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64); +#else + uint32_t maxkb = ((1 << (31 - 10)) - 64); +#endif + n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1); + if (n > maxkb) n = maxkb; + } + J->param[i] = (int32_t)n; if (i == JIT_P_hotloop) lj_dispatch_init_hotcount(J2G(J)); return 1; /* Ok. */ @@ -714,7 +723,16 @@ static void jit_init(lua_State *L) jit_State *J = L2J(L); J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); +#if LJ_TARGET_UNALIGNED + G(L)->tmptv.u64 = U64x(0000504d,4d500000); +#endif lj_dispatch_update(G(L)); +#if LJ_TARGET_UNALIGNED + /* If you get a crash below then your toolchain indicates unaligned + ** accesses are OK, but your kernel disagrees. I.e. fix your toolchain. 
+ */ + if (*(uint32_t *)((char *)&G(L)->tmptv + 2) != 0x504d4d50u) L->top = NULL; +#endif } #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index a4eecf27e0..5f3880680b 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -96,6 +96,9 @@ #elif defined(__QNX__) #define LJ_TARGET_QNX 1 #define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__GNU__) +#define LJ_TARGET_HURD 1 +#define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER #endif @@ -216,6 +219,29 @@ #error "macOS requires GC64 -- don't disable it" #endif +#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \ + LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR) +/* +** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). +** This is not enabled by default because it causes a notable slowdown of +** the interpreter on all x64 CPUs, whether they have CET enabled or not. +** If your toolchain enables -fcf-protection=branch by default, you need +** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR +*/ +#define LJ_ABI_BRANCH_TRACK 1 +#endif + +#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2) +/* +** Control-Flow Enforcement Technique (CET) shadow stack (CET-SS). +** It has no code overhead and doesn't cause any slowdowns when unused. +** It can also be unconditionally enabled since all code already follows +** a strict CALL to RET correspondence for performance reasons (all modern +** CPUs use a (non-enforcing) shadow stack for return branch prediction). +*/ +#define LJ_ABI_SHADOW_STACK 1 +#endif + #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM #define LJ_ARCH_NAME "arm" @@ -262,6 +288,11 @@ #if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) #define LJ_ABI_PAUTH 1 #endif +#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \ + defined(LUAJIT_ENABLE_CET_BR) +/* See comments about LUAJIT_ENABLE_CET_BR above. 
*/ +#define LJ_ABI_BRANCH_TRACK 1 +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRAREG 30 @@ -270,8 +301,13 @@ #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 +#define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#if __ARM_FEATURE_UNALIGNED +#define LJ_TARGET_UNALIGNED 1 +#endif + #define LJ_ARCH_VERSION 80 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC @@ -425,7 +461,7 @@ #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_EHRAREG 31 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ +#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ diff --git a/src/lj_asm.c b/src/lj_asm.c index fec4351251..0e888c294a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -93,6 +93,10 @@ typedef struct ASMState { MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *realign; /* Realign loop if not NULL. */ + MCode *mctail; /* Tail of trace before stack adjust + jmp. */ +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 + MCode *mcexit; /* Pointer to exit stubs. */ +#endif #ifdef LUAJIT_RANDOM_RA /* Randomize register allocation. OK for fuzz testing, not for production. */ @@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) RA_DBGX((as, "===== STOP =====")); /* General trace setup. Emit tail of trace. */ - asm_tail_prep(as); + asm_tail_prep(as, T->link); as->mcloop = NULL; as->flagmcp = NULL; as->topslot = 0; @@ -2586,6 +2590,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_head_side(as); else asm_head_root(as); +#if LJ_ABI_BRANCH_TRACK + emit_branch_track(as); +#endif asm_phi_fixup(as); if (J->curfinal->nins >= T->nins) { /* IR didn't grow? 
*/ diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index de435057e1..406360d26a 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Generate an exit stub group at the bottom of the reserved MCode memory. */ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { + ExitNo i; + int ind = 0; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) + if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop) asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; + if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) { + /* str lr, [sp]; bl ->vm_exit_handler; + ** .long DISPATCH_address, group. + */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + /* + ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 2; + } else { + /* .long vm_exit_handler; + ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = (MCode)target; + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16; + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 1; + } *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ *mxp++ = group*EXITSTUBS_PER_GROUP; for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = 
ARMI_B|((-6-i)&0x00ffffffu); + *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu); lj_mcode_sync(as->mcbot, mxp); lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -1927,7 +1952,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { @@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ uint32_t k = emit_isk12(ARMI_ADD, spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) { + *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + } else { + *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0; + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + *mcp++ = (MCode)target; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); + while (as->mctop > mcp) *--as->mctop = ARMI_NOP; } /* Prepare tail of code. 
*/ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if ((((target - p - 2) + 0x00800000u) >> 24) || + (((target - p - 1) + 0x00800000u) >> 24)) p -= 2; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } *p = 0; /* Prevent load/store merging. */ + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 4feaa3b0c2..fdcff1db24 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + ind = !A64F_S_OK(target - (mxp - nexits - 2), 26); + /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; + ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno; + ** bl <1; bl <1; ... 
+ */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i)); + as->mcexit = mxp; *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); - mxp--; - *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + if (ind) { + *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR)); + *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3)); + } else { + mxp--; + *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp)); + } *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); as->mctop = mxp; } @@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -1044,7 +1056,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } @@ -1917,34 +1930,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; + MCode *mcp = as->mctail; MCode *target; /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_LE(A64I_NOP); - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. 
*/ uint32_t k = emit_isk12(spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | A64F_S26((target-p)+1); + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || A64F_S_OK(target - mcp, 26)) { + *mcp = A64I_B | A64F_S26(target - mcp); mcp++; + } else { + *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3); + *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR); + } + while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP); } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; *p = 0; /* Prevent load/store merging. */ } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index af0e714f15..8dadabe4a0 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) /* Setup exit stub after the end of each trace. 
*/ static void asm_exitstub_setup(ASMState *as) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, - "branch target out of range"); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; + *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno; + if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) { + /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ + *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */ + *--mxp = MIPSI_JR | MIPSF_S(RID_TMP); + *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); +#endif + } + *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0; as->mctop = mxp; } @@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)| + RID2RSET(RID_CFUNCADDR) #if LJ_TARGET_MIPSR6 |RID2RSET(RID_F21) #endif @@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| - RID2RSET(RID_R1)|RID2RSET(RID_R12); + RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); /* Return values are in RID_RET (converted value) and RID_RET+1 (status). 
*/ @@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + if (((uintptr_t)mcp ^ target) >> 28 == 0) { + *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); +#endif + *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP); + } + *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; + as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. 
*/ + if (as->loopref) { + as->invmcp = as->mcp; + } else { + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--; + } + as->invmcp = NULL; + } + as->mctail = as->mcp; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index df1ac42f7a..d77c45ce9b 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ + ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0; + /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; + ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); + *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2); + as->mcexit = mxp; *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); + if (ind) { + *--mxp = PPCI_BCTRL; + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP); + *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); + } else { + mxp--; + *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu); + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + } as->mctop = mxp; } static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). 
*/ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; + *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); + *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); + /* Emit exit branch. */ + if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) { + *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++; + } else { + *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); + *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP); + *mcp++ = PPCI_BCTR; + } + while (as->mctop > mcp) *--as->mctop = PPCI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. 
*/ + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) || + (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2; + } + p -= 2; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 936ff438fd..f3c2238a2f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -9,9 +9,12 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; MCode *mxpstart = mxp; - if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) + if (mxp + ((2+2)*EXITSTUBS_PER_GROUP + + (LJ_GC64 ? 0 : 8) + + (LJ_64 ? 6 : 5)) >= as->mctop) asm_mclimit(as); /* Push low byte of exitno for each exit stub. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; @@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; #endif /* Jump to exit handler which fills in the ExitState. */ - *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); + if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */ + *mxp++ = XI_JMP; mxp += 4; + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target); + } else { /* RIP-relative indirect jump. */ + *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4; + *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp); + } /* Commit the code for this group (even if assembly fails later on). 
*/ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ExitNo i; if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) lj_trace_err(as->J, LJ_TRERR_SNAPOV); +#if LJ_64 + if (as->J->exitstubgroup[0] == NULL) { + /* Store the two potentially out-of-range targets below group 0. */ + MCode *mxp = as->mcbot; + while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3; + *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8; + *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8; + as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */ + } +#endif for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) if (as->J->exitstubgroup[i] == NULL) as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); @@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; - *(uint64_t*)as->mcbot = *k; + *(uint64_t *)as->mcbot = *k; ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; @@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) p = (MCode *)(void *)ir_k64(irf)->u64; else p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i; - if (p - as->mcp == (int32_t)(p - as->mcp)) + if (jmprel_ok(p, as->mcp)) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } @@ -2084,7 +2102,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) RegSet allow = RSET_GPR; Reg dest, right; int32_t k = 0; - if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ + if (as->flagmcp == as->mcp && xa != XOg_X_IMUL) { + /* Drop test r,r instruction. */ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2); MCode *q = p[0] == 0x0f ? 
p+1 : p; if ((*q & 15) < 14) { @@ -2805,6 +2824,8 @@ static void asm_gc_check(ASMState *as) emit_rr(as, XO_TEST, RID_RET, RID_RET); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ + /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. */ + if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP; asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); #if LJ_GC64 @@ -2918,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) static void asm_tail_fixup(ASMState *as, TraceNo lnk) { /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ - MCode *p = as->mctop; - MCode *target, *q; + MCode *mcp = as->mctail; + MCode *target; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - p -= LJ_64 ? 7 : 6; - } else { - MCode *p1; - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ + if (LJ_64) *mcp++ = 0x48; if (checki8(spadj)) { - p -= 3; - p1 = p-6; - *p1 = (MCode)spadj; + *mcp++ = XI_ARITHi8; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *mcp++ = (MCode)spadj; } else { - p1 = p-9; - *(int32_t *)p1 = spadj; + *mcp++ = XI_ARITHi; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *(int32_t *)mcp = spadj; mcp += 4; } -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(as->J, p, target); - p[-5] = XI_JMP; + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */ + *mcp++ = XI_JMP; mcp += 4; + *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target); + } else { /* RIP-relative indirect jump. 
*/ + *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4; + *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp); + } /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ - for (q = as->mctop-1; q >= p; q--) - *q = XI_NOP; - as->mctop = p; + while (as->mctop > mcp) *--as->mctop = XI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop; /* Realign and leave room for backwards loop branch or exit branch. */ @@ -2963,15 +2980,17 @@ static void asm_tail_prep(ASMState *as) as->mctop = p; p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ } else { - p -= 5; /* Space for exit branch (near jmp). */ + p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */ } if (as->loopref) { as->invmcp = as->mcp = p; } else { - /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (LJ_64 ? 7 : 6); + /* Leave room for ESP adjustment: add esp, imm */ + p -= LJ_64 ? 7 : 6; + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ @@ -3131,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if (*p == XI_CALL && (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { pgc = p+7; /* Do not patch GC check exit. */ + } else if (LJ_64 && *p == 0xff && + p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) && + p[2] == XI_NOP) { + pgc = p+5; /* Do not patch GC check exit. 
*/ } } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); diff --git a/src/lj_bc.h b/src/lj_bc.h index a94ea4e4a8..53b3e50123 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -259,6 +259,11 @@ static LJ_AINLINE int bc_isret(BCOp op) return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1); } +static LJ_AINLINE int bc_isret_or_tail(BCOp op) +{ + return (op == BC_CALLMT || op == BC_CALLT || bc_isret(op)); +} + LJ_DATA const uint16_t lj_bc_mode[]; LJ_DATA const uint16_t lj_bc_ofs[]; diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 37e909b391..5570952208 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -191,7 +191,7 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) } else if (tp == BCDUMP_KTAB_NUM) { o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); - } else if (tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */ + } else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */ settabV(ls->L, o, t); } else { lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); @@ -209,13 +209,13 @@ static GCtab *bcread_ktab(LexState *ls) MSize i; TValue *o = tvref(t->array); for (i = 0; i < narray; i++, o++) - bcread_ktabk(ls, o, t); + bcread_ktabk(ls, o, NULL); } if (nhash) { /* Read hash entries. 
*/ MSize i; for (i = 0; i < nhash; i++) { TValue key; - bcread_ktabk(ls, &key, t); + bcread_ktabk(ls, &key, NULL); lj_assertLS(!tvisnil(&key), "nil key"); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); } diff --git a/src/lj_ccall.c b/src/lj_ccall.c index ae69cd28d1..d5f092ea5e 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -781,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) { CTSize sz = ct->size; unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - while (ct->sib) { + while (ct->sib && n <= 4) { + unsigned int m = 1; CType *sct; ct = ctype_get(cts, ct->sib); if (ctype_isfield(ct->info)) { sct = ctype_rawchild(cts, ct); + if (ctype_isarray(sct->info)) { + CType *cct = ctype_rawchild(cts, sct); + if (!cct->size) continue; + m = sct->size / cct->size; + sct = cct; + } if (ctype_isfp(sct->info)) { r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; + if (!isu) n += m; else if (n < m) n = m; } else if (ctype_iscomplex(sct->info)) { r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; + if (!isu) n += 2*m; else if (n < 2*m) n = 2*m; } else if (ctype_isstruct(sct->info)) { goto substruct; } else { @@ -803,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) sct = ctype_rawchild(cts, ct); substruct: if (sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct); + unsigned int s = ccall_classify_struct(cts, sct), sn; if (s <= 1) goto noth; r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + sn = (s >> 8) * m; + if (!isu) n += sn; else if (n < sn) n = sn; } } } @@ -893,7 +901,9 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, /* -- Common C call handling ---------------------------------------------- */ -/* Infer the destination CTypeID for a vararg argument. */ +/* Infer the destination CTypeID for a vararg argument. +** Note: may reallocate cts->tab and invalidate CType pointers. 
+*/ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) { if (tvisnumber(o)) { @@ -921,13 +931,16 @@ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) } } -/* Setup arguments for C call. */ +/* Setup arguments for C call. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCallState *cc) { int gcsteps = 0; TValue *o, *top = L->top; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. */ CType *ctr; MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR @@ -946,7 +959,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #if LJ_TARGET_X86 /* x86 has several different calling conventions. */ cc->resx87 = 0; - switch (ctype_cconv(ct->info)) { + switch (ctype_cconv(info)) { case CTCC_FASTCALL: maxgpr = 2; break; case CTCC_THISCALL: maxgpr = 1; break; default: maxgpr = 0; break; @@ -963,7 +976,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else if (ctype_iscomplex(ctr->info) || ctype_isstruct(ctr->info)) { /* Preallocate cdata object and anchor it after arguments. */ CTSize sz = ctr->size; - GCcdata *cd = lj_cdata_new(cts, ctype_cid(ct->info), sz); + GCcdata *cd = lj_cdata_new(cts, ctype_cid(info), sz); void *dp = cdataptr(cd); setcdataV(L, L->top++, cd); if (ctype_isstruct(ctr->info)) { @@ -986,7 +999,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } #if LJ_TARGET_ARM64 && LJ_ABI_WIN - if ((ct->info & CTF_VARARG)) { + if ((info & CTF_VARARG)) { nsp -= maxgpr * CTSIZE_PTR; /* May end up with negative nsp. */ ngpr = maxgpr; nfpr = CCALL_NARG_FPR; @@ -1007,7 +1020,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, lj_assertL(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. 
*/ did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ isva = 1; @@ -1178,11 +1191,11 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) ct = ctype_rawchild(cts, ct); } if (ctype_isfunc(ct->info)) { + CTypeID id = ctype_typeid(cts, ct); CCallState cc; int gcsteps, ret; cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz); gcsteps = ccall_set_args(L, cts, ct, &cc); - ct = (CType *)((intptr_t)ct-(intptr_t)cts->tab); cts->cb.slot = ~0u; lj_vm_ffi_call(&cc); if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ @@ -1190,7 +1203,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000); setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); } - ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ + ct = ctype_get(cts, id); /* Table may have been reallocated. */ gcsteps += ccall_get_results(L, cts, ct, &cc, &ret); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Automatically detect __stdcall and fix up C function declaration. */ diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index d93dbc6457..c4b25cd7d1 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -34,22 +34,29 @@ #elif LJ_TARGET_X86ORX64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 8 +#else +#define CALLBACK_MCODE_SLOTSZ 4 +#endif +#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ) + #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 
6 : 5)) #define CALLBACK_SLOT2OFS(slot) \ - (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) + (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot)) static MSize CALLBACK_OFS2SLOT(MSize ofs) { MSize group; ofs -= CALLBACK_MCODE_HEAD; - group = ofs / (32*4 + CALLBACK_MCODE_GROUP); - return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32; + group = ofs / (128 + CALLBACK_MCODE_GROUP); + return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT; } #define CALLBACK_MAX_SLOT \ - (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) + (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT) #elif LJ_TARGET_ARM @@ -57,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_ARM64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 12 +#endif + #define CALLBACK_MCODE_HEAD 32 #elif LJ_TARGET_PPC @@ -81,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #endif #ifndef CALLBACK_SLOT2OFS -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) +#ifndef CALLBACK_MCODE_SLOTSZ +#define CALLBACK_MCODE_SLOTSZ 8 +#endif +#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot)) +#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ) #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) #endif @@ -118,9 +132,13 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *(void **)p = target; p += 8; #endif for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *(uint32_t *)p = XI_ENDBR64; p += 4; +#endif /* mov al, slot; jmp group */ *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; - if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) { + if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) || + slot == 
CALLBACK_MAX_SLOT-1) { /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); @@ -140,7 +158,8 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; #endif } else { - *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); + *p++ = XI_JMPs; + *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2); } } return p; @@ -181,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *p++ = A64I_BTI_C; +#endif *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; @@ -263,7 +285,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) #endif /* Check for macOS hardened runtime. */ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include #define CCMAP_CREATE MAP_JIT #else diff --git a/src/lj_crecord.c b/src/lj_crecord.c index f88cddfd95..27f2c1dd99 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1101,12 +1101,15 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) crec_finalizer(J, trcd, 0, fin); } -/* Record argument conversions. */ +/* Record argument conversions. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static TRef crec_call_args(jit_State *J, RecordFFData *rd, CTState *cts, CType *ct) { TRef args[CCI_NARGS_MAX]; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. 
*/ MSize i, n; TRef tr, *base; cTValue *o; @@ -1115,9 +1118,9 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, TRef *arg0 = NULL, *arg1 = NULL; #endif int ngpr = 0; - if (ctype_cconv(ct->info) == CTCC_THISCALL) + if (ctype_cconv(info) == CTCC_THISCALL) ngpr = 1; - else if (ctype_cconv(ct->info) == CTCC_FASTCALL) + else if (ctype_cconv(info) == CTCC_FASTCALL) ngpr = 2; #elif LJ_TARGET_ARM64 && LJ_TARGET_OSX int ngpr = CCALL_NARG_GPR; @@ -1144,7 +1147,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, lj_assertJ(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ #if LJ_TARGET_ARM64 && LJ_TARGET_OSX if (ngpr >= 0) { @@ -1248,14 +1251,17 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) { CTState *cts = ctype_ctsG(J2G(J)); CType *ct = ctype_raw(cts, cd->ctypeid); + CTInfo info; IRType tp = IRT_PTR; if (ctype_isptr(ct->info)) { tp = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; ct = ctype_rawchild(cts, ct); } - if (ctype_isfunc(ct->info)) { + info = ct->info; /* crec_call_args may invalidate ct pointer. */ + if (ctype_isfunc(info)) { TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); CType *ctr = ctype_rawchild(cts, ct); + CTInfo ctr_info = ctr->info; /* crec_call_args may invalidate ctr. */ IRType t = crec_ct2irt(cts, ctr); TRef tr; TValue tv; @@ -1263,22 +1269,22 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 
8 : 4) >> 2) | U64x(800000000, 00000000); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); - if (ctype_isvoid(ctr->info)) { + if (ctype_isvoid(ctr_info)) { t = IRT_NIL; rd->nres = 0; - } else if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) || - ctype_isenum(ctr->info)) || t == IRT_CDATA) { + } else if (!(ctype_isnum(ctr_info) || ctype_isptr(ctr_info) || + ctype_isenum(ctr_info)) || t == IRT_CDATA) { lj_trace_err(J, LJ_TRERR_NYICALL); } - if ((ct->info & CTF_VARARG) + if ((info & CTF_VARARG) #if LJ_TARGET_X86 - || ctype_cconv(ct->info) != CTCC_CDECL + || ctype_cconv(info) != CTCC_CDECL #endif ) func = emitir(IRT(IR_CARG, IRT_NIL), func, lj_ir_kint(J, ctype_typeid(cts, ct))); tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func); - if (ctype_isbool(ctr->info)) { + if (ctype_isbool(ctr_info)) { if (frame_islua(J->L->base-1) && bc_b(frame_pc(J->L->base-1)[-1]) == 1) { /* Don't check result if ignored. */ tr = TREF_NIL; @@ -1294,8 +1300,8 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tr = TREF_TRUE; } } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || - t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { - TRef trid = lj_ir_kint(J, ctype_cid(ct->info)); + t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr_info)) { + TRef trid = lj_ir_kint(J, ctype_cid(info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J); } else if (t == IRT_FLOAT || t == IRT_U32) { diff --git a/src/lj_debug.c b/src/lj_debug.c index b3d52afc8f..f9392d8e6b 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -101,6 +101,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; #if LJ_HASJIT + if (pos == NO_BCPOS) return 1; /* Pretend it's the first bytecode. */ if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. 
*/ if (bc_isret(bc_op(ins[-1]))) { GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index c60e7d7560..3e1eb64bfc 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) return 0; /* Failed. */ } +#define emit_movw_k(k) \ + (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4)) +#define emit_movt_k(k) \ + (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4)) + /* Load a 32 bit constant into a GPR. */ static void emit_loadi(ASMState *as, Reg rd, int32_t i) { @@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i) emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movt_k(i), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index ca1269b7c3..a8be741562 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target) } } +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) +{ + *--as->mcp = A64I_BTI_J; +} +#endif + /* -- Emit generic operations --------------------------------------------- */ /* Generic move between two regs. 
*/ diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index d8104959aa..d65b1c5777 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index b13f00fe5b..56928e4235 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index f477301162..858fe753be 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -70,6 +70,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, return p; } +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) +{ + emit_u32(as, XI_ENDBR64); +} +#endif + /* op + modrm */ #define emit_opm(xo, mode, rr, rb, p, delta) \ (p[(delta)-1] = MODRM((mode), (rr), (rb)), \ @@ -471,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source) /* Return label pointing to current PC. */ #define emit_label(as) ((as)->mcp) +/* Check if two addresses are in relative jump range. */ +static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b) +{ +#if LJ_64 + return a - b == (int32_t)(a - b); +#else + UNUSED(a); UNUSED(b); + return 1; +#endif +} + /* Compute relative 32 bit offset for jump and call instructions. 
*/ static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { @@ -504,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; #if LJ_64 - if (target-p != (int32_t)(target-p)) { + if (!jmprel_ok(target, p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. */ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); diff --git a/src/lj_err.c b/src/lj_err.c index 03b5030be6..e8e1875805 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -803,9 +803,17 @@ LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em) return lj_str_newz(L, err2msg(em)); } +LJ_NORET LJ_NOINLINE static void lj_err_err(lua_State *L) +{ + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRERR)); + lj_err_throw(L, LUA_ERRERR); +} + /* Out-of-memory error. */ LJ_NOINLINE void lj_err_mem(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. */ lj_vm_unwind_c(L->cframe, LUA_ERRMEM); if (LJ_HASJIT) { @@ -902,6 +910,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) /* Stack overflow error. */ void LJ_FASTCALL lj_err_stkov(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL); lj_err_run(L); } diff --git a/src/lj_gc.c b/src/lj_gc.c index d9581d20d3..c779d583e9 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -106,6 +106,7 @@ static void gc_mark_start(global_State *g) setgcrefnull(g->gc.weak); gc_markobj(g, mainthread(g)); gc_markobj(g, tabref(mainthread(g)->env)); + gc_markobj(g, vmthread(g)); gc_marktv(g, &g->registrytv); gc_mark_gcroot(g); g->gc.state = GCSpropagate; @@ -507,24 +508,25 @@ static void gc_call_finalizer(global_State *g, lua_State *L, uint8_t oldh = hook_save(g); GCSize oldt = g->gc.threshold; int errcode; + lua_State *VL = vmthread(g); TValue *top; lj_trace_abort(g); hook_entergc(g); /* Disable hooks and new traces during __gc. 
*/ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ - top = L->top; - copyTV(L, top++, mo); + top = VL->top; + copyTV(VL, top++, mo); if (LJ_FR2) setnilV(top++); - setgcV(L, top, o, ~o->gch.gct); - L->top = top+1; - errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcV(VL, top, o, ~o->gch.gct); + VL->top = top+1; + errcode = lj_vm_pcall(VL, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcref(g->cur_L, obj2gco(L)); hook_restore(g, oldh); if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = oldt; /* Restore GC threshold. */ if (errcode) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ - lj_vmevent_send(L, ERRFIN, - copyTV(L, L->top++, restorestack(L, errobj)); + lj_vmevent_send(L, ERRFIN, + copyTV(L, L->top++, VL->top-1); ); L->top--; } diff --git a/src/lj_jit.h b/src/lj_jit.h index 102ba0b4b7..05a8e9bbe9 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -104,14 +104,6 @@ /* -- JIT engine parameters ----------------------------------------------- */ -#if LJ_TARGET_WINDOWS || LJ_64 -/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ -#define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif - /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \ \ /* Size of each machine code area (in KBytes). */ \ - _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ + _(\011, sizemcode, 64) \ /* Max. total size of all machine code areas (in KBytes). */ \ - _(\010, maxmcode, 512) \ + _(\010, maxmcode, 2048) \ /* End of list. 
*/ enum { @@ -374,10 +366,14 @@ enum { LJ_K64_2P63, /* 2^63 */ LJ_K64_M2P64, /* -2^64 */ #endif +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + LJ_K64_VM_EXIT_HANDLER, + LJ_K64_VM_EXIT_INTERP, #endif LJ_K64__MAX, }; -#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -393,6 +389,10 @@ enum { #if LJ_TARGET_MIPS64 LJ_K32_2P63, /* 2^63 */ LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + LJ_K32_VM_EXIT_HANDLER, + LJ_K32_VM_EXIT_INTERP, #endif LJ_K32__MAX }; @@ -513,6 +513,7 @@ typedef struct jit_State { MCode *mcbot; /* Bottom of current mcode area. */ size_t szmcarea; /* Size of current mcode area. */ size_t szallmcarea; /* Total size of all allocated mcode areas. */ + uintptr_t mcmin, mcmax; /* Mcode allocation range. */ TValue errinfo; /* Additional info element for trace errors. */ diff --git a/src/lj_load.c b/src/lj_load.c index 828bf8aea0..24b660a8b1 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -122,8 +122,9 @@ LUALIB_API int luaL_loadfilex(lua_State *L, const char *filename, copyTV(L, L->top-1, L->top); } if (err) { + const char *fname = filename ? filename : "stdin"; L->top--; - lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(err)); + lua_pushfstring(L, "cannot read %s: %s", fname, strerror(err)); return LUA_ERRFILE; } return status; diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 43694226a6..c3032f4e2d 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end) #if LJ_HASJIT +#if LUAJIT_SECURITY_MCODE != 0 +/* Protection twiddling failed. Probably due to kernel security. 
*/ +static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) +{ + lua_CFunction panic = J2G(J)->panic; + if (panic) { + lua_State *L = J->L; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); + panic(L); + } + exit(EXIT_FAILURE); +} +#endif + #if LJ_TARGET_WINDOWS #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot) { - void *p = LJ_WIN_VALLOC((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; + return LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); UNUSED(sz); + UNUSED(sz); VirtualFree(p, 0, MEM_RELEASE); } -static int mcode_setprot(void *p, size_t sz, DWORD prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot) { +#if LUAJIT_SECURITY_MCODE != 0 DWORD oprot; - return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J); +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); +#endif } #elif LJ_TARGET_POSIX @@ -99,7 +114,7 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #endif /* Check for macOS hardened runtime. 
*/ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include #define MCMAP_CREATE MAP_JIT #else @@ -111,37 +126,39 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) #ifdef PROT_MPROTECT #define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) +#elif MCMAP_CREATE +#define MCPROT_CREATE PROT_EXEC #else #define MCPROT_CREATE 0 #endif -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot) { void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0); - if (p == MAP_FAILED) { - if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); - p = NULL; + if (p == MAP_FAILED) return NULL; #if MCMAP_CREATE - } else { - pthread_jit_write_protect_np(0); + pthread_jit_write_protect_np(0); #endif - } return p; } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); munmap(p, sz); } -static int mcode_setprot(void *p, size_t sz, int prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot) { +#if LUAJIT_SECURITY_MCODE != 0 #if MCMAP_CREATE - pthread_jit_write_protect_np((prot & PROC_EXEC)); + UNUSED(J); UNUSED(p); UNUSED(sz); + pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else - return mprotect(p, sz, prot); + if (mprotect(p, sz, prot)) mcode_protfail(J); +#endif +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); #endif } @@ -151,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot) #endif +#ifdef LUAJIT_MCODE_TEST +/* Test wrapper for mcode allocation. DO NOT ENABLE in production! 
Try: +** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua +** LUAJIT_MCODE_TEST=F luajit -jv main.lua +*/ +static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot) +{ + static int test_ofs = 0; + static const char *test_str; + if (!test_str) { + test_str = getenv("LUAJIT_MCODE_TEST"); + if (!test_str) test_str = ""; + } + switch (test_str[test_ofs]) { + case 'a': /* OK for one allocation. */ + test_ofs++; + /* fallthrough */ + case '\0': /* EOS: OK for any further allocations. */ + break; + case 'h': /* Ignore one hint. */ + test_ofs++; + /* fallthrough */ + case 'H': /* Ignore any further hints. */ + hint = 0u; + break; + case 'r': /* Randomize one hint. */ + test_ofs++; + /* fallthrough */ + case 'R': /* Randomize any further hints. */ + hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu; + hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1; + break; + case 'f': /* Fail one allocation. */ + test_ofs++; + /* fallthrough */ + default: /* 'F' or unknown: Fail any further allocations. */ + return NULL; + } + return mcode_alloc_at(hint, sz, prot); +} +#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot) +#endif + /* -- MCode area protection ----------------------------------------------- */ #if LUAJIT_SECURITY_MCODE == 0 @@ -172,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot) static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); + UNUSED(J); UNUSED(prot); } #else @@ -188,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot) #define MCPROT_GEN MCPROT_RW #define MCPROT_RUN MCPROT_RX -/* Protection twiddling failed. Probably due to kernel security. */ -static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) -{ - lua_CFunction panic = J2G(J)->panic; - if (panic) { - lua_State *L = J->L; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); - panic(L); - } - exit(EXIT_FAILURE); -} - /* Change protection of MCode area. 
*/ static void mcode_protect(jit_State *J, int prot) { if (J->mcprot != prot) { - if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot))) - mcode_protfail(J); + mcode_setprot(J, J->mcarea, J->szmcarea, prot); J->mcprot = prot; } } @@ -214,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 -#define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif - #ifdef LJ_TARGET_JUMPRANGE -/* Get memory within relative jump distance of our code in 64 bit mode. */ -static void *mcode_alloc(jit_State *J, size_t sz) +#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u) + +/* Set a memory range for mcode allocation with addr in the middle. */ +static void mcode_setrange(jit_State *J, uintptr_t addr) { - /* Target an address in the static assembler code (64K aligned). - ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. - ** Use half the jump range so every address in the range can reach any other. - */ #if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; + /* Use the whole 256MB-aligned region. */ + J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1); + J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE); #else - uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; + /* Every address in the 64KB-aligned range should be able to reach + ** any other, so MCODE_RANGE64 is only half the (signed) branch range. + */ + J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu; + J->mcmax = J->mcmin + MCODE_RANGE64; #endif - const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); - /* First try a contiguous area below the last one. */ - uintptr_t hint = J->mcarea ? 
(uintptr_t)J->mcarea - sz : 0; - int i; - /* Limit probing iterations, depending on the available pool size. */ - for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { - if (mcode_validptr(hint)) { - void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); - - if (mcode_validptr(p) && - ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) - return p; - if (p) mcode_free(J, p, sz); /* Free badly placed area. */ - } - /* Next try probing 64K-aligned pseudo-random addresses. */ + /* Avoid wrap-around and the 64KB corners. */ + if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u; + if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu; +} + +/* Check if an address is in range of the mcode allocation range. */ +static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz) +{ + /* Take care of unsigned wrap-around of addr + sz, too. */ + return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax; +} + +/* Get memory within a specific jump range in 64 bit mode. */ +static void *mcode_alloc(jit_State *J, size_t sz) +{ + uintptr_t hint; + int i = 0, j; + if (!J->mcmin) /* Place initial range near the interpreter code. */ + mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler); + else if (!J->mcmax) /* Switch to a new range (already flushed). */ + goto newrange; + /* First try a contiguous area below the last one (if in range). */ + hint = (uintptr_t)J->mcarea - sz; + if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */ + goto probe; + for (; i < 16; i++) { + void *p = mcode_alloc_at(hint, sz, MCPROT_GEN); + if (mcode_inrange(J, (uintptr_t)p, sz)) + return p; /* Success. */ + else if (p) + mcode_free(p, sz); /* Free badly placed area. */ + probe: + /* Next try probing 64KB-aligned pseudo-random addresses. 
*/ + j = 0; do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); - } while (!(hint + sz < range+range)); - hint = target + hint - range; + hint = J->mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64); + if (++j > 15) goto fail; + } while (!mcode_inrange(J, hint, sz)); + } +fail: + if (!J->mcarea) { /* Switch to a new range now. */ + void *p; + newrange: + p = mcode_alloc_at(0, sz, MCPROT_GEN); + if (p) { + mcode_setrange(J, (uintptr_t)p + (sz >> 1)); + return p; /* Success. */ + } + } else { + J->mcmax = 0; /* Switch to a new range after the flush. */ } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; @@ -267,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz) { #if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. */ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; + void *p = mcode_alloc_at(0, sz, MCPROT_RUN); + if (p) mcode_setprot(J, p, sz, MCPROT_GEN); #else - return mcode_alloc_at(J, 0, sz, MCPROT_GEN); + void *p = mcode_alloc_at(0, sz, MCPROT_GEN); #endif + if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; } #endif @@ -287,7 +359,6 @@ static void mcode_allocarea(jit_State *J) { MCode *oldarea = J->mcarea; size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; - sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); J->mcarea = (MCode *)mcode_alloc(J, sz); J->szmcarea = sz; J->mcprot = MCPROT_GEN; @@ -309,7 +380,7 @@ void lj_mcode_free(jit_State *J) MCode *next = ((MCLink *)mc)->next; size_t sz = ((MCLink *)mc)->size; lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); - mcode_free(J, mc, sz); + mcode_free(mc, sz); mc = next; } } @@ -345,32 +416,25 @@ void lj_mcode_abort(jit_State *J) MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { if (finish) { -#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); - else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink 
*)ptr)->size, MCPROT_RUN))) - mcode_protfail(J); -#endif + else + mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN); return NULL; } else { - MCode *mc = J->mcarea; + uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr; /* Try current area first to use the protection cache. */ - if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { -#if LUAJIT_SECURITY_MCODE + if (addr >= base && addr < base + J->szmcarea) { mcode_protect(J, MCPROT_GEN); -#endif - return mc; + return (MCode *)base; } /* Otherwise search through the list of MCode areas. */ for (;;) { - mc = ((MCLink *)mc)->next; - lj_assertJ(mc != NULL, "broken MCode area chain"); - if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { -#if LUAJIT_SECURITY_MCODE - if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) - mcode_protfail(J); -#endif - return mc; + base = (uintptr_t)(((MCLink *)base)->next); + lj_assertJ(base != 0, "broken MCode area chain"); + if (addr >= base && addr < base + ((MCLink *)base)->size) { + mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN); + return (MCode *)base; } } } @@ -382,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need) size_t sizemcode, maxmcode; lj_mcode_abort(J); sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; - sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ diff --git a/src/lj_obj.h b/src/lj_obj.h index 855727bfab..73b186e256 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -647,6 +647,7 @@ typedef struct global_State { TValue tmptv, tmptv2; /* Temporary TValues. */ Node nilnode; /* Fallback 1-element hash part (nil key and value). */ TValue registrytv; /* Anchor for registry. */ + GCRef vmthref; /* Link to VM thread. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. 
*/ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. */ @@ -663,6 +664,7 @@ typedef struct global_State { } global_State; #define mainthread(g) (&gcref(g->mainthref)->th) +#define vmthread(g) (&gcref(g->vmthref)->th) #define niltv(L) \ check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val) #define niltvg(g) \ diff --git a/src/lj_parse.c b/src/lj_parse.c index f41163804a..181ce4d7e2 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1517,23 +1517,11 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) #endif -/* Check if bytecode op returns. */ -static int bcopisret(BCOp op) -{ - switch (op) { - case BC_CALLMT: case BC_CALLT: - case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: - return 1; - default: - return 0; - } -} - /* Fixup return instruction for prototype. */ static void fs_fixup_ret(FuncState *fs) { BCPos lastpc = fs->pc; - if (lastpc <= fs->lasttarget || !bcopisret(bc_op(fs->bcbase[lastpc-1].ins))) { + if (lastpc <= fs->lasttarget || !bc_isret_or_tail(bc_op(fs->bcbase[lastpc-1].ins))) { if ((fs->bl->flags & FSCOPE_UPVAL)) bcemit_AJ(fs, BC_UCLO, 0, 0); bcemit_AD(fs, BC_RET0, 0, 1); /* Need final return. */ @@ -1605,8 +1593,8 @@ static GCproto *fs_finish(LexState *ls, BCLine line) fs_fixup_line(fs, pt, (void *)((char *)pt + ofsli), numline); fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar); - lj_vmevent_send(L, BC, - setprotoV(L, L->top++, pt); + lj_vmevent_send(G(L), BC, + setprotoV(V, V->top++, pt); ); L->top--; /* Pop table of constants. 
*/ diff --git a/src/lj_prng.c b/src/lj_prng.c index 02146b273a..1bbb7eaba3 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -125,7 +125,7 @@ static PRGR libfunc_rgr; #if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 #define LJ_TARGET_HAS_GETENTROPY 1 #endif -#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX +#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX || LJ_TARGET_HURD #define LJ_TARGET_HAS_GETENTROPY 1 #endif diff --git a/src/lj_record.c b/src/lj_record.c index c6a082d438..6543f2745c 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -973,7 +973,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_LLEAVE); } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */ - } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) { + } else if (1 + pt->framesize >= LJ_MAX_JSLOTS || + J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) { lj_trace_err(J, LJ_TRERR_STACKOV); } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); @@ -1107,7 +1108,10 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) return 0; /* No metamethod. */ } /* The cdata metatable is treated as immutable. */ - if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; + if (LJ_HASFFI && tref_iscdata(ix->tab)) { + mix.tab = TREF_NIL; + goto immutable_mt; + } ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); goto nocheck; diff --git a/src/lj_snap.c b/src/lj_snap.c index cb1044392f..d0d28c817e 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -956,8 +956,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) const BCIns *pc = snap_pc(&map[nent]); lua_State *L = J->L; - /* Set interpreter PC to the next PC to get correct error messages. 
*/ - setcframe_pc(L->cframe, pc+1); + /* Set interpreter PC to the next PC to get correct error messages. + ** But not for returns or tail calls, since pc+1 may be out-of-range. + */ + setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1); setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); /* Make sure the stack is big enough for the slots from the snapshot. */ diff --git a/src/lj_state.c b/src/lj_state.c index d8fc545a0d..68e78faad1 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -195,12 +195,14 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ + fixstring(lj_err_str(L, LJ_ERR_ERRERR)); /* Preallocate err in err msg. */ g->gc.threshold = 4*g->gc.total; #if LJ_HASFFI lj_ctype_initfin(L); #endif lj_trace_initstate(g); lj_err_verify(); + setgcref(g->vmthref, obj2gco(lj_state_new(L))); return NULL; } @@ -260,7 +262,11 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) } #endif GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); - if (GG == NULL || !checkptrGC(GG)) return NULL; + if (GG == NULL) return NULL; + if (!checkptrGC(GG)) { + allocf(allocd, GG, sizeof(GG_State), 0); + return NULL; + } memset(GG, 0, sizeof(GG_State)); L = &GG->L; g = &GG->g; @@ -371,6 +377,10 @@ void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) lj_assertG(L != mainthread(g), "free of main thread"); if (obj2gco(L) == gcref(g->cur_L)) setgcrefnull(g->cur_L); +#if LJ_HASFFI + if (ctype_ctsG(g) && ctype_ctsG(g)->L == L) /* Avoid dangling cts->L. */ + ctype_ctsG(g)->L = mainthread(g); +#endif if (gcref(L->openupval) != NULL) { lj_func_closeuv(L, tvref(L->stack)); lj_trace_abort(g); /* For aa_uref soundness. 
*/ diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 04aebaa472..bb649fc840 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -170,7 +170,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) } else if (tvisbuf(o)) { SBufExt *sbx = bufV(o); *lenp = sbufxlen(sbx); - return sbx->r; + return sbx->r ? sbx->r : ""; } else if (tvisint(o)) { sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index d0bbc5a5fb..947545f821 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -190,6 +190,7 @@ typedef enum ARMIns { ARMI_LDRSB = 0xe01000d0, ARMI_LDRSH = 0xe01000f0, ARMI_LDRD = 0xe00000d0, + ARMI_LDRL = 0xe51f0000, ARMI_STR = 0xe4000000, ARMI_STRB = 0xe4400000, ARMI_STRH = 0xe00000b0, @@ -200,6 +201,7 @@ typedef enum ARMIns { ARMI_BL = 0xeb000000, ARMI_BLX = 0xfa000000, ARMI_BLXr = 0xe12fff30, + ARMI_BX = 0xe12fff10, /* ARMv6 */ ARMI_REV = 0xe6bf0f30, diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 9274187117..3113d1410a 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -110,6 +110,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ @@ -265,6 +266,10 @@ typedef enum A64Ins { A64I_BRAAZ = 0xd61f081f, A64I_BLRAAZ = 0xd63f081f, + A64I_BTI_C = 0xd503245f, + A64I_BTI_J = 0xd503249f, + A64I_BTI_JC = 0xd50324df, + A64I_NOP = 0xd503201f, /* FP */ diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index 5a1b5a7cca..58f311884f 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -115,6 +115,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == 0x60000000) p++; /* Skip PPCI_NOP. 
*/ + if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 6a528e8288..fa32a5d46f 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -242,6 +242,9 @@ typedef enum { XV_SHLX = XV_660f38(f7), XV_SHRX = XV_f20f38(f7), + /* Special NOP instructions. */ + XI_ENDBR64 = 0xfa1e0ff3, + /* Variable-length opcodes. XO_* prefix. */ XO_OR = XO_(0b), XO_MOV = XO_(8b), diff --git a/src/lj_trace.c b/src/lj_trace.c index 0e948e8d08..47d7faa5c9 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -296,8 +296,8 @@ int lj_trace_flushall(lua_State *L) /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "flush")); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "flush")); ); return 0; } @@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g) J->k32[LJ_K32_M2P64] = 0xdf800000; #endif #endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; + J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0); + J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0); +#endif } /* Free everything associated with the JIT compiler state. */ @@ -408,7 +416,6 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) /* Start tracing. */ static void trace_start(jit_State *J) { - lua_State *L; TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? 
*/ @@ -458,20 +465,19 @@ static void trace_start(jit_State *J) J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "start")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, proto_bcpos(J->pt, J->pc)); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "start")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, proto_bcpos(J->pt, J->pc)); if (J->parent) { - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); } else { BCOp op = bc_op(*J->pc); if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { - setintV(L->top++, J->exitno); /* Parent of stitched trace. */ - setintV(L->top++, -1); + setintV(V->top++, J->exitno); /* Parent of stitched trace. */ + setintV(V->top++, -1); } } ); @@ -486,7 +492,6 @@ static void trace_stop(jit_State *J) GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; GCtrace *T = J->curfinal; - lua_State *L; switch (op) { case BC_FORL: @@ -543,11 +548,10 @@ static void trace_stop(jit_State *J) J->postproc = LJ_POST_NONE; trace_save(J, T); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "stop")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "stop")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); ); } @@ -602,18 +606,17 @@ static int trace_abort(jit_State *J) /* Is there anything to abort? */ traceno = J->cur.traceno; if (traceno) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. 
*/ J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; - lj_vmevent_send(L, TRACE, + lj_vmevent_send(J2G(J), TRACE, cTValue *bot = tvref(L->stack)+LJ_FR2; cTValue *frame; const BCIns *pc; BCPos pos = 0; - setstrV(L, L->top++, lj_str_newlit(L, "abort")); - setintV(L->top++, traceno); + setstrV(V, V->top++, lj_str_newlit(V, "abort")); + setintV(V->top++, traceno); /* Find original Lua function call to generate a better error message. */ - for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + for (frame = L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { if (isluafunc(frame_func(frame))) { pos = proto_bcpos(funcproto(frame_func(frame)), pc); break; @@ -625,10 +628,10 @@ static int trace_abort(jit_State *J) pc = frame_pc(frame) - 1; } } - setfuncV(L, L->top++, frame_func(frame)); - setintV(L->top++, pos); - copyTV(L, L->top++, restorestack(L, errobj)); - copyTV(L, L->top++, &J->errinfo); + setfuncV(V, V->top++, frame_func(frame)); + setintV(V->top++, pos); + copyTV(V, V->top++, L->top-1); + copyTV(V, V->top++, &J->errinfo); ); /* Drop aborted trace after the vmevent (which may still access it). */ setgcrefnull(J->trace[traceno]); @@ -637,10 +640,15 @@ static int trace_abort(jit_State *J) J->cur.traceno = 0; } L->top--; /* Remove error object */ - if (e == LJ_TRERR_DOWNREC) + if (e == LJ_TRERR_DOWNREC) { return trace_downrec(J); - else if (e == LJ_TRERR_MCODEAL) + } else if (e == LJ_TRERR_MCODEAL) { + if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */ + J->flags &= ~JIT_F_ON; + lj_dispatch_update(J2G(J)); + } lj_trace_flushall(L); + } return 0; } @@ -679,16 +687,16 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); - lj_vmevent_send_(L, RECORD, + lj_vmevent_send_(J2G(J), RECORD, /* Save/restore state for trace recorder. 
*/ TValue savetv = J2G(J)->tmptv; TValue savetv2 = J2G(J)->tmptv2; TraceNo parent = J->parent; ExitNo exitno = J->exitno; - setintV(L->top++, J->cur.traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1); - setintV(L->top++, J->framedepth); + setintV(V->top++, J->cur.traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1); + setintV(V->top++, J->framedepth); , J2G(J)->tmptv = savetv; J2G(J)->tmptv2 = savetv2; @@ -826,23 +834,23 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) #ifndef LUAJIT_DISABLE_VMEVENT /* Push all registers from exit state. */ -static void trace_exit_regs(lua_State *L, ExitState *ex) +static void trace_exit_regs(lua_State *V, ExitState *ex) { int32_t i; - setintV(L->top++, RID_NUM_GPR); - setintV(L->top++, RID_NUM_FPR); + setintV(V->top++, RID_NUM_GPR); + setintV(V->top++, RID_NUM_FPR); for (i = 0; i < RID_NUM_GPR; i++) { if (sizeof(ex->gpr[i]) == sizeof(int32_t)) - setintV(L->top++, (int32_t)ex->gpr[i]); + setintV(V->top++, (int32_t)ex->gpr[i]); else - setnumV(L->top++, (lua_Number)ex->gpr[i]); + setnumV(V->top++, (lua_Number)ex->gpr[i]); } #if !LJ_SOFTFP for (i = 0; i < RID_NUM_FPR; i++) { - setnumV(L->top, ex->fpr[i]); - if (LJ_UNLIKELY(tvisnan(L->top))) - setnanV(L->top); - L->top++; + setnumV(V->top, ex->fpr[i]); + if (LJ_UNLIKELY(tvisnan(V->top))) + setnanV(V->top); + V->top++; } #endif } @@ -884,6 +892,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) #ifdef EXITSTATE_PCREG J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); +#else + UNUSED(ex); #endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT @@ -904,11 +914,11 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. 
*/ if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) - lj_vmevent_send(L, TEXIT, - lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - trace_exit_regs(L, ex); + lj_vmevent_send(G(L), TEXIT, + lj_state_checkstack(V, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); + trace_exit_regs(V, ex); ); pc = exd.pc; diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 070c6144aa..8913ead946 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c @@ -38,6 +38,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) { global_State *g = G(L); + lua_State *oldL = gco2th(gcref(g->cur_L)); uint8_t oldmask = g->vmevmask; uint8_t oldh = hook_save(g); int status; @@ -51,6 +52,10 @@ void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) fputs(tvisstr(L->top) ? strVdata(L->top) : "?", stderr); fputc('\n', stderr); } + setgcref(g->cur_L, obj2gco(oldL)); +#if LJ_HASJIT + G2J(g)->L = oldL; +#endif hook_restore(g, oldh); if (g->vmevmask != VMEVENT_NOCACHE) g->vmevmask = oldmask; /* Restore event mask, but not if not modified. 
*/ diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h index 8a99536068..cdd4f75825 100644 --- a/src/lj_vmevent.h +++ b/src/lj_vmevent.h @@ -32,23 +32,25 @@ typedef enum { } VMEvent; #ifdef LUAJIT_DISABLE_VMEVENT -#define lj_vmevent_send(L, ev, args) UNUSED(L) -#define lj_vmevent_send_(L, ev, args, post) UNUSED(L) +#define lj_vmevent_send(g, ev, args) UNUSED(g) +#define lj_vmevent_send_(g, ev, args, post) UNUSED(g) #else -#define lj_vmevent_send(L, ev, args) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send(g, ev, args) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ } \ } -#define lj_vmevent_send_(L, ev, args, post) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send_(g, ev, args, post) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ post \ } \ } diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 69c0c61a9f..3f32e1a0e5 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -5,11 +5,12 @@ @rem Then cd to this directory and run this script. Use the following @rem options (in order), if needed. The default is a dynamic release build. 
@rem -@rem nogc64 disable LJ_GC64 mode for x64 -@rem debug emit debug symbols -@rem amalg amalgamated build -@rem static create static lib to statically link into your project -@rem mixed create static lib to build a DLL in your project +@rem nogc64 disable LJ_GC64 mode for x64 +@rem lua52compat enable extra Lua 5.2 extensions +@rem debug emit debug symbols +@rem amalg amalgamated build +@rem static create static lib to statically link into your project +@rem mixed create static lib to build a DLL in your project @if not defined INCLUDE goto :FAIL @@ -18,7 +19,7 @@ @set DEBUGCFLAGS= @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline @set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD -@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd +@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd @set LJCOMPILETARGET=/Zi @set LJLINKTYPE=/DEBUG /RELEASE @set LJLINKTYPE_DEBUG=/DEBUG @@ -64,6 +65,10 @@ if exist minilua.exe.manifest^ @set DASC=vm_x86.dasc @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 :DA +@if "%1" neq "lua52compat" goto :NOLUA52COMPAT +@shift +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT +:NOLUA52COMPAT minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index ca08fc117e..86bef0cfbc 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1717,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, pl - | math_minmax math_max, lt, le + | math_minmax math_min, gt, hs + | math_minmax math_max, lt, ls | |//-- String library ----------------------------------------------------- | diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 58efe400e4..a437b65766 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -92,6 +92,17 @@ |.macro ret_auth; ret; .endmacro |.endif | +|// ARM64 branch target identification (BTI). 
+|.if BRANCH_TRACK +|.macro bti_jump; bti j; .endmacro +|.macro bti_call; bti c; .endmacro +|.macro bti_tailcall; bti jc; .endmacro +|.else +|.macro bti_jump; .endmacro +|.macro bti_call; .endmacro +|.macro bti_tailcall; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Stack layout while in interpreter. Must match with lj_frame.h. @@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | mov CRET1, CARG2 | ldr L, SAVE_L | ldr GL, L->glref |->vm_unwind_c_eh: // Landing pad for external unwinder. + | bti_tailcall | mv_vmstate TMP0w, C | st_vmstate TMP0w | b ->vm_leave_unw | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | ldr L, SAVE_L | init_constants | ldr GL, L->glref // Setup pointer to global state. |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | bti_tailcall | mov RC, #16 // 2 results: false + error message. | ldr BASE, L->base | mov_false TMP0 @@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // RA = resultptr, CARG4 = meta base + | bti_jump | ldr INSw, [PC, #-4] | sub CARG2, CARG4, #32 | ldr TMP0, [RA] @@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx) | sub RB, RB, #0x20000 | csel PC, PC, RB, lo |->cont_nop: + | bti_jump | ins_next | |->cont_ra: // RA = resultptr + | bti_jump | ldr INSw, [PC, #-4] | ldr TMP0, [RA] | decode_RA TMP1, INS @@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx) | b ->cont_nop | |->cont_condt: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_true TMP1 | cmp TMP1, TMP0 // Branch if result is true. | b <4 | |->cont_condf: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_false TMP1 | cmp TMP0, TMP1 // Branch if result is false. 
@@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | bti_jump |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: + | bti_jump | ldr CARG1, [BASE] | cmp NARGS8:RC, #8 | blo ->fff_fallback @@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_2, name |->ff_ .. name: + | bti_jump | ldp CARG1, CARG2, [BASE] | cmp NARGS8:RC, #16 | blo ->fff_fallback @@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | bti_jump | ldrb CARG1w, GL->hookmask | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | bne >5 @@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | bti_jump | ldrb TMP2w, GL->hookmask | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? |5: // Re-dispatch to static ins. @@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->vm_inshook: // Dispatch target for instr/line hooks. + | bti_jump | ldrb TMP2w, GL->hookmask | ldr TMP3w, GL->hookcount | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? @@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->cont_hook: // Continue from hook yield. + | bti_jump | ldr CARG1, [CARG4, #-40] | add PC, PC, #4 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. @@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. + | bti_jump | mov CARG2, PC |.if JIT | b >1 @@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // RA = resultptr, CARG4 = meta base + | bti_jump | ldr RBw, SAVE_MULTRES | ldr INSw, [PC, #-4] | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. 
@@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | bti_jump | mov CARG1, L | str BASE, L->base | mov CARG2, PC @@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_exit_handler: |.if JIT + | bti_call | sub sp, sp, #(64*8) | savex_, 0, 1 | savex_, 2, 3 @@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_exit_interp: + | bti_jump | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |.if JIT | ldr L, SAVE_L @@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx) | | // int lj_vm_modi(int dividend, int divisor); |->vm_modi: + | bti_call | eor CARG4w, CARG1w, CARG2w | cmp CARG4w, #0 | eor CARG3w, CARG1w, CARG1w, asr #31 @@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in CRET2w. |->vm_next: |.if JIT + | bti_call | ldr NEXT_LIM, NEXT_TAB->asize | ldr NEXT_TMP1, NEXT_TAB->array |1: // Traverse array part. @@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant bti instructions. 
*/ + default: + | bti_jump + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -3443,6 +3497,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |->vm_IITERN: | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT + | bti_jump + |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] | ldrh TMP3w, [PC, # OFS_RD] @@ -4122,6 +4179,19 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 3\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 3\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000000\n" + "\t.long 4\n" + "\t.long 1\n" + "\t.long 0\n"); #endif break; #if !LJ_NO_UNWIND diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index f501495b11..4cfb7b6ad2 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -189,6 +189,14 @@ | |.endif | +|//-- Control-Flow Enforcement Technique (CET) --------------------------- +| +|.if BRANCH_TRACK +|.macro endbr; endbr64; .endmacro +|.else +|.macro endbr; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Instruction headers. @@ -484,15 +492,18 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | endbr | mov eax, CARG2d // Error return status for vm_pcall. | mov rsp, CARG1 |->vm_unwind_c_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov GL:RB, L:RB->glref | mov dword GL:RB->vmstate, ~LJ_VMST_C | jmp ->vm_leave_unw | |->vm_unwind_rethrow: + | endbr |.if not X64WIN | mov CARG1, SAVE_L | mov CARG2d, eax @@ -502,9 +513,11 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. 
| // (void *cframe) + | endbr | and CARG1, CFRAME_RAWMASK | mov rsp, CARG1 |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov RDd, 1+1 // Really 1+2 results, incr. later. | mov BASE, L:RB->base @@ -706,6 +719,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // BASE = base, RC = result, RB = mbase + | endbr | movzx RAd, PC_RB | sub RB, 32 | lea RA, [BASE+RA*8] @@ -774,6 +788,7 @@ static void build_subroutines(BuildCtx *ctx) | test RC, RC | jz >3 |->cont_ra: // BASE = base, RC = result + | endbr | movzx RAd, PC_RA | mov RB, [RC] | mov [BASE+RA*8], RB @@ -851,6 +866,7 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, [BASE+RA*8] | mov [RC], RB |->cont_nop: // BASE = base, (RC = result) + | endbr | ins_next | |3: // Call __newindex metamethod. @@ -921,6 +937,7 @@ static void build_subroutines(BuildCtx *ctx) | ins_next | |->cont_condt: // BASE = base, RC = result + | endbr | add PC, 4 | mov ITYPE, [RC] | sar ITYPE, 47 @@ -929,6 +946,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp <6 | |->cont_condf: // BASE = base, RC = result + | endbr | mov ITYPE, [RC] | sar ITYPE, 47 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. @@ -1132,16 +1150,17 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | endbr |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: - | cmp NARGS:RDd, 1+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 1+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_2, name |->ff_ .. name: - | cmp NARGS:RDd, 2+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 2+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_n, name, op @@ -2207,6 +2226,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_VMEVENT // No recording while in vmevent. 
| jnz >5 @@ -2220,12 +2240,14 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 | jmp >1 | |->vm_inshook: // Dispatch target for instr/line hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 @@ -2253,6 +2275,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | |->cont_hook: // Continue from hook yield. + | endbr | add PC, 4 | mov RA, [RB-40] | mov MULTRES, RAd // Restore MULTRES for *M ins. @@ -2277,6 +2300,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. + | endbr | mov SAVE_PC, PC |.if JIT | jmp >1 @@ -2313,6 +2337,7 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // BASE = base, RC = result, RB = mbase + | endbr | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE | mov TMPRd, MULTRES @@ -2364,6 +2389,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | endbr | mov L:RB, SAVE_L | mov L:RB->base, BASE | mov CARG2, PC // Caveat: CARG2 == BASE @@ -2383,6 +2409,7 @@ static void build_subroutines(BuildCtx *ctx) |// The 16 bit exit number is stored with two (sign-extended) push imm8. |->vm_exit_handler: |.if JIT + | endbr | push r13; push r12 | push r11; push r10; push r9; push r8 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp @@ -2432,6 +2459,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif |->vm_exit_interp: | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. + | endbr |.if JIT | // Restore additional callee-save registers only used in compiled code. 
|.if X64WIN @@ -2524,6 +2552,7 @@ static void build_subroutines(BuildCtx *ctx) |.macro vm_round, name, mode, cond |->name: |->name .. _sse: + | endbr | sseconst_abs xmm2, RD | sseconst_2p52 xmm3, RD | movaps xmm1, xmm0 @@ -2634,6 +2663,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in edx. |->vm_next: |.if JIT + | endbr | mov NEXT_ASIZE, NEXT_TAB->asize |1: // Traverse array part. | cmp NEXT_IDX, NEXT_ASIZE; jae >5 @@ -2817,6 +2847,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant endbr instructions. */ + default: + | endbr + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -4090,6 +4140,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | hotloop RBd |.endif |->vm_IITERN: + |.if JIT + | endbr + |.endif | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB @@ -4847,6 +4900,30 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 8\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK) + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 8\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000002\n" + "\t.long 4\n" + "\t.long %d\n" + "\t.long 0\n", +#if LJ_ABI_BRANCH_TRACK + 1| +#else + 0| +#endif +#if LJ_ABI_SHADOW_STACK + 2 +#else + 0 +#endif + ); #endif break; #if !LJ_NO_UNWIND