From e3c70a7d813ec7e3226510acedd64fc96021d4b0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 00:05:08 +0100 Subject: [PATCH 01/32] macOS: Fix support for Apple hardened runtime. Reported by Christian Clason. #1334 --- src/lj_mcode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 43694226a6..d8fa165e1d 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -138,7 +138,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) static int mcode_setprot(void *p, size_t sz, int prot) { #if MCMAP_CREATE - pthread_jit_write_protect_np((prot & PROC_EXEC)); + pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else return mprotect(p, sz, prot); From 4f2bb199fe7138247e0b075c886c9e9197cf0271 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 02:53:20 +0100 Subject: [PATCH 02/32] macOS: Fix Apple hardened runtime support and put behind build option. Reported by vanc. #1334 --- src/lj_ccallback.c | 2 +- src/lj_mcode.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index d93dbc6457..7f08f0a848 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -263,7 +263,7 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) #endif /* Check for macOS hardened runtime. */ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include <pthread.h> #define CCMAP_CREATE MAP_JIT #else diff --git a/src/lj_mcode.c b/src/lj_mcode.c index d8fa165e1d..2b8ac2df58 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -99,7 +99,7 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #endif /* Check for macOS hardened runtime.
*/ -#if LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 +#if defined(LUAJIT_ENABLE_OSX_HRT) && LUAJIT_SECURITY_MCODE != 0 && defined(MAP_JIT) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 110000 #include <pthread.h> #define MCMAP_CREATE MAP_JIT #else @@ -111,6 +111,8 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) #ifdef PROT_MPROTECT #define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) +#elif MCMAP_CREATE +#define MCPROT_CREATE PROT_EXEC #else #define MCPROT_CREATE 0 #endif From 84cb21ffaf648b472ff3884556e2c413e8abe179 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Mar 2025 02:56:07 +0100 Subject: [PATCH 03/32] REVERT: Change handling of nil value markers in template tables. --- src/lj_bcread.c | 10 ++++------ src/lj_bcwrite.c | 8 +++----- src/lj_opt_fold.c | 6 ++---- src/lj_opt_mem.c | 4 +--- src/lj_parse.c | 20 +++++++++++++++----- src/lj_tab.c | 1 - 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 37e909b391..ee7d7c1870 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt) } /* Read a single constant key/value of a template table. */ -static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) +static void bcread_ktabk(LexState *ls, TValue *o) { MSize tp = bcread_uleb128(ls); if (tp >= BCDUMP_KTAB_STR) { @@ -191,8 +191,6 @@ static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) } else if (tp == BCDUMP_KTAB_NUM) { o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); - } else if (tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker.
*/ - settabV(ls->L, o, t); } else { lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); @@ -209,15 +207,15 @@ static GCtab *bcread_ktab(LexState *ls) MSize i; TValue *o = tvref(t->array); for (i = 0; i < narray; i++, o++) - bcread_ktabk(ls, o, t); + bcread_ktabk(ls, o); } if (nhash) { /* Read hash entries. */ MSize i; for (i = 0; i < nhash; i++) { TValue key; - bcread_ktabk(ls, &key, t); + bcread_ktabk(ls, &key); lj_assertLS(!tvisnil(&key), "nil key"); - bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); + bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); } } return t; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index ec6f13c8d5..de200ef4ad 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -71,8 +71,6 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) *p++ = BCDUMP_KTAB_NUM; p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); - } else if (tvistab(o)) { /* Write the nil value marker as a nil. */ - *p++ = BCDUMP_KTAB_NIL; } else { lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); @@ -135,7 +133,7 @@ static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) TValue **heap = ctx->heap; MSize i = nhash; for (;; node--) { /* Build heap. */ - if (!tvisnil(&node->val)) { + if (!tvisnil(&node->key)) { bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); if (i == 0) break; } @@ -165,7 +163,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].val); + nhash += !tvisnil(&node[i].key); } /* Write number of array slots and hash slots. 
*/ p = lj_strfmt_wuleb128(p, narray); @@ -186,7 +184,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) } else { MSize i = nhash; for (;; node--) - if (!tvisnil(&node->val)) { + if (!tvisnil(&node->key)) { bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->val, 1); if (--i == 0) break; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 6fdf45663f..36aacebb03 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2217,11 +2217,9 @@ LJFOLD(HREF TDUP KNUM) LJFOLDF(fwd_href_tdup) { TValue keyv; - cTValue *val; lj_ir_kvalue(J->L, &keyv, fright); - val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv); - /* Check for either nil or the nil value marker in the template table. */ - if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J)) + if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) && + lj_opt_fwd_href_nokey(J)) return lj_ir_kkptr(J, niltvg(J2G(J))); return NEXTFOLD; } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 6f956b37e9..8cacfcfef9 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -233,9 +233,7 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) return lj_ir_knum_u64(J, tv->u64); else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else if (tvistab(tv)) /* Template table nil value marker. */ - return TREF_NIL; - else if (tvisstr(tv)) + else if (tvisgcv(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index f41163804a..7009759808 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1725,7 +1725,7 @@ static void expr_table(LexState *ls, ExpDesc *e) FuncState *fs = ls->fs; BCLine line = ls->linenumber; GCtab *t = NULL; - int vcall = 0, needarr = 0; + int vcall = 0, needarr = 0, fixt = 0; uint32_t narr = 1; /* First array index. */ uint32_t nhash = 0; /* Number of hash entries. 
*/ BCReg freg = fs->freereg; @@ -1769,10 +1769,9 @@ static void expr_table(LexState *ls, ExpDesc *e) lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ expr_kvalue(fs, v, &val); - /* Mark nil value with table value itself to preserve the key. */ - if (key.k == VKSTR && tvisnil(v)) settabV(fs->L, v, t); - } else { /* Preserve the key for the following non-const store. */ - settabV(fs->L, v, t); + } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ + settabV(fs->L, v, t); /* Preserve key with table itself as value. */ + fixt = 1; /* Fix this later, after all resizes. */ goto nonconst; } } else { @@ -1814,6 +1813,17 @@ static void expr_table(LexState *ls, ExpDesc *e) } else { if (needarr && t->asize < narr) lj_tab_reasize(fs->L, t, narr-1); + if (fixt) { /* Fix value for dummy keys in template table. */ + Node *node = noderef(t->node); + uint32_t i, hmask = t->hmask; + for (i = 0; i <= hmask; i++) { + Node *n = &node[i]; + if (tvistab(&n->val)) { + lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); + setnilV(&n->val); /* Turn value into nil. */ + } + } + } lj_gc_check(fs->L); } } diff --git a/src/lj_tab.c b/src/lj_tab.c index 62e336111a..2d08055206 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -194,7 +194,6 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) Node *next = nextnode(kn); /* Don't use copyTV here, since it asserts on a copy of a dead key. */ n->val = kn->val; n->key = kn->key; - if (tvistab(&n->val)) setnilV(&n->val); /* Replace nil value marker. */ setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); } } From 538a82133ad6fddfd0ca64de167c4aca3bc1a2da Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 11 Mar 2025 23:04:30 +0100 Subject: [PATCH 04/32] Change handling of nil value markers in template tables. Reported by Bernhard M. Wiedemann. #1348 #1155 Fixes from Peter Cawley, Christian Clason, Lewis Russell. 
--- src/lj_bcread.c | 10 ++++++---- src/lj_bcwrite.c | 8 +++++--- src/lj_opt_fold.c | 6 ++++-- src/lj_opt_mem.c | 4 +++- src/lj_parse.c | 20 +++++--------------- src/lj_tab.c | 1 + 6 files changed, 24 insertions(+), 25 deletions(-) diff --git a/src/lj_bcread.c b/src/lj_bcread.c index ee7d7c1870..5570952208 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -179,7 +179,7 @@ static const void *bcread_varinfo(GCproto *pt) } /* Read a single constant key/value of a template table. */ -static void bcread_ktabk(LexState *ls, TValue *o) +static void bcread_ktabk(LexState *ls, TValue *o, GCtab *t) { MSize tp = bcread_uleb128(ls); if (tp >= BCDUMP_KTAB_STR) { @@ -191,6 +191,8 @@ static void bcread_ktabk(LexState *ls, TValue *o) } else if (tp == BCDUMP_KTAB_NUM) { o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); + } else if (t && tp == BCDUMP_KTAB_NIL) { /* Restore nil value marker. */ + settabV(ls->L, o, t); } else { lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); setpriV(o, ~tp); @@ -207,15 +209,15 @@ static GCtab *bcread_ktab(LexState *ls) MSize i; TValue *o = tvref(t->array); for (i = 0; i < narray; i++, o++) - bcread_ktabk(ls, o); + bcread_ktabk(ls, o, NULL); } if (nhash) { /* Read hash entries. */ MSize i; for (i = 0; i < nhash; i++) { TValue key; - bcread_ktabk(ls, &key); + bcread_ktabk(ls, &key, NULL); lj_assertLS(!tvisnil(&key), "nil key"); - bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); + bcread_ktabk(ls, lj_tab_set(ls->L, t, &key), t); } } return t; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index de200ef4ad..ec6f13c8d5 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -71,6 +71,8 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) *p++ = BCDUMP_KTAB_NUM; p = lj_strfmt_wuleb128(p, o->u32.lo); p = lj_strfmt_wuleb128(p, o->u32.hi); + } else if (tvistab(o)) { /* Write the nil value marker as a nil. 
*/ + *p++ = BCDUMP_KTAB_NIL; } else { lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); *p++ = BCDUMP_KTAB_NIL+~itype(o); @@ -133,7 +135,7 @@ static void bcwrite_ktab_sorted_hash(BCWriteCtx *ctx, Node *node, MSize nhash) TValue **heap = ctx->heap; MSize i = nhash; for (;; node--) { /* Build heap. */ - if (!tvisnil(&node->key)) { + if (!tvisnil(&node->val)) { bcwrite_ktabk_heap_insert(heap, --i, nhash, &node->key); if (i == 0) break; } @@ -163,7 +165,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) MSize i, hmask = t->hmask; Node *node = noderef(t->node); for (i = 0; i <= hmask; i++) - nhash += !tvisnil(&node[i].key); + nhash += !tvisnil(&node[i].val); } /* Write number of array slots and hash slots. */ p = lj_strfmt_wuleb128(p, narray); @@ -184,7 +186,7 @@ static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) } else { MSize i = nhash; for (;; node--) - if (!tvisnil(&node->key)) { + if (!tvisnil(&node->val)) { bcwrite_ktabk(ctx, &node->key, 0); bcwrite_ktabk(ctx, &node->val, 1); if (--i == 0) break; diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index 36aacebb03..6fdf45663f 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -2217,9 +2217,11 @@ LJFOLD(HREF TDUP KNUM) LJFOLDF(fwd_href_tdup) { TValue keyv; + cTValue *val; lj_ir_kvalue(J->L, &keyv, fright); - if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) && - lj_opt_fwd_href_nokey(J)) + val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv); + /* Check for either nil or the nil value marker in the template table. 
*/ + if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J)) return lj_ir_kkptr(J, niltvg(J2G(J))); return NEXTFOLD; } diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index 8cacfcfef9..6f956b37e9 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -233,7 +233,9 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) return lj_ir_knum_u64(J, tv->u64); else if (tvisint(tv)) return lj_ir_kint(J, intV(tv)); - else if (tvisgcv(tv)) + else if (tvistab(tv)) /* Template table nil value marker. */ + return TREF_NIL; + else if (tvisstr(tv)) return lj_ir_kstr(J, strV(tv)); } /* Othwerwise: don't intern as a constant. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index 7009759808..f41163804a 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1725,7 +1725,7 @@ static void expr_table(LexState *ls, ExpDesc *e) FuncState *fs = ls->fs; BCLine line = ls->linenumber; GCtab *t = NULL; - int vcall = 0, needarr = 0, fixt = 0; + int vcall = 0, needarr = 0; uint32_t narr = 1; /* First array index. */ uint32_t nhash = 0; /* Number of hash entries. */ BCReg freg = fs->freereg; @@ -1769,9 +1769,10 @@ static void expr_table(LexState *ls, ExpDesc *e) lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ expr_kvalue(fs, v, &val); - } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ - settabV(fs->L, v, t); /* Preserve key with table itself as value. */ - fixt = 1; /* Fix this later, after all resizes. */ + /* Mark nil value with table value itself to preserve the key. */ + if (key.k == VKSTR && tvisnil(v)) settabV(fs->L, v, t); + } else { /* Preserve the key for the following non-const store. */ + settabV(fs->L, v, t); goto nonconst; } } else { @@ -1813,17 +1814,6 @@ static void expr_table(LexState *ls, ExpDesc *e) } else { if (needarr && t->asize < narr) lj_tab_reasize(fs->L, t, narr-1); - if (fixt) { /* Fix value for dummy keys in template table. 
*/ - Node *node = noderef(t->node); - uint32_t i, hmask = t->hmask; - for (i = 0; i <= hmask; i++) { - Node *n = &node[i]; - if (tvistab(&n->val)) { - lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); - setnilV(&n->val); /* Turn value into nil. */ - } - } - } lj_gc_check(fs->L); } } diff --git a/src/lj_tab.c b/src/lj_tab.c index 2d08055206..62e336111a 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -194,6 +194,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) Node *next = nextnode(kn); /* Don't use copyTV here, since it asserts on a copy of a dead key. */ n->val = kn->val; n->key = kn->key; + if (tvistab(&n->val)) setnilV(&n->val); /* Replace nil value marker. */ setmref(n->next, next == NULL? next : (Node *)((char *)next + d)); } } From e9e4b6d302b5e7e4a04a3c7f78cb561a2c156a37 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 7 Apr 2025 09:22:07 +0200 Subject: [PATCH 05/32] Initialize unused value when specializing to cdata metatable. Reported by jakitliang. #1354 --- src/lj_record.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index 20a7ea36e1..d336f642e4 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -854,7 +854,10 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) return 0; /* No metamethod. */ } /* The cdata metatable is treated as immutable. */ - if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; + if (LJ_HASFFI && tref_iscdata(ix->tab)) { + mix.tab = TREF_NIL; + goto immutable_mt; + } ix->mt = mix.tab = lj_ir_ktab(J, mt); goto nocheck; } From e76bb50d44702f601ec5dd167b03b475ed53860c Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 7 Apr 2025 10:27:40 +0200 Subject: [PATCH 06/32] Fix error generation in load*. Reported by Sergey Kaplun. 
#1353 --- src/lj_load.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_load.c b/src/lj_load.c index 90a61027ad..6c8ae9f154 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -108,8 +108,9 @@ LUALIB_API int luaL_loadfilex(lua_State *L, const char *filename, copyTV(L, L->top-1, L->top); } if (err) { + const char *fname = filename ? filename : "stdin"; L->top--; - lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(err)); + lua_pushfstring(L, "cannot read %s: %s", fname, strerror(err)); return LUA_ERRFILE; } return status; From c262976486e1e007b56380b6a36bfbea5f51d470 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:06:47 +0200 Subject: [PATCH 07/32] ARM64: Fix pass-by-value struct calling conventions. Reported by AnthonyK213. #1357 --- src/lj_ccall.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index ae69cd28d1..f003d75674 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -781,17 +781,24 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) { CTSize sz = ct->size; unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); - while (ct->sib) { + while (ct->sib && n <= 4) { + unsigned int m = 1; CType *sct; ct = ctype_get(cts, ct->sib); if (ctype_isfield(ct->info)) { sct = ctype_rawchild(cts, ct); + if (ctype_isarray(sct->info)) { + CType *cct = ctype_rawchild(cts, sct); + if (!cct->size) continue; + m = sct->size / cct->size; + sct = cct; + } if (ctype_isfp(sct->info)) { r |= sct->size; - if (!isu) n++; else if (n == 0) n = 1; + if (!isu) n += m; else if (n < m) n = m; } else if (ctype_iscomplex(sct->info)) { r |= (sct->size >> 1); - if (!isu) n += 2; else if (n < 2) n = 2; + if (!isu) n += 2*m; else if (n < 2*m) n = 2*m; } else if (ctype_isstruct(sct->info)) { goto substruct; } else { @@ -803,10 +810,11 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) sct = ctype_rawchild(cts, ct); substruct: if 
(sct->size > 0) { - unsigned int s = ccall_classify_struct(cts, sct); + unsigned int s = ccall_classify_struct(cts, sct), sn; if (s <= 1) goto noth; r |= (s & 255); - if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + sn = (s >> 8) * m; + if (!isu) n += sn; else if (n < sn) n = sn; } } } From 51d4c26ec7805d77bfc3470fdf99b73c4ef2faec Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:45:38 +0200 Subject: [PATCH 08/32] ARM: Fix soft-float math.min()/math.max(). Reported by Dong Jianqiang. #1356 --- src/lj_asm_arm.h | 2 +- src/vm_arm.dasc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index de435057e1..24deaeae27 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -1927,7 +1927,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HS : CC_LS); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index ca08fc117e..86bef0cfbc 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -1717,8 +1717,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, pl - | math_minmax math_max, lt, le + | math_minmax math_min, gt, hs + | math_minmax math_max, lt, ls | |//-- String library ----------------------------------------------------- | From eec7a8016c3381b949b5d84583800d05897fa960 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 10 Apr 2025 22:53:50 +0200 Subject: [PATCH 09/32] Prevent Clang UB 'optimization' which breaks integerness checks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thanks to Kacper Michajłow. 
#1351 #1355 --- src/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Makefile b/src/Makefile index 4a56d1e8e5..c83abfa0b6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -302,6 +302,9 @@ endif ifneq (,$(INSTALL_LJLIBD)) TARGET_XCFLAGS+= -DLUA_LJDIR=\"$(INSTALL_LJLIBD)\" endif +ifeq (,$(shell $(TARGET_CC) -o /dev/null -c -x c /dev/null -fno-strict-float-cast-overflow 2>/dev/null || echo 1)) + TARGET_XCFLAGS+= -fno-strict-float-cast-overflow +endif ############################################################################## # Target system detection. From 9c8eb7cfe10ef5939d9b358a0bd805a610818ba5 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 20:36:24 +0200 Subject: [PATCH 10/32] FFI: Fix dangling CType references. Reported by Sergey Kaplun. Collateral of #1360 --- src/lj_ccall.c | 19 ++++++++++++------- src/lj_crecord.c | 21 +++++++++++++-------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 9c99bec7fa..5d6bb03d50 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -623,7 +623,9 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) /* -- Common C call handling ---------------------------------------------- */ -/* Infer the destination CTypeID for a vararg argument. */ +/* Infer the destination CTypeID for a vararg argument. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) { if (tvisnumber(o)) { @@ -651,13 +653,16 @@ CTypeID lj_ccall_ctid_vararg(CTState *cts, cTValue *o) } } -/* Setup arguments for C call. */ +/* Setup arguments for C call. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, CCallState *cc) { int gcsteps = 0; TValue *o, *top = L->top; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. 
*/ CType *ctr; MSize maxgpr, ngpr = 0, nsp = 0, narg; #if CCALL_NARG_FPR @@ -676,7 +681,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, #if LJ_TARGET_X86 /* x86 has several different calling conventions. */ cc->resx87 = 0; - switch (ctype_cconv(ct->info)) { + switch (ctype_cconv(info)) { case CTCC_FASTCALL: maxgpr = 2; break; case CTCC_THISCALL: maxgpr = 1; break; default: maxgpr = 0; break; @@ -693,7 +698,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, } else if (ctype_iscomplex(ctr->info) || ctype_isstruct(ctr->info)) { /* Preallocate cdata object and anchor it after arguments. */ CTSize sz = ctr->size; - GCcdata *cd = lj_cdata_new(cts, ctype_cid(ct->info), sz); + GCcdata *cd = lj_cdata_new(cts, ctype_cid(info), sz); void *dp = cdataptr(cd); setcdataV(L, L->top++, cd); if (ctype_isstruct(ctr->info)) { @@ -729,7 +734,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, lua_assert(ctype_isfield(ctf->info)); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too many arguments. */ did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ isva = 1; @@ -869,11 +874,11 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) ct = ctype_rawchild(cts, ct); } if (ctype_isfunc(ct->info)) { + CTypeID id = ctype_typeid(cts, ct); CCallState cc; int gcsteps, ret; cc.func = (void (*)(void))cdata_getptr(cdataptr(cd), sz); gcsteps = ccall_set_args(L, cts, ct, &cc); - ct = (CType *)((intptr_t)ct-(intptr_t)cts->tab); cts->cb.slot = ~0u; lj_vm_ffi_call(&cc); if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ @@ -881,7 +886,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) setlightudV(&tv, (void *)cc.func); setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); } - ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ + ct = ctype_get(cts, id); /* Table may have been reallocated. 
*/ gcsteps += ccall_get_results(L, cts, ct, &cc, &ret); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Automatically detect __stdcall and fix up C function declaration. */ diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 216144f32d..f686b35f21 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -988,12 +988,15 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) } } -/* Record argument conversions. */ +/* Record argument conversions. +** Note: may reallocate cts->tab and invalidate CType pointers. +*/ static TRef crec_call_args(jit_State *J, RecordFFData *rd, CTState *cts, CType *ct) { TRef args[CCI_NARGS_MAX]; CTypeID fid; + CTInfo info = ct->info; /* lj_ccall_ctid_vararg may invalidate ct pointer. */ MSize i, n; TRef tr, *base; cTValue *o; @@ -1002,9 +1005,9 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, TRef *arg0 = NULL, *arg1 = NULL; #endif int ngpr = 0; - if (ctype_cconv(ct->info) == CTCC_THISCALL) + if (ctype_cconv(info) == CTCC_THISCALL) ngpr = 1; - else if (ctype_cconv(ct->info) == CTCC_FASTCALL) + else if (ctype_cconv(info) == CTCC_FASTCALL) ngpr = 2; #endif @@ -1029,7 +1032,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, lua_assert(ctype_isfield(ctf->info)); did = ctype_cid(ctf->info); } else { - if (!(ct->info & CTF_VARARG)) + if (!(info & CTF_VARARG)) lj_trace_err(J, LJ_TRERR_NYICALL); /* Too many arguments. */ did = lj_ccall_ctid_vararg(cts, o); /* Infer vararg type. */ } @@ -1112,12 +1115,14 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) { CTState *cts = ctype_ctsG(J2G(J)); CType *ct = ctype_raw(cts, cd->ctypeid); + CTInfo info; IRType tp = IRT_PTR; if (ctype_isptr(ct->info)) { tp = (LJ_64 && ct->size == 8) ? IRT_P64 : IRT_P32; ct = ctype_rawchild(cts, ct); } - if (ctype_isfunc(ct->info)) { + info = ct->info; /* crec_call_args may invalidate ct pointer. 
*/ + if (ctype_isfunc(info)) { TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); CType *ctr = ctype_rawchild(cts, ct); IRType t = crec_ct2irt(cts, ctr); @@ -1135,9 +1140,9 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) ctype_isenum(ctr->info)) || t == IRT_CDATA) { lj_trace_err(J, LJ_TRERR_NYICALL); } - if ((ct->info & CTF_VARARG) + if ((info & CTF_VARARG) #if LJ_TARGET_X86 - || ctype_cconv(ct->info) != CTCC_CDECL + || ctype_cconv(info) != CTCC_CDECL #endif ) func = emitir(IRT(IR_CARG, IRT_NIL), func, @@ -1160,7 +1165,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) } } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { - TRef trid = lj_ir_kint(J, ctype_cid(ct->info)); + TRef trid = lj_ir_kint(J, ctype_cid(info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J); } else if (t == IRT_FLOAT || t == IRT_U32) { From cd4af8ad80bb6430ad2e547f7af236268c9be7d9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 21:02:31 +0200 Subject: [PATCH 11/32] Avoid out-of-range PC for stack overflow error from snapshot restore. Reported by Sergey Kaplun. 
#1359 --- src/lj_bc.h | 5 +++++ src/lj_parse.c | 14 +------------- src/lj_snap.c | 6 ++++-- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/src/lj_bc.h b/src/lj_bc.h index 3f0563e4b9..0c7249b39f 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -255,6 +255,11 @@ static LJ_AINLINE int bc_isret(BCOp op) return (op == BC_RETM || op == BC_RET || op == BC_RET0 || op == BC_RET1); } +static LJ_AINLINE int bc_isret_or_tail(BCOp op) +{ + return (op == BC_CALLMT || op == BC_CALLT || bc_isret(op)); +} + LJ_DATA const uint16_t lj_bc_mode[]; LJ_DATA const uint16_t lj_bc_ofs[]; diff --git a/src/lj_parse.c b/src/lj_parse.c index ffd11b3bd9..3370296f0f 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1529,23 +1529,11 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) #endif -/* Check if bytecode op returns. */ -static int bcopisret(BCOp op) -{ - switch (op) { - case BC_CALLMT: case BC_CALLT: - case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1: - return 1; - default: - return 0; - } -} - /* Fixup return instruction for prototype. */ static void fs_fixup_ret(FuncState *fs) { BCPos lastpc = fs->pc; - if (lastpc <= fs->lasttarget || !bcopisret(bc_op(fs->bcbase[lastpc-1].ins))) { + if (lastpc <= fs->lasttarget || !bc_isret_or_tail(bc_op(fs->bcbase[lastpc-1].ins))) { if ((fs->bl->flags & FSCOPE_UPVAL)) bcemit_AJ(fs, BC_UCLO, 0, 0); bcemit_AD(fs, BC_RET0, 0, 1); /* Need final return. */ diff --git a/src/lj_snap.c b/src/lj_snap.c index 82ab6983d3..5426002119 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -872,8 +872,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) const BCIns *pc = snap_pc(map[nent]); lua_State *L = J->L; - /* Set interpreter PC to the next PC to get correct error messages. */ - setcframe_pc(L->cframe, pc+1); + /* Set interpreter PC to the next PC to get correct error messages. + ** But not for returns or tail calls, since pc+1 may be out-of-range. 
+ */ + setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1); setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc); /* Make sure the stack is big enough for the slots from the snapshot. */ From 048972dbfdb6b441fe8a9bfe4d1f048966579ba8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 28 May 2025 21:13:17 +0200 Subject: [PATCH 12/32] Fix JIT slot overflow during up-recursion. Reported by Sergey Kaplun. #1358 --- src/lj_record.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index d336f642e4..1d535a2299 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -749,7 +749,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_LLEAVE); } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */ - } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) { + } else if (1 + pt->framesize >= LJ_MAX_JSLOTS || + J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) { lj_trace_err(J, LJ_TRERR_STACKOV); } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); From c64020f3c6d124503213147f2fb47c20335a395b Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:29:54 +0200 Subject: [PATCH 13/32] FFI: Fix dangling CType references (again). Reported by Sergey Kaplun. Collateral of #1360 --- src/lj_crecord.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/lj_crecord.c b/src/lj_crecord.c index f686b35f21..80e25ef8a1 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -1125,6 +1125,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) if (ctype_isfunc(info)) { TRef func = emitir(IRT(IR_FLOAD, tp), J->base[0], IRFL_CDATA_PTR); CType *ctr = ctype_rawchild(cts, ct); + CTInfo ctr_info = ctr->info; /* crec_call_args may invalidate ctr. 
*/ IRType t = crec_ct2irt(cts, ctr); TRef tr; TValue tv; @@ -1133,11 +1134,11 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); - if (ctype_isvoid(ctr->info)) { + if (ctype_isvoid(ctr_info)) { t = IRT_NIL; rd->nres = 0; - } else if (!(ctype_isnum(ctr->info) || ctype_isptr(ctr->info) || - ctype_isenum(ctr->info)) || t == IRT_CDATA) { + } else if (!(ctype_isnum(ctr_info) || ctype_isptr(ctr_info) || + ctype_isenum(ctr_info)) || t == IRT_CDATA) { lj_trace_err(J, LJ_TRERR_NYICALL); } if ((info & CTF_VARARG) @@ -1148,7 +1149,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) func = emitir(IRT(IR_CARG, IRT_NIL), func, lj_ir_kint(J, ctype_typeid(cts, ct))); tr = emitir(IRT(IR_CALLXS, t), crec_call_args(J, rd, cts, ct), func); - if (ctype_isbool(ctr->info)) { + if (ctype_isbool(ctr_info)) { if (frame_islua(J->L->base-1) && bc_b(frame_pc(J->L->base-1)[-1]) == 1) { /* Don't check result if ignored. */ tr = TREF_NIL; @@ -1164,7 +1165,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) tr = TREF_TRUE; } } else if (t == IRT_PTR || (LJ_64 && t == IRT_P32) || - t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr->info)) { + t == IRT_I64 || t == IRT_U64 || ctype_isenum(ctr_info)) { TRef trid = lj_ir_kint(J, ctype_cid(info)); tr = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, tr); if (t == IRT_I64 || t == IRT_U64) lj_needsplit(J); From e3fa3c48d8a4aadcf86429e9f7f6f1171914b15a Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:35:56 +0200 Subject: [PATCH 14/32] Avoid out-of-range PC for stack overflow error from snapshot restore. Reported by Sergey Kaplun. 
#1369 --- src/lj_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lj_debug.c b/src/lj_debug.c index a639cddf8f..f340964917 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -101,6 +101,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) pt = funcproto(fn); pos = proto_bcpos(pt, ins) - 1; #if LJ_HASJIT + if (pos == NO_BCPOS) return 1; /* Pretend it's the first bytecode. */ if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ if (bc_isret(bc_op(ins[-1]))) { GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); From c92d0cb19263e7e302b4740ba6617a32c201c613 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:38:45 +0200 Subject: [PATCH 15/32] x86/x64: Don't use undefined MUL/IMUL zero flag. Reported by VrIgHtEr. #1376 --- src/lj_asm_x86.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 239066d4a5..8b6ce47983 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -1841,7 +1841,8 @@ static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa) RegSet allow = RSET_GPR; Reg dest, right; int32_t k = 0; - if (as->flagmcp == as->mcp) { /* Drop test r,r instruction. */ + if (as->flagmcp == as->mcp && xa != XOg_X_IMUL) { + /* Drop test r,r instruction. */ MCode *p = as->mcp + ((LJ_64 && *as->mcp < XI_TESTb) ? 3 : 2); MCode *q = p[0] == 0x0f ? p+1 : p; if ((*q & 15) < 14) { From 871db2c84ecefd70a850e03a6c340214a81739f0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 24 Jul 2025 15:45:24 +0200 Subject: [PATCH 16/32] Windows: Add lua52compat option to msvcbuild.bat. Thanks to Gil Reis. #1366 --- src/msvcbuild.bat | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 69c0c61a9f..d6aed17009 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -5,11 +5,12 @@ @rem Then cd to this directory and run this script. 
Use the following @rem options (in order), if needed. The default is a dynamic release build. @rem -@rem nogc64 disable LJ_GC64 mode for x64 -@rem debug emit debug symbols -@rem amalg amalgamated build -@rem static create static lib to statically link into your project -@rem mixed create static lib to build a DLL in your project +@rem nogc64 disable LJ_GC64 mode for x64 +@rem debug emit debug symbols +@rem lua52compat enable extra Lua 5.2 extensions +@rem amalg amalgamated build +@rem static create static lib to statically link into your project +@rem mixed create static lib to build a DLL in your project @if not defined INCLUDE goto :FAIL @@ -101,6 +102,10 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @set LJDYNBUILD=%LJDYNBUILD_DEBUG% @set LJLINKTYPE=%LJLINKTYPE_DEBUG% :NODEBUG +@if "%1" neq "lua52compat" goto :NOLUA52COMPAT +@shift +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT +:NOLUA52COMPAT @set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET% @set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET% @if "%1"=="amalg" goto :AMALGDLL From 54a162688ed25902122077149df9b456bc5a763e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:11:02 +0200 Subject: [PATCH 17/32] Fix reporting of an error during error handling. Reported by Sergey Kaplun. #1381 --- src/lj_err.c | 10 ++++++++++ src/lj_state.c | 1 + 2 files changed, 11 insertions(+) diff --git a/src/lj_err.c b/src/lj_err.c index 03b5030be6..e8e1875805 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -803,9 +803,17 @@ LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em) return lj_str_newz(L, err2msg(em)); } +LJ_NORET LJ_NOINLINE static void lj_err_err(lua_State *L) +{ + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRERR)); + lj_err_throw(L, LUA_ERRERR); +} + /* Out-of-memory error. */ LJ_NOINLINE void lj_err_mem(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); if (L->status == LUA_ERRERR+1) /* Don't touch the stack during lua_open. 
*/ lj_vm_unwind_c(L->cframe, LUA_ERRMEM); if (LJ_HASJIT) { @@ -902,6 +910,8 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) /* Stack overflow error. */ void LJ_FASTCALL lj_err_stkov(lua_State *L) { + if (L->status == LUA_ERRERR) + lj_err_err(L); lj_debug_addloc(L, err2msg(LJ_ERR_STKOV), L->base-1, NULL); lj_err_run(L); } diff --git a/src/lj_state.c b/src/lj_state.c index d8fc545a0d..3cad8cc184 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -195,6 +195,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ + fixstring(lj_err_str(L, LJ_ERR_ERRERR)); /* Preallocate err in err msg. */ g->gc.threshold = 4*g->gc.total; #if LJ_HASFFI lj_ctype_initfin(L); From a69aef43fe1838da26c193d188580229b2387583 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:13:51 +0200 Subject: [PATCH 18/32] Fix io.write() of newly created buffer. Reported by vfprintf. #1386 --- src/lj_strfmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c index 04aebaa472..bb649fc840 100644 --- a/src/lj_strfmt.c +++ b/src/lj_strfmt.c @@ -170,7 +170,7 @@ const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) } else if (tvisbuf(o)) { SBufExt *sbx = bufV(o); *lenp = sbufxlen(sbx); - return sbx->r; + return sbx->r ? sbx->r : ""; } else if (tvisint(o)) { sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); } else if (tvisnum(o)) { From a21ba1c9b5218ef83eb8bc6d374764da84f77ffd Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:17:45 +0200 Subject: [PATCH 19/32] Add GNU/Hurd build support. Note: this is not an officially supported target. Contributed by Pino Toscano and Samuel Thibault. 
#1383 #1384 --- src/Makefile | 3 +++ src/lj_arch.h | 3 +++ src/lj_prng.c | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index c83abfa0b6..5dd98a31f6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -357,6 +357,9 @@ else ifeq (GNU/kFreeBSD,$(TARGET_SYS)) TARGET_XLIBS+= -ldl endif + ifeq (GNU,$(TARGET_SYS)) + TARGET_XLIBS+= -ldl + endif endif endif endif diff --git a/src/lj_arch.h b/src/lj_arch.h index a4eecf27e0..865bfa2322 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -96,6 +96,9 @@ #elif defined(__QNX__) #define LJ_TARGET_QNX 1 #define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__GNU__) +#define LJ_TARGET_HURD 1 +#define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER #endif diff --git a/src/lj_prng.c b/src/lj_prng.c index 02146b273a..1bbb7eaba3 100644 --- a/src/lj_prng.c +++ b/src/lj_prng.c @@ -125,7 +125,7 @@ static PRGR libfunc_rgr; #if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 #define LJ_TARGET_HAS_GETENTROPY 1 #endif -#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX +#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX || LJ_TARGET_HURD #define LJ_TARGET_HAS_GETENTROPY 1 #endif From 5c3254d68d2579bf8c5bd1e39e612582fb5a04f6 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 13:23:51 +0200 Subject: [PATCH 20/32] Gracefully handle broken custom allocator. Reported by Alex Orlenko. 
#1393 --- src/lj_state.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/lj_state.c b/src/lj_state.c index 3cad8cc184..fb6d41a5f9 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -261,7 +261,11 @@ LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) } #endif GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); - if (GG == NULL || !checkptrGC(GG)) return NULL; + if (GG == NULL) return NULL; + if (!checkptrGC(GG)) { + allocf(allocd, GG, sizeof(GG_State), 0); + return NULL; + } memset(GG, 0, sizeof(GG_State)); L = &GG->L; g = &GG->g; From 25a61a182166fec06f1a1a025eb8fabbb6cf483e Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 16 Oct 2025 14:24:52 +0200 Subject: [PATCH 21/32] x64: Add support for CET IBT. Note: this is not enabled by default, look for CET in lj_arch.h Contributed by Yuichiro Naito. #1391 --- src/Makefile | 4 ++++ src/jit/dis_x86.lua | 20 +++++++++++++++- src/lj_arch.h | 11 +++++++++ src/lj_asm.c | 3 +++ src/lj_emit_x86.h | 7 ++++++ src/lj_target_x86.h | 3 +++ src/vm_x64.dasc | 57 ++++++++++++++++++++++++++++++++++++++------- 7 files changed, 95 insertions(+), 10 deletions(-) diff --git a/src/Makefile b/src/Makefile index 5dd98a31f6..d23e0db255 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,6 +446,10 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif +ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D CET_BR + TARGET_ARCH+= -DLJ_CET_BR=1 +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index b1de0eeae1..6b04ee8495 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -122,7 +122,7 @@ local map_opc2 = { "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", "movhpsXmr||movhpdXmr", 
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", -"hintnopVm","hintnopVm","hintnopVm","hintnopVm", +"hintnopVm","hintnopVm","endbr*hintnopVm","hintnopVm", --2x "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, "movapsXrm||movapdXrm", @@ -804,6 +804,24 @@ map_act = { return dispatch(ctx, map_opcvm[ctx.mrm]) end, + -- Special NOP for endbr64/endbr32. + endbr = function(ctx, name, pat) + if ctx.rep then + local pos = ctx.pos + local b = byte(ctx.code, pos) + local text + if b == 0xfa then text = "endbr64" + elseif b == 0xfb then text = "endbr64" + end + if text then + ctx.pos = pos + 1 + ctx.rep = nil + return putop(ctx, text) + end + end + return dispatch(ctx, pat) + end, + -- Floating point opcode dispatch. fp = function(ctx, name, pat) local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end diff --git a/src/lj_arch.h b/src/lj_arch.h index 865bfa2322..42c65879bd 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -219,6 +219,17 @@ #error "macOS requires GC64 -- don't disable it" #endif +#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR) +/* +** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). +** This is not enabled by default because it causes a notable slowdown of +** the interpreter on all x64 CPUs, whether they have CET enabled or not. +** If your toolchain enables -fcf-protection=branch by default, you need +** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR +*/ +#define LJ_CET_BR 1 +#endif + #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM #define LJ_ARCH_NAME "arm" diff --git a/src/lj_asm.c b/src/lj_asm.c index fec4351251..e7f3ec1cd5 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2586,6 +2586,9 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_head_side(as); else asm_head_root(as); +#if LJ_CET_BR + emit_endbr(as); +#endif asm_phi_fixup(as); if (J->curfinal->nins >= T->nins) { /* IR didn't grow? 
*/ diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index f477301162..848301bce1 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -70,6 +70,13 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, return p; } +#if LJ_CET_BR +static void emit_endbr(ASMState *as) +{ + emit_u32(as, XI_ENDBR64); +} +#endif + /* op + modrm */ #define emit_opm(xo, mode, rr, rb, p, delta) \ (p[(delta)-1] = MODRM((mode), (rr), (rb)), \ diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 6a528e8288..fa32a5d46f 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -242,6 +242,9 @@ typedef enum { XV_SHLX = XV_660f38(f7), XV_SHRX = XV_f20f38(f7), + /* Special NOP instructions. */ + XI_ENDBR64 = 0xfa1e0ff3, + /* Variable-length opcodes. XO_* prefix. */ XO_OR = XO_(0b), XO_MOV = XO_(8b), diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index f501495b11..52ef88af42 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -189,16 +189,24 @@ | |.endif | +|//-- Control-Flow Enforcement Technique (CET) --------------------------- +| +|.if CET_BR +|.macro endbr; endbr64; .endmacro +|.else +|.macro endbr; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Instruction headers. -|.macro ins_A; .endmacro -|.macro ins_AD; .endmacro -|.macro ins_AJ; .endmacro -|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro -|.macro ins_AB_; movzx RBd, RCH; .endmacro -|.macro ins_A_C; movzx RCd, RCL; .endmacro -|.macro ins_AND; not RD; .endmacro +|.macro ins_A; endbr; .endmacro +|.macro ins_AD; endbr; .endmacro +|.macro ins_AJ; endbr; .endmacro +|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro +|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro +|.macro ins_AND; endbr; not RD; .endmacro | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 
|.macro ins_NEXT @@ -479,20 +487,24 @@ static void build_subroutines(BuildCtx *ctx) | jmp <3 | |->vm_unwind_yield: + | endbr | mov al, LUA_YIELD | jmp ->vm_unwind_c_eh | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | endbr | // (void *cframe, int errcode) | mov eax, CARG2d // Error return status for vm_pcall. | mov rsp, CARG1 |->vm_unwind_c_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov GL:RB, L:RB->glref | mov dword GL:RB->vmstate, ~LJ_VMST_C | jmp ->vm_leave_unw | |->vm_unwind_rethrow: + | endbr |.if not X64WIN | mov CARG1, SAVE_L | mov CARG2d, eax @@ -501,10 +513,12 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | endbr | // (void *cframe) | and CARG1, CFRAME_RAWMASK | mov rsp, CARG1 |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | endbr | mov L:RB, SAVE_L | mov RDd, 1+1 // Really 1+2 results, incr. later. | mov BASE, L:RB->base @@ -675,6 +689,7 @@ static void build_subroutines(BuildCtx *ctx) |//-- Continuation dispatch ---------------------------------------------- | |->cont_dispatch: + | endbr | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | add RA, BASE | and PC, -8 @@ -706,6 +721,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // BASE = base, RC = result, RB = mbase + | endbr | movzx RAd, PC_RB | sub RB, 32 | lea RA, [BASE+RA*8] @@ -774,6 +790,7 @@ static void build_subroutines(BuildCtx *ctx) | test RC, RC | jz >3 |->cont_ra: // BASE = base, RC = result + | endbr | movzx RAd, PC_RA | mov RB, [RC] | mov [BASE+RA*8], RB @@ -851,6 +868,7 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, [BASE+RA*8] | mov [RC], RB |->cont_nop: // BASE = base, (RC = result) + | endbr | ins_next | |3: // Call __newindex metamethod. 
@@ -921,6 +939,7 @@ static void build_subroutines(BuildCtx *ctx) | ins_next | |->cont_condt: // BASE = base, RC = result + | endbr | add PC, 4 | mov ITYPE, [RC] | sar ITYPE, 47 @@ -929,6 +948,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp <6 | |->cont_condf: // BASE = base, RC = result + | endbr | mov ITYPE, [RC] | sar ITYPE, 47 | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. @@ -1132,16 +1152,17 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | endbr |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: - | cmp NARGS:RDd, 1+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 1+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_2, name |->ff_ .. name: - | cmp NARGS:RDd, 2+1; jb ->fff_fallback + | endbr; cmp NARGS:RDd, 2+1; jb ->fff_fallback |.endmacro | |.macro .ffunc_n, name, op @@ -2207,6 +2228,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_VMEVENT // No recording while in vmevent. | jnz >5 @@ -2220,12 +2242,14 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 | jmp >1 | |->vm_inshook: // Dispatch target for instr/line hooks. + | endbr | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] | test RDL, HOOK_ACTIVE // Hook already active? | jnz >5 @@ -2253,6 +2277,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | |->cont_hook: // Continue from hook yield. + | endbr | add PC, 4 | mov RA, [RB-40] | mov MULTRES, RAd // Restore MULTRES for *M ins. @@ -2277,6 +2302,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. 
+ | endbr | mov SAVE_PC, PC |.if JIT | jmp >1 @@ -2312,6 +2338,7 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_stitch: // Trace stitching. |.if JIT + | endbr | // BASE = base, RC = result, RB = mbase | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE @@ -2364,6 +2391,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | endbr | mov L:RB, SAVE_L | mov L:RB->base, BASE | mov CARG2, PC // Caveat: CARG2 == BASE @@ -2383,6 +2411,7 @@ static void build_subroutines(BuildCtx *ctx) |// The 16 bit exit number is stored with two (sign-extended) push imm8. |->vm_exit_handler: |.if JIT + | endbr | push r13; push r12 | push r11; push r10; push r9; push r8 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp @@ -2431,6 +2460,7 @@ static void build_subroutines(BuildCtx *ctx) | jmp >1 |.endif |->vm_exit_interp: + | endbr | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. |.if JIT | // Restore additional callee-save registers only used in compiled code. @@ -2524,6 +2554,7 @@ static void build_subroutines(BuildCtx *ctx) |.macro vm_round, name, mode, cond |->name: |->name .. _sse: + | endbr | sseconst_abs xmm2, RD | sseconst_2p52 xmm3, RD | movaps xmm1, xmm0 @@ -2634,6 +2665,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in edx. |->vm_next: |.if JIT + | endbr | mov NEXT_ASIZE, NEXT_TAB->asize |1: // Traverse array part. | cmp NEXT_IDX, NEXT_ASIZE; jae >5 @@ -4087,6 +4119,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: |.if JIT + | endbr | hotloop RBd |.endif |->vm_IITERN: @@ -4266,6 +4299,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jnz >7 // Not returning to a fixarg Lua func? switch (op) { case BC_RET: + | endbr |->BC_RET_Z: | mov KBASE, BASE // Use KBASE for result move. 
| sub RDd, 1 @@ -4284,10 +4318,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ja >6 break; case BC_RET1: + | endbr | mov RB, [BASE+RA] | mov [BASE-16], RB /* fallthrough */ case BC_RET0: + | endbr |5: | cmp PC_RB, RDL // More results expected? | ja >6 @@ -4334,6 +4370,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FORL: |.if JIT + | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. @@ -4485,6 +4522,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERL: |.if JIT + | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. @@ -4578,6 +4616,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCF: |.if JIT + | endbr | hotcall RBd |.endif case BC_FUNCV: /* NYI: compiled vararg functions. */ From e34a78acf6b8656874b1c25a12a7cd1813d73af9 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 00:27:15 +0100 Subject: [PATCH 22/32] x64: Various fixes for CET IBT. Also add ELF notes. 
#1391 --- src/Makefile | 10 ++++-- src/lj_arch.h | 18 +++++++++-- src/lj_asm.c | 4 +-- src/lj_ccallback.c | 24 ++++++++++---- src/lj_emit_x86.h | 4 +-- src/vm_x64.dasc | 79 +++++++++++++++++++++++++++++++++------------- 6 files changed, 101 insertions(+), 38 deletions(-) diff --git a/src/Makefile b/src/Makefile index d23e0db255..e657af1343 100644 --- a/src/Makefile +++ b/src/Makefile @@ -446,9 +446,13 @@ ifneq (,$(findstring LJ_ABI_PAUTH 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D PAUTH TARGET_ARCH+= -DLJ_ABI_PAUTH=1 endif -ifneq (,$(findstring LJ_CET_BR 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D CET_BR - TARGET_ARCH+= -DLJ_CET_BR=1 +ifneq (,$(findstring LJ_ABI_BRANCH_TRACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D BRANCH_TRACK + TARGET_ARCH+= -DLJ_ABI_BRANCH_TRACK=1 +endif +ifneq (,$(findstring LJ_ABI_SHADOW_STACK 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SHADOW_STACK + TARGET_ARCH+= -DLJ_ABI_SHADOW_STACK=1 endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) diff --git a/src/lj_arch.h b/src/lj_arch.h index 42c65879bd..a775b51f4c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -219,15 +219,27 @@ #error "macOS requires GC64 -- don't disable it" #endif -#if (__CET__ & 1) && defined(LUAJIT_ENABLE_CET_BR) +#if !defined(LJ_ABI_BRANCH_TRACK) && (__CET__ & 1) && \ + LJ_TARGET_GC64 && defined(LUAJIT_ENABLE_CET_BR) /* ** Control-Flow Enforcement Technique (CET) indirect branch tracking (IBT). ** This is not enabled by default because it causes a notable slowdown of ** the interpreter on all x64 CPUs, whether they have CET enabled or not. 
** If your toolchain enables -fcf-protection=branch by default, you need -** to build with: make XCFLAGS=-DLUAJIT_ENABLE_CET_BR +** to build with: make amalg XCFLAGS=-DLUAJIT_ENABLE_CET_BR */ -#define LJ_CET_BR 1 +#define LJ_ABI_BRANCH_TRACK 1 +#endif + +#if !defined(LJ_ABI_SHADOW_STACK) && (__CET__ & 2) +/* +** Control-Flow Enforcement Technique (CET) shadow stack (CET-SS). +** It has no code overhead and doesn't cause any slowdowns when unused. +** It can also be unconditionally enabled since all code already follows +** a strict CALL to RET correspondence for performance reasons (all modern +** CPUs use a (non-enforcing) shadow stack for return branch prediction). +*/ +#define LJ_ABI_SHADOW_STACK 1 #endif #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM diff --git a/src/lj_asm.c b/src/lj_asm.c index e7f3ec1cd5..8f558a0392 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -2586,8 +2586,8 @@ void lj_asm_trace(jit_State *J, GCtrace *T) asm_head_side(as); else asm_head_root(as); -#if LJ_CET_BR - emit_endbr(as); +#if LJ_ABI_BRANCH_TRACK + emit_branch_track(as); #endif asm_phi_fixup(as); diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 7f08f0a848..5594a731e4 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -34,22 +34,29 @@ #elif LJ_TARGET_X86ORX64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 8 +#else +#define CALLBACK_MCODE_SLOTSZ 4 +#endif +#define CALLBACK_MCODE_NSLOT (128 / CALLBACK_MCODE_SLOTSZ) + #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) #define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 
6 : 5)) #define CALLBACK_SLOT2OFS(slot) \ - (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) + (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/CALLBACK_MCODE_NSLOT) + CALLBACK_MCODE_SLOTSZ*(slot)) static MSize CALLBACK_OFS2SLOT(MSize ofs) { MSize group; ofs -= CALLBACK_MCODE_HEAD; - group = ofs / (32*4 + CALLBACK_MCODE_GROUP); - return (ofs % (32*4 + CALLBACK_MCODE_GROUP))/4 + group*32; + group = ofs / (128 + CALLBACK_MCODE_GROUP); + return (ofs % (128 + CALLBACK_MCODE_GROUP))/CALLBACK_MCODE_SLOTSZ + group*CALLBACK_MCODE_NSLOT; } #define CALLBACK_MAX_SLOT \ - (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+4*32))*32) + (((CALLBACK_MCODE_SIZE-CALLBACK_MCODE_HEAD)/(CALLBACK_MCODE_GROUP+128))*CALLBACK_MCODE_NSLOT) #elif LJ_TARGET_ARM @@ -118,9 +125,13 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *(void **)p = target; p += 8; #endif for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *(uint32_t *)p = XI_ENDBR64; p += 4; +#endif /* mov al, slot; jmp group */ *p++ = XI_MOVrib | RID_EAX; *p++ = (uint8_t)slot; - if ((slot & 31) == 31 || slot == CALLBACK_MAX_SLOT-1) { + if ((slot & (CALLBACK_MCODE_NSLOT-1)) == (CALLBACK_MCODE_NSLOT-1) || + slot == CALLBACK_MAX_SLOT-1) { /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. 
*/ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); @@ -140,7 +151,8 @@ static void *callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMP; *(int32_t *)p = target-(p+4); p += 4; #endif } else { - *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); + *p++ = XI_JMPs; + *p++ = (uint8_t)(CALLBACK_MCODE_SLOTSZ*(CALLBACK_MCODE_NSLOT-1-(slot&(CALLBACK_MCODE_NSLOT-1))) - 2); } } return p; diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 848301bce1..5fd6cfa7eb 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -70,8 +70,8 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, return p; } -#if LJ_CET_BR -static void emit_endbr(ASMState *as) +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) { emit_u32(as, XI_ENDBR64); } diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 52ef88af42..2e9f05056d 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -191,7 +191,7 @@ | |//-- Control-Flow Enforcement Technique (CET) --------------------------- | -|.if CET_BR +|.if BRANCH_TRACK |.macro endbr; endbr64; .endmacro |.else |.macro endbr; .endmacro @@ -200,13 +200,13 @@ |//----------------------------------------------------------------------- | |// Instruction headers. -|.macro ins_A; endbr; .endmacro -|.macro ins_AD; endbr; .endmacro -|.macro ins_AJ; endbr; .endmacro -|.macro ins_ABC; endbr; movzx RBd, RCH; movzx RCd, RCL; .endmacro -|.macro ins_AB_; endbr; movzx RBd, RCH; .endmacro -|.macro ins_A_C; endbr; movzx RCd, RCL; .endmacro -|.macro ins_AND; endbr; not RD; .endmacro +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; movzx RBd, RCH; .endmacro +|.macro ins_A_C; movzx RCd, RCL; .endmacro +|.macro ins_AND; not RD; .endmacro | |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). 
|.macro ins_NEXT @@ -487,13 +487,12 @@ static void build_subroutines(BuildCtx *ctx) | jmp <3 | |->vm_unwind_yield: - | endbr | mov al, LUA_YIELD | jmp ->vm_unwind_c_eh | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | endbr | // (void *cframe, int errcode) + | endbr | mov eax, CARG2d // Error return status for vm_pcall. | mov rsp, CARG1 |->vm_unwind_c_eh: // Landing pad for external unwinder. @@ -513,8 +512,8 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | endbr | // (void *cframe) + | endbr | and CARG1, CFRAME_RAWMASK | mov rsp, CARG1 |->vm_unwind_ff_eh: // Landing pad for external unwinder. @@ -689,7 +688,6 @@ static void build_subroutines(BuildCtx *ctx) |//-- Continuation dispatch ---------------------------------------------- | |->cont_dispatch: - | endbr | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | add RA, BASE | and PC, -8 @@ -1152,7 +1150,7 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: - | endbr + | endbr |.endmacro | |.macro .ffunc_1, name @@ -2338,8 +2336,8 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_stitch: // Trace stitching. |.if JIT - | endbr | // BASE = base, RC = result, RB = mbase + | endbr | mov TRACE:ITYPE, [RB-40] // Save previous trace. | cleartp TRACE:ITYPE | mov TMPRd, MULTRES @@ -2460,8 +2458,8 @@ static void build_subroutines(BuildCtx *ctx) | jmp >1 |.endif |->vm_exit_interp: - | endbr | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. + | endbr |.if JIT | // Restore additional callee-save registers only used in compiled code. 
|.if X64WIN @@ -2849,6 +2847,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant endbr instructions. */ + default: + | endbr + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -4119,7 +4137,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: |.if JIT - | endbr | hotloop RBd |.endif |->vm_IITERN: @@ -4299,7 +4316,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jnz >7 // Not returning to a fixarg Lua func? switch (op) { case BC_RET: - | endbr |->BC_RET_Z: | mov KBASE, BASE // Use KBASE for result move. | sub RDd, 1 @@ -4318,12 +4334,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ja >6 break; case BC_RET1: - | endbr | mov RB, [BASE+RA] | mov [BASE-16], RB /* fallthrough */ case BC_RET0: - | endbr |5: | cmp PC_RB, RDL // More results expected? | ja >6 @@ -4370,7 +4384,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FORL: |.if JIT - | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. @@ -4522,7 +4535,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERL: |.if JIT - | endbr | hotloop RBd |.endif | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. @@ -4616,7 +4628,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_FUNCF: |.if JIT - | endbr | hotcall RBd |.endif case BC_FUNCV: /* NYI: compiled vararg functions. 
*/ @@ -4886,6 +4897,30 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 8\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && (LJ_ABI_BRANCH_TRACK || LJ_ABI_SHADOW_STACK) + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 8\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000002\n" + "\t.long 4\n" + "\t.long %d\n" + "\t.long 0\n", +#if LJ_ABI_BRANCH_TRACK + 1| +#else + 0| +#endif +#if LJ_ABI_SHADOW_STACK + 2 +#else + 0 +#endif + ); #endif break; #if !LJ_NO_UNWIND From 8651ef6df45189ad5ab734275568c9538038fcfa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 04:46:10 +0100 Subject: [PATCH 23/32] ARM64: Add support for ARM BTI. Note: this is not enabled by default, look for CET in lj_arch.h. Thanks to Yuichiro Naito. #1398 --- dynasm/dasm_arm64.lua | 22 ++++++++++++++ src/jit/dis_arm64.lua | 8 +++++- src/lj_arch.h | 5 ++++ src/lj_ccallback.c | 14 +++++++-- src/lj_emit_arm64.h | 7 +++++ src/lj_target_arm64.h | 4 +++ src/vm_arm64.dasc | 67 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 124 insertions(+), 3 deletions(-) diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua index 8b27e9625c..db3adb4845 100644 --- a/dynasm/dasm_arm64.lua +++ b/dynasm/dasm_arm64.lua @@ -244,6 +244,10 @@ local map_cond = { hs = 2, lo = 3, } +local map_bti = { + c = 0x40, j = 0x80, jc = 0xc0, +} + ------------------------------------------------------------------------------ local parse_reg_type @@ -475,6 +479,12 @@ local function parse_cond(expr, inv) return shl(bit.bxor(c, inv), 12) end +local function parse_map(expr, map) + local x = map[expr] + if not x then werror("bad operand") end + return x +end + local function parse_load(params, nparams, n, op) if params[n+2] then werror("too many operands") end local scale = shr(op, 30) @@ -823,11 +833,21 @@ map_op = { tbz_3 = "36000000DTBw|36000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx", + -- Branch Target 
Identification. + bti_1 = "d503241ft", + -- ARM64e: Pointer authentication codes (PAC). blraaz_1 = "d63f081fNx", + blrabz_1 = "d63f0c1fNx", braa_2 = "d71f0800NDx", + brab_2 = "d71f0c00NDx", braaz_1 = "d61f081fNx", + brabz_1 = "d61f0c1fNx", + paciasp_0 = "d503233f", pacibsp_0 = "d503237f", + autiasp_0 = "d50323bf", + autibsp_0 = "d50323ff", + retaa_0 = "d65f0bff", retab_0 = "d65f0fff", -- Miscellaneous instructions. @@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos) op = op + parse_cond(q, 0); n = n + 1 elseif p == "c" then op = op + parse_cond(q, 1); n = n + 1 + elseif p == "t" then + op = op + parse_map(q, map_bti); n = n + 1 else assert(false) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 4457aac080..944f1a6ced 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions. }, { -- System instructions. shift = 0, mask = 0x3fffff, - [0x03201f] = "nop" + [0x03201f] = "nop", + [0x03245f] = "bti c", + [0x03249f] = "bti j", + [0x0324df] = "bti jc", }, { -- Unconditional branch, register. shift = 0, mask = 0xfffc1f, @@ -1171,6 +1174,9 @@ local function disass_ins(ctx) end end second0 = true + elseif p == " " then + operands[#operands+1] = pat:match(" (.*)") + break else assert(false) end diff --git a/src/lj_arch.h b/src/lj_arch.h index a775b51f4c..6d1a92714c 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -288,6 +288,11 @@ #if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) #define LJ_ABI_PAUTH 1 #endif +#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \ + defined(LUAJIT_ENABLE_CET_BR) +/* See comments about LUAJIT_ENABLE_CET_BR above. 
*/ +#define LJ_ABI_BRANCH_TRACK 1 +#endif #define LJ_TARGET_ARM64 1 #define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRAREG 30 diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 5594a731e4..c4b25cd7d1 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -64,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_ARM64 +#if LJ_ABI_BRANCH_TRACK +#define CALLBACK_MCODE_SLOTSZ 12 +#endif + #define CALLBACK_MCODE_HEAD 32 #elif LJ_TARGET_PPC @@ -88,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #endif #ifndef CALLBACK_SLOT2OFS -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) +#ifndef CALLBACK_MCODE_SLOTSZ +#define CALLBACK_MCODE_SLOTSZ 8 +#endif +#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot)) +#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ) #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) #endif @@ -193,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) ((void **)p)[1] = g; p += 4; for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { +#if LJ_ABI_BRANCH_TRACK + *p++ = A64I_BTI_C; +#endif *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); p++; diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h index ca1269b7c3..a8be741562 100644 --- a/src/lj_emit_arm64.h +++ b/src/lj_emit_arm64.h @@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target) } } +#if LJ_ABI_BRANCH_TRACK +static void emit_branch_track(ASMState *as) +{ + *--as->mcp = A64I_BTI_J; +} +#endif + /* -- Emit generic operations --------------------------------------------- */ /* Generic move between two regs. 
*/ diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 9274187117..30aff47882 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -265,6 +265,10 @@ typedef enum A64Ins { A64I_BRAAZ = 0xd61f081f, A64I_BLRAAZ = 0xd63f081f, + A64I_BTI_C = 0xd503245f, + A64I_BTI_J = 0xd503249f, + A64I_BTI_JC = 0xd50324df, + A64I_NOP = 0xd503201f, /* FP */ diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 58efe400e4..85d38de384 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -92,6 +92,17 @@ |.macro ret_auth; ret; .endmacro |.endif | +|// ARM64 branch target identification (BTI). +|.if BRANCH_TRACK +|.macro bti_jump; bti j; .endmacro +|.macro bti_call; bti c; .endmacro +|.macro bti_tailcall; bti jc; .endmacro +|.else +|.macro bti_jump; .endmacro +|.macro bti_call; .endmacro +|.macro bti_tailcall; .endmacro +|.endif +| |//----------------------------------------------------------------------- | |// Stack layout while in interpreter. Must match with lj_frame.h. @@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_unwind_c: // Unwind C stack, return from vm_pcall. | // (void *cframe, int errcode) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | mov CRET1, CARG2 | ldr L, SAVE_L | ldr GL, L->glref |->vm_unwind_c_eh: // Landing pad for external unwinder. + | bti_tailcall | mv_vmstate TMP0w, C | st_vmstate TMP0w | b ->vm_leave_unw | |->vm_unwind_ff: // Unwind C stack, return from ff pcall. | // (void *cframe) + | bti_tailcall | add fp, CARG1, # SAVE_FP_LR_ | mov sp, CARG1 | ldr L, SAVE_L | init_constants | ldr GL, L->glref // Setup pointer to global state. |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | bti_tailcall | mov RC, #16 // 2 results: false + error message. 
| ldr BASE, L->base | mov_false TMP0 @@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->cont_cat: // RA = resultptr, CARG4 = meta base + | bti_jump | ldr INSw, [PC, #-4] | sub CARG2, CARG4, #32 | ldr TMP0, [RA] @@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx) | sub RB, RB, #0x20000 | csel PC, PC, RB, lo |->cont_nop: + | bti_jump | ins_next | |->cont_ra: // RA = resultptr + | bti_jump | ldr INSw, [PC, #-4] | ldr TMP0, [RA] | decode_RA TMP1, INS @@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx) | b ->cont_nop | |->cont_condt: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_true TMP1 | cmp TMP1, TMP0 // Branch if result is true. | b <4 | |->cont_condf: // RA = resultptr + | bti_jump | ldr TMP0, [RA] | mov_false TMP1 | cmp TMP0, TMP1 // Branch if result is false. @@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc, name |->ff_ .. name: + | bti_jump |.endmacro | |.macro .ffunc_1, name |->ff_ .. name: + | bti_jump | ldr CARG1, [BASE] | cmp NARGS8:RC, #8 | blo ->fff_fallback @@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_2, name |->ff_ .. name: + | bti_jump | ldp CARG1, CARG2, [BASE] | cmp NARGS8:RC, #16 | blo ->fff_fallback @@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_record: // Dispatch target for recording phase. |.if JIT + | bti_jump | ldrb CARG1w, GL->hookmask | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | bne >5 @@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_rethook: // Dispatch target for return hooks. + | bti_jump | ldrb TMP2w, GL->hookmask | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? |5: // Re-dispatch to static ins. @@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->vm_inshook: // Dispatch target for instr/line hooks. 
+ | bti_jump | ldrb TMP2w, GL->hookmask | ldr TMP3w, GL->hookcount | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? @@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx) | br_auth TMP0 | |->cont_hook: // Continue from hook yield. + | bti_jump | ldr CARG1, [CARG4, #-40] | add PC, PC, #4 | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. @@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_callhook: // Dispatch target for call hooks. + | bti_jump | mov CARG2, PC |.if JIT | b >1 @@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx) |->cont_stitch: // Trace stitching. |.if JIT | // RA = resultptr, CARG4 = meta base + | bti_jump | ldr RBw, SAVE_MULTRES | ldr INSw, [PC, #-4] | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. @@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_profhook: // Dispatch target for profiler hook. #if LJ_HASPROFILE + | bti_jump | mov CARG1, L | str BASE, L->base | mov CARG2, PC @@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx) | |->vm_exit_handler: |.if JIT + | bti_call | sub sp, sp, #(64*8) | savex_, 0, 1 | savex_, 2, 3 @@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | |->vm_exit_interp: + | bti_jump | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |.if JIT | ldr L, SAVE_L @@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx) | | // int lj_vm_modi(int dividend, int divisor); |->vm_modi: + | bti_call | eor CARG4w, CARG1w, CARG2w | cmp CARG4w, #0 | eor CARG3w, CARG1w, CARG1w, asr #31 @@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx) |// Next idx returned in CRET2w. |->vm_next: |.if JIT + | bti_call | ldr NEXT_LIM, NEXT_TAB->asize | ldr NEXT_TMP1, NEXT_TAB->array |1: // Traverse array part. 
@@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |=>defop: switch (op) { +#if !LJ_HASJIT + case BC_FORL: + case BC_JFORI: + case BC_JFORL: + case BC_ITERL: + case BC_JITERL: + case BC_LOOP: + case BC_JLOOP: + case BC_FUNCF: + case BC_JFUNCF: + case BC_JFUNCV: +#endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + break; /* Avoid redundant bti instructions. */ + default: + | bti_jump + break; + } + + switch (op) { /* -- Comparison ops ---------------------------------------------------- */ @@ -4122,6 +4176,19 @@ static void emit_asm_debug(BuildCtx *ctx) "\t.align 3\n" ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); #endif +#endif +#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK + fprintf(ctx->fp, + "\t.section .note.gnu.property,\"a\"\n" + "\t.align 3\n" + "\t.long 4\n" + "\t.long 16\n" + "\t.long 5\n" + "\t.long 0x00554e47\n" + "\t.long 0xc0000000\n" + "\t.long 4\n" + "\t.long 1\n" + "\t.long 0\n"); #endif break; #if !LJ_NO_UNWIND From 864e78d66cb21335823c7782fa21beae8e7914b0 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Tue, 28 Oct 2025 04:59:18 +0100 Subject: [PATCH 24/32] Windows: Fix lua52compat option for msvcbuild.bat. Thanks to Alex Orlenko. #1395 #1366 --- src/msvcbuild.bat | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index d6aed17009..3f32e1a0e5 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -6,8 +6,8 @@ @rem options (in order), if needed. The default is a dynamic release build. 
@rem @rem nogc64 disable LJ_GC64 mode for x64 -@rem debug emit debug symbols @rem lua52compat enable extra Lua 5.2 extensions +@rem debug emit debug symbols @rem amalg amalgamated build @rem static create static lib to statically link into your project @rem mixed create static lib to build a DLL in your project @@ -19,7 +19,7 @@ @set DEBUGCFLAGS= @set LJCOMPILE=cl /nologo /c /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE /D_CRT_STDIO_INLINE=__declspec(dllexport)__inline @set LJDYNBUILD=/DLUA_BUILD_AS_DLL /MD -@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd +@set LJDYNBUILD_DEBUG=/DLUA_BUILD_AS_DLL /MDd @set LJCOMPILETARGET=/Zi @set LJLINKTYPE=/DEBUG /RELEASE @set LJLINKTYPE_DEBUG=/DEBUG @@ -65,6 +65,10 @@ if exist minilua.exe.manifest^ @set DASC=vm_x86.dasc @set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64 :DA +@if "%1" neq "lua52compat" goto :NOLUA52COMPAT +@shift +@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT +:NOLUA52COMPAT minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC% @if errorlevel 1 goto :BAD @@ -102,10 +106,6 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @set LJDYNBUILD=%LJDYNBUILD_DEBUG% @set LJLINKTYPE=%LJLINKTYPE_DEBUG% :NODEBUG -@if "%1" neq "lua52compat" goto :NOLUA52COMPAT -@shift -@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_ENABLE_LUA52COMPAT -:NOLUA52COMPAT @set LJCOMPILE=%LJCOMPILE% %LJCOMPILETARGET% @set LJLINK=%LJLINK% %LJLINKTYPE% %LJLINKTARGET% @if "%1"=="amalg" goto :AMALGDLL From 5b20d6e305b67765de357137105f5af007bac705 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 29 Oct 2025 09:38:31 +0100 Subject: [PATCH 25/32] ARM64: Fix ARM BTI. Reported by Yuichiro Naito. 
#1400 --- src/vm_arm64.dasc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 85d38de384..a437b65766 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc @@ -3497,6 +3497,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif |->vm_IITERN: | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + |.if JIT + | bti_jump + |.endif | add RA, BASE, RA, lsl #3 | ldr TAB:RB, [RA, #-16] | ldrh TMP3w, [PC, # OFS_RD] From 8518c0b40b1734901de888a0a363450c0709d3f8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Wed, 29 Oct 2025 09:38:41 +0100 Subject: [PATCH 26/32] x64: Fix CET IBT. Reported by Yuichiro Naito. #1400 --- src/vm_x64.dasc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 2e9f05056d..4cfb7b6ad2 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -4140,6 +4140,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | hotloop RBd |.endif |->vm_IITERN: + |.if JIT + | endbr + |.endif | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TAB:RB, [BASE+RA*8-16] | cleartp TAB:RB From 3c7b158b799405545775f7ec52e17019fcf6ace8 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 6 Nov 2025 00:30:22 +0100 Subject: [PATCH 27/32] ARM64: Fix disassembly of >2GB branch targets. 
--- src/jit/dis_arm64.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua index 944f1a6ced..facc6e4a1e 100644 --- a/src/jit/dis_arm64.lua +++ b/src/jit/dis_arm64.lua @@ -923,7 +923,7 @@ local function disass_ins(ctx) elseif p == "B" then local addr = ctx.addr + pos + parse_immpc(op, name) ctx.rel = addr - x = "0x"..tohex(addr) + x = format("0x%08x", addr) elseif p == "T" then x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) elseif p == "V" then From 68354f444728ef99bb51bb4d86e8f1b40853a898 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Thu, 6 Nov 2025 00:42:02 +0100 Subject: [PATCH 28/32] Allow mcode allocations outside of the jump range to the support code. Thank you for your patience. #285 --- doc/running.html | 4 +- src/lib_jit.c | 15 ++- src/lj_arch.h | 3 +- src/lj_asm.c | 6 +- src/lj_asm_arm.h | 76 +++++++++---- src/lj_asm_arm64.h | 56 +++++++--- src/lj_asm_mips.h | 60 +++++++--- src/lj_asm_ppc.h | 68 +++++++---- src/lj_asm_x86.h | 88 +++++++++------ src/lj_emit_arm.h | 11 +- src/lj_emit_mips.h | 3 + src/lj_emit_ppc.h | 3 + src/lj_emit_x86.h | 13 ++- src/lj_jit.h | 23 ++-- src/lj_mcode.c | 255 ++++++++++++++++++++++++++---------------- src/lj_target_arm.h | 2 + src/lj_target_arm64.h | 1 + src/lj_target_ppc.h | 1 + src/lj_trace.c | 17 ++- 19 files changed, 477 insertions(+), 228 deletions(-) diff --git a/doc/running.html b/doc/running.html index f71eee42f6..56d4c7bfbe 100644 --- a/doc/running.html +++ b/doc/running.html @@ -299,9 +299,9 @@

-O[level]
recunroll2Min. unroll factor for true recursion -sizemcode32Size of each machine code area in KBytes (Windows: 64K) +sizemcode64Size of each machine code area in KBytes -maxmcode512Max. total size of all machine code areas in KBytes +maxmcode2048Max. total size of all machine code areas in KBytes
diff --git a/src/lib_jit.c b/src/lib_jit.c index fd8e585b83..1b74d957b5 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -479,12 +479,21 @@ static int jitopt_param(jit_State *J, const char *str) size_t len = *(const uint8_t *)lst; lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { - int32_t n = 0; + uint32_t n = 0; const char *p = &str[len+1]; while (*p >= '0' && *p <= '9') n = n*10 + (*p++ - '0'); - if (*p) return 0; /* Malformed number. */ - J->param[i] = n; + if (*p || (int32_t)n < 0) return 0; /* Malformed number. */ + if (i == JIT_P_sizemcode) { /* Adjust to required range here. */ +#if LJ_TARGET_JUMPRANGE + uint32_t maxkb = ((1 << (LJ_TARGET_JUMPRANGE - 10)) - 64); +#else + uint32_t maxkb = ((1 << (31 - 10)) - 64); +#endif + n = (n + (LJ_PAGESIZE >> 10) - 1) & ~((LJ_PAGESIZE >> 10) - 1); + if (n > maxkb) n = maxkb; + } + J->param[i] = (int32_t)n; if (i == JIT_P_hotloop) lj_dispatch_init_hotcount(J2G(J)); return 1; /* Ok. */ diff --git a/src/lj_arch.h b/src/lj_arch.h index 6d1a92714c..799f9c6cc3 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -301,6 +301,7 @@ #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_TARGET_GC64 1 +#define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL #define LJ_ARCH_VERSION 80 @@ -456,7 +457,7 @@ #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 #define LJ_TARGET_EHRAREG 31 -#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ +#define LJ_TARGET_JUMPRANGE 28 /* 2^28 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ diff --git a/src/lj_asm.c b/src/lj_asm.c index 8f558a0392..0e888c294a 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -93,6 +93,10 @@ typedef struct ASMState { MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. 
*/ MCode *realign; /* Realign loop if not NULL. */ + MCode *mctail; /* Tail of trace before stack adjust + jmp. */ +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 + MCode *mcexit; /* Pointer to exit stubs. */ +#endif #ifdef LUAJIT_RANDOM_RA /* Randomize register allocation. OK for fuzz testing, not for production. */ @@ -2541,7 +2545,7 @@ void lj_asm_trace(jit_State *J, GCtrace *T) RA_DBGX((as, "===== STOP =====")); /* General trace setup. Emit tail of trace. */ - asm_tail_prep(as); + asm_tail_prep(as, T->link); as->mcloop = NULL; as->flagmcp = NULL; as->topslot = 0; diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 24deaeae27..406360d26a 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -79,18 +79,43 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Generate an exit stub group at the bottom of the reserved MCode memory. */ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { + ExitNo i; + int ind = 0; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; - int i; - if (mxp + 4*4+4*EXITSTUBS_PER_GROUP >= as->mctop) + if (mxp + 6+EXITSTUBS_PER_GROUP >= as->mctop) asm_mclimit(as); - /* str lr, [sp]; bl ->vm_exit_handler; .long DISPATCH_address, group. */ - *mxp++ = ARMI_STR|ARMI_LS_P|ARMI_LS_U|ARMF_D(RID_LR)|ARMF_N(RID_SP); - *mxp = ARMI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)-2)&0x00ffffffu); - mxp++; + if ((((target - mxp - 2) + 0x00800000u) >> 24) == 0) { + /* str lr, [sp]; bl ->vm_exit_handler; + ** .long DISPATCH_address, group. 
+ */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp = ARMI_BL | ((target - mxp - 2) & 0x00ffffffu); mxp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + /* + ** str lr, [sp]; movw/movt lr, vm_exit_handler; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 2; + } else { + /* .long vm_exit_handler; + ** str lr, [sp]; ldr lr, [pc, #-16]; blx lr; + ** .long DISPATCH_address, group; + */ + *mxp++ = (MCode)target; + *mxp++ = ARMI_STR | ARMI_LS_P | ARMI_LS_U | ARMF_D(RID_LR) | ARMF_N(RID_SP); + *mxp++ = ARMI_LDRL | ARMF_D(RID_LR) | 16; + *mxp++ = ARMI_BLXr | ARMF_M(RID_LR); + ind = 1; + } *mxp++ = (MCode)i32ptr(J2GG(as->J)->dispatch); /* DISPATCH address */ *mxp++ = group*EXITSTUBS_PER_GROUP; for (i = 0; i < EXITSTUBS_PER_GROUP; i++) - *mxp++ = ARMI_B|((-6-i)&0x00ffffffu); + *mxp++ = ARMI_B | ((-6-ind-i) & 0x00ffffffu); lj_mcode_sync(as->mcbot, mxp); lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -2210,33 +2235,46 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + MCode *target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - as->mctop = --p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. 
*/ uint32_t k = emit_isk12(ARMI_ADD, spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + *mcp++ = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); + } + if ((((target - mcp - 2) + 0x00800000u) >> 24) == 0) { + *mcp = ARMI_B | ((target - mcp - 2) & 0x00ffffffu); mcp++; + } else if ((as->flags & JIT_F_ARMV6T2)) { + *mcp++ = emit_movw_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = emit_movt_k((uint32_t)target) | ARMF_D(RID_LR); + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + } else { + *mcp++ = ARMI_LDRL | ARMI_LS_U | ARMF_D(RID_LR) | 0; + *mcp++ = ARMI_BX | ARMF_M(RID_LR); + *mcp++ = (MCode)target; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = ARMI_B|(((target-p)-1)&0x00ffffffu); + while (as->mctop > mcp) *--as->mctop = ARMI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if ((((target - p - 2) + 0x00800000u) >> 24) || + (((target - p - 1) + 0x00800000u) >> 24)) p -= 2; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } *p = 0; /* Prevent load/store merging. 
*/ + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 4feaa3b0c2..085f935728 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -51,15 +51,27 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + ind = !A64F_S_OK(target - (mxp - nexits - 2), 26); + /* !ind: 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; + ** ind: 1: str lr,[sp]; ldr lr, [gl, K64_VXH]; blr lr; movz w0,traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-ind-i)); + as->mcexit = mxp; *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); - mxp--; - *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + if (ind) { + *--mxp = A64I_LE(A64I_BLR_AUTH | A64F_N(RID_LR)); + *--mxp = A64I_LE(A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]) >> 3)); + } else { + mxp--; + *mxp = A64I_LE(A64I_BL | A64F_S26(target-mxp)); + } *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); as->mctop = mxp; } @@ -67,7 +79,7 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). */ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -1917,34 +1929,42 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. 
*/ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; + MCode *mcp = as->mctail; MCode *target; /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); - if (spadj == 0) { - *--p = A64I_LE(A64I_NOP); - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ uint32_t k = emit_isk12(spadj); lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); - p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + *mcp++ = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = A64I_B | A64F_S26((target-p)+1); + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || A64F_S_OK(target - mcp, 26)) { + *mcp = A64I_B | A64F_S26(target - mcp); mcp++; + } else { + *mcp++ = A64I_LDRx | A64F_D(RID_LR) | A64F_N(RID_GL) | A64F_U12(glofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]) >> 3); + *mcp++ = A64I_BR_AUTH | A64F_N(RID_LR); + } + while (as->mctop > mcp) *--as->mctop = A64I_LE(A64I_NOP); } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + if (!lnk) { + MCode *target = (MCode *)(void *)lj_vm_exit_interp; + if (!A64F_S_OK(target - p, 26) || !A64F_S_OK(target - (p+1), 26)) p--; + } + p--; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; *p = 0; /* Prevent load/store merging. 
*/ } diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index af0e714f15..8dadabe4a0 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -92,13 +92,23 @@ static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) /* Setup exit stub after the end of each trace. */ static void asm_exitstub_setup(ASMState *as) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ - *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; - *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, - "branch target out of range"); - *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; + *--mxp = MIPSI_LI | MIPSF_T(RID_TMP) | as->T->traceno; + if (((uintptr_t)(mxp-1) ^ target) >> 28 == 0) { + /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ + *--mxp = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + /* sw TMP, 0(sp); li TMP, K*_VXH(jgl); jr TMP ; li TMP, traceno */ + *--mxp = MIPSI_JR | MIPSF_S(RID_TMP); + *--mxp = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_HANDLER]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); +#endif + } + *--mxp = MIPSI_SW | MIPSF_T(RID_TMP) | MIPSF_S(RID_SP) | 0; as->mctop = mxp; } @@ -428,7 +438,8 @@ static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)| + RID2RSET(RID_CFUNCADDR) #if LJ_TARGET_MIPSR6 |RID2RSET(RID_F21) #endif @@ -514,7 +525,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg r) { /* The modified regs must match with the *.dasc implementation. 
*/ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| - RID2RSET(RID_R1)|RID2RSET(RID_R12); + RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_CFUNCADDR); if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ @@ -2699,18 +2710,37 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; - p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); + if (((uintptr_t)mcp ^ target) >> 28 == 0) { + *mcp++ = MIPSI_J | ((target >> 2) & 0x03ffffffu); + } else { + *mcp++ = MIPSI_AL | MIPSF_T(RID_TMP) | MIPSF_S(RID_JGL) | +#if LJ_64 + jglofs(as, &as->J->k64[LJ_K64_VM_EXIT_INTERP]); +#else + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); +#endif + *mcp++ = MIPSI_JR | MIPSF_S(RID_TMP); + } + *mcp++ = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { - as->mcp = as->mctop-2; /* Leave room for branch plus nop or stack adj. */ - as->invmcp = as->loopref ? as->mcp : NULL; + as->mcp = as->mctop - 2; /* Leave room for branch plus nop or stack adj. 
*/ + if (as->loopref) { + as->invmcp = as->mcp; + } else { + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if (((uintptr_t)as->mcp ^ target) >> 28 != 0) as->mcp--; + } + as->invmcp = NULL; + } + as->mctail = as->mcp; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index df1ac42f7a..d77c45ce9b 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -48,23 +48,38 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) static void asm_exitstub_setup(ASMState *as, ExitNo nexits) { ExitNo i; + int ind; + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler; MCode *mxp = as->mctop; - if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) asm_mclimit(as); - /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ + ind = ((target - (uintptr_t)(mxp - nexits - 2) + 0x02000000u) >> 26) ? 2 : 0; + /* !ind: 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; + ** ind: 1: lwz r0, K32_VXH(jgl); mtctr r0; mflr r0; bctrl; li r0, traceno; + ** bl <1; bl <1; ... + */ for (i = nexits-1; (int32_t)i >= 0; i--) - *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); + *--mxp = PPCI_BL | (((-3-ind-i) & 0x00ffffffu) << 2); + as->mcexit = mxp; *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ - mxp--; - *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); - *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); + if (ind) { + *--mxp = PPCI_BCTRL; + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + *--mxp = PPCI_MTCTR | PPCF_T(RID_TMP); + *--mxp = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_HANDLER]); + } else { + mxp--; + *mxp = PPCI_BL | ((target - (uintptr_t)mxp) & 0x03fffffcu); + *--mxp = PPCI_MFLR | PPCF_T(RID_TMP); + } as->mctop = mxp; } static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) { /* Keep this in-sync with exitstub_trace_addr(). 
*/ - return as->mctop + exitno + 3; + return as->mcexit + exitno; } /* Emit conditional branch to exit for guard. */ @@ -2218,34 +2233,43 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) /* Fixup the tail code. */ static void asm_tail_fixup(ASMState *as, TraceNo lnk) { - MCode *p = as->mctop; - MCode *target; + uintptr_t target = lnk ? (uintptr_t)traceref(as->J, lnk)->mcode : (uintptr_t)(void *)lj_vm_exit_interp; + MCode *mcp = as->mctail; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - *--p = PPCI_NOP; - *--p = PPCI_NOP; - as->mctop = p; - } else { - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); - p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); - p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; + *mcp++ = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); + *mcp++ = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2); + /* Emit exit branch. */ + if ((((target - (uintptr_t)mcp) + 0x02000000u) >> 26) == 0) { + *mcp = PPCI_B | ((target - (uintptr_t)mcp) & 0x03fffffcu); mcp++; + } else { + *mcp++ = PPCI_LWZ | PPCF_T(RID_TMP) | PPCF_A(RID_JGL) | + jglofs(as, &as->J->k32[LJ_K32_VM_EXIT_INTERP]); + *mcp++ = PPCI_MTCTR | PPCF_T(RID_TMP); + *mcp++ = PPCI_BCTR; + } + while (as->mctop > mcp) *--as->mctop = PPCI_NOP; } /* Prepare tail of code. */ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop - 1; /* Leave room for exit branch. */ if (as->loopref) { as->invmcp = as->mcp = p; } else { - as->mcp = p-2; /* Leave room for stack pointer adjustment. 
*/ + if (!lnk) { + uintptr_t target = (uintptr_t)(void *)lj_vm_exit_interp; + if ((((target - (uintptr_t)p) + 0x02000000u) >> 26) || + (((target - (uintptr_t)(p-2)) + 0x02000000u) >> 26)) p -= 2; + } + p -= 2; /* Leave room for stack pointer adjustment. */ + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 774e77b433..f3c2238a2f 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -9,9 +9,12 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) { ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff; + MCode *target = (MCode *)(void *)lj_vm_exit_handler; MCode *mxp = as->mcbot; MCode *mxpstart = mxp; - if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop) + if (mxp + ((2+2)*EXITSTUBS_PER_GROUP + + (LJ_GC64 ? 0 : 8) + + (LJ_64 ? 6 : 5)) >= as->mctop) asm_mclimit(as); /* Push low byte of exitno for each exit stub. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs; @@ -30,8 +33,13 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; #endif /* Jump to exit handler which fills in the ExitState. */ - *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); + if (jmprel_ok(mxp + 5, target)) { /* Direct jump. */ + *mxp++ = XI_JMP; mxp += 4; + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, target); + } else { /* RIP-relative indirect jump. */ + *mxp++ = XI_GROUP5; *mxp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mxp += 4; + *((int32_t *)(mxp-4)) = (int32_t)((group ? as->J->exitstubgroup[0] : mxpstart) - 8 - mxp); + } /* Commit the code for this group (even if assembly fails later on). 
*/ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -45,6 +53,16 @@ static void asm_exitstub_setup(ASMState *as, ExitNo nexits) ExitNo i; if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) lj_trace_err(as->J, LJ_TRERR_SNAPOV); +#if LJ_64 + if (as->J->exitstubgroup[0] == NULL) { + /* Store the two potentially out-of-range targets below group 0. */ + MCode *mxp = as->mcbot; + while ((uintptr_t)mxp & 7) *mxp++ = XI_INT3; + *((void **)mxp) = (void *)lj_vm_exit_interp; mxp += 8; + *((void **)mxp) = (void *)lj_vm_exit_handler; mxp += 8; + as->mcbot = mxp; /* Don't bother to commit, done in asm_exitstub_gen. */ + } +#endif for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++) if (as->J->exitstubgroup[i] == NULL) as->J->exitstubgroup[i] = asm_exitstub_gen(as, i); @@ -396,7 +414,7 @@ static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) "bad interned 64 bit constant"); } else { while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; - *(uint64_t*)as->mcbot = *k; + *(uint64_t *)as->mcbot = *k; ir->i = (int32_t)(as->mctop - as->mcbot); as->mcbot += 8; as->mclim = as->mcbot + MCLIM_REDZONE; @@ -728,7 +746,7 @@ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) p = (MCode *)(void *)ir_k64(irf)->u64; else p = (MCode *)(void *)(uintptr_t)(uint32_t)irf->i; - if (p - as->mcp == (int32_t)(p - as->mcp)) + if (jmprel_ok(p, as->mcp)) return p; /* Call target is still in +-2GB range. */ /* Avoid the indirect case of emit_call(). Try to hoist func addr. */ } @@ -2806,6 +2824,8 @@ static void asm_gc_check(ASMState *as) emit_rr(as, XO_TEST, RID_RET, RID_RET); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ASMREF_TMP2; /* MSize steps */ + /* Insert nop to simplify GC exit recognition in lj_asm_patchexit. 
*/ + if (!jmprel_ok(as->mcp, (MCode *)(void *)ci->func)) *--as->mcp = XI_NOP; asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); #if LJ_GC64 @@ -2919,40 +2939,36 @@ static Reg asm_head_side_base(ASMState *as, IRIns *irp) static void asm_tail_fixup(ASMState *as, TraceNo lnk) { /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */ - MCode *p = as->mctop; - MCode *target, *q; + MCode *mcp = as->mctail; + MCode *target; int32_t spadj = as->T->spadjust; - if (spadj == 0) { - p -= LJ_64 ? 7 : 6; - } else { - MCode *p1; - /* Patch stack adjustment. */ + if (spadj) { /* Emit stack adjustment. */ + if (LJ_64) *mcp++ = 0x48; if (checki8(spadj)) { - p -= 3; - p1 = p-6; - *p1 = (MCode)spadj; + *mcp++ = XI_ARITHi8; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *mcp++ = (MCode)spadj; } else { - p1 = p-9; - *(int32_t *)p1 = spadj; + *mcp++ = XI_ARITHi; + *mcp++ = MODRM(XM_REG, XOg_ADD, RID_ESP); + *(int32_t *)mcp = spadj; mcp += 4; } -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } - /* Patch exit branch. */ - target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(as->J, p, target); - p[-5] = XI_JMP; + /* Emit exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)(void *)lj_vm_exit_interp; + if (lnk || jmprel_ok(mcp + 5, target)) { /* Direct jump. */ + *mcp++ = XI_JMP; mcp += 4; + *(int32_t *)(mcp-4) = jmprel(as->J, mcp, target); + } else { /* RIP-relative indirect jump. */ + *mcp++ = XI_GROUP5; *mcp++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; mcp += 4; + *((int32_t *)(mcp-4)) = (int32_t)(as->J->exitstubgroup[0] - 16 - mcp); + } /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ - for (q = as->mctop-1; q >= p; q--) - *q = XI_NOP; - as->mctop = p; + while (as->mctop > mcp) *--as->mctop = XI_NOP; } /* Prepare tail of code. 
*/ -static void asm_tail_prep(ASMState *as) +static void asm_tail_prep(ASMState *as, TraceNo lnk) { MCode *p = as->mctop; /* Realign and leave room for backwards loop branch or exit branch. */ @@ -2964,15 +2980,17 @@ static void asm_tail_prep(ASMState *as) as->mctop = p; p -= (as->loopinv ? 5 : 2); /* Space for short/near jmp. */ } else { - p -= 5; /* Space for exit branch (near jmp). */ + p -= (LJ_64 && !lnk) ? 6 : 5; /* Space for exit branch. */ } if (as->loopref) { as->invmcp = as->mcp = p; } else { - /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (LJ_64 ? 7 : 6); + /* Leave room for ESP adjustment: add esp, imm */ + p -= LJ_64 ? 7 : 6; + as->mcp = p; as->invmcp = NULL; } + as->mctail = p; } /* -- Trace setup --------------------------------------------------------- */ @@ -3132,6 +3150,10 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if (*p == XI_CALL && (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { pgc = p+7; /* Do not patch GC check exit. */ + } else if (LJ_64 && *p == 0xff && + p[1] == MODRM(XM_REG, XOg_CALL, RID_RET) && + p[2] == XI_NOP) { + pgc = p+5; /* Do not patch GC check exit. */ } } lj_mcode_sync(T->mcode, T->mcode + T->szmcode); diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index c60e7d7560..3e1eb64bfc 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -173,6 +173,11 @@ static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) return 0; /* Failed. */ } +#define emit_movw_k(k) \ + (ARMI_MOVW | ((k) & 0x0fffu) | (((k) & 0xf000u) << 4)) +#define emit_movt_k(k) \ + (ARMI_MOVT | (((k) >> 16) & 0x0fffu) | ((((k) >> 16) & 0xf000u) << 4)) + /* Load a 32 bit constant into a GPR. */ static void emit_loadi(ASMState *as, Reg rd, int32_t i) { @@ -184,13 +189,13 @@ static void emit_loadi(ASMState *as, Reg rd, int32_t i) emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. 
*/ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + emit_d(as, emit_movt_k(i), rd); + emit_d(as, emit_movw_k(i), rd); } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index d8104959aa..d65b1c5777 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -80,6 +80,9 @@ static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index b13f00fe5b..56928e4235 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -53,6 +53,9 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) /* -- Emit loads/stores --------------------------------------------------- */ +#define jglofs(as, k) \ + (((uintptr_t)(k) - (uintptr_t)J2G(as->J) - 32768) & 0xffff) + /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index 5fd6cfa7eb..858fe753be 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -478,6 +478,17 @@ static void emit_sfixup(ASMState *as, MCLabel source) /* Return label pointing to current PC. */ #define emit_label(as) ((as)->mcp) +/* Check if two addresses are in relative jump range. 
*/ +static LJ_AINLINE int jmprel_ok(MCode *a, MCode *b) +{ +#if LJ_64 + return a - b == (int32_t)(a - b); +#else + UNUSED(a); UNUSED(b); + return 1; +#endif +} + /* Compute relative 32 bit offset for jump and call instructions. */ static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { @@ -511,7 +522,7 @@ static void emit_call_(ASMState *as, MCode *target) { MCode *p = as->mcp; #if LJ_64 - if (target-p != (int32_t)(target-p)) { + if (!jmprel_ok(target, p)) { /* Assumes RID_RET is never an argument to calls and always clobbered. */ emit_rr(as, XO_GROUP5, XOg_CALL, RID_RET); emit_loadu64(as, RID_RET, (uint64_t)target); diff --git a/src/lj_jit.h b/src/lj_jit.h index 102ba0b4b7..05a8e9bbe9 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -104,14 +104,6 @@ /* -- JIT engine parameters ----------------------------------------------- */ -#if LJ_TARGET_WINDOWS || LJ_64 -/* See: https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223 */ -#define JIT_P_sizemcode_DEFAULT 64 -#else -/* Could go as low as 4K, but the mmap() overhead would be rather high. */ -#define JIT_P_sizemcode_DEFAULT 32 -#endif - /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ @@ -131,9 +123,9 @@ _(\011, recunroll, 2) /* Min. unroll for true recursion. */ \ \ /* Size of each machine code area (in KBytes). */ \ - _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ + _(\011, sizemcode, 64) \ /* Max. total size of all machine code areas (in KBytes). */ \ - _(\010, maxmcode, 512) \ + _(\010, maxmcode, 2048) \ /* End of list. 
*/ enum { @@ -374,10 +366,14 @@ enum { LJ_K64_2P63, /* 2^63 */ LJ_K64_M2P64, /* -2^64 */ #endif +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + LJ_K64_VM_EXIT_HANDLER, + LJ_K64_VM_EXIT_INTERP, #endif LJ_K64__MAX, }; -#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) enum { #if LJ_TARGET_X86ORX64 @@ -393,6 +389,10 @@ enum { #if LJ_TARGET_MIPS64 LJ_K32_2P63, /* 2^63 */ LJ_K32_M2P64, /* -2^64 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + LJ_K32_VM_EXIT_HANDLER, + LJ_K32_VM_EXIT_INTERP, #endif LJ_K32__MAX }; @@ -513,6 +513,7 @@ typedef struct jit_State { MCode *mcbot; /* Bottom of current mcode area. */ size_t szmcarea; /* Size of current mcode area. */ size_t szallmcarea; /* Total size of all allocated mcode areas. */ + uintptr_t mcmin, mcmax; /* Mcode allocation range. */ TValue errinfo; /* Additional info element for trace errors. */ diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 2b8ac2df58..c3032f4e2d 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -63,31 +63,46 @@ void lj_mcode_sync(void *start, void *end) #if LJ_HASJIT +#if LUAJIT_SECURITY_MCODE != 0 +/* Protection twiddling failed. Probably due to kernel security. 
*/ +static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) +{ + lua_CFunction panic = J2G(J)->panic; + if (panic) { + lua_State *L = J->L; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); + panic(L); + } + exit(EXIT_FAILURE); +} +#endif + #if LJ_TARGET_WINDOWS #define MCPROT_RW PAGE_READWRITE #define MCPROT_RX PAGE_EXECUTE_READ #define MCPROT_RWX PAGE_EXECUTE_READWRITE -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, DWORD prot) { - void *p = LJ_WIN_VALLOC((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); - if (!p && !hint) - lj_trace_err(J, LJ_TRERR_MCODEAL); - return p; + return LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); } -static void mcode_free(jit_State *J, void *p, size_t sz) +static void mcode_free(void *p, size_t sz) { - UNUSED(J); UNUSED(sz); + UNUSED(sz); VirtualFree(p, 0, MEM_RELEASE); } -static int mcode_setprot(void *p, size_t sz, DWORD prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, DWORD prot) { +#if LUAJIT_SECURITY_MCODE != 0 DWORD oprot; - return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); + if (!LJ_WIN_VPROTECT(p, sz, prot, &oprot)) mcode_protfail(J); +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); +#endif } #elif LJ_TARGET_POSIX @@ -117,33 +132,33 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_CREATE 0 #endif -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) +static void *mcode_alloc_at(uintptr_t hint, size_t sz, int prot) { void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS|MCMAP_CREATE, -1, 0); - if (p == MAP_FAILED) { - if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); - p = NULL; + if (p == MAP_FAILED) return NULL; #if MCMAP_CREATE - } else { - pthread_jit_write_protect_np(0); + pthread_jit_write_protect_np(0); #endif - } return p; } -static void mcode_free(jit_State *J, void *p, size_t sz) 
+static void mcode_free(void *p, size_t sz) { - UNUSED(J); munmap(p, sz); } -static int mcode_setprot(void *p, size_t sz, int prot) +static void mcode_setprot(jit_State *J, void *p, size_t sz, int prot) { +#if LUAJIT_SECURITY_MCODE != 0 #if MCMAP_CREATE + UNUSED(J); UNUSED(p); UNUSED(sz); pthread_jit_write_protect_np((prot & PROT_EXEC)); return 0; #else - return mprotect(p, sz, prot); + if (mprotect(p, sz, prot)) mcode_protfail(J); +#endif +#else + UNUSED(J); UNUSED(p); UNUSED(sz); UNUSED(prot); #endif } @@ -153,6 +168,49 @@ static int mcode_setprot(void *p, size_t sz, int prot) #endif +#ifdef LUAJIT_MCODE_TEST +/* Test wrapper for mcode allocation. DO NOT ENABLE in production! Try: +** LUAJIT_MCODE_TEST=hhhhhhhhhhhhhhhh luajit -jv main.lua +** LUAJIT_MCODE_TEST=F luajit -jv main.lua +*/ +static void *mcode_alloc_at_TEST(jit_State *J, uintptr_t hint, size_t sz, int prot) +{ + static int test_ofs = 0; + static const char *test_str; + if (!test_str) { + test_str = getenv("LUAJIT_MCODE_TEST"); + if (!test_str) test_str = ""; + } + switch (test_str[test_ofs]) { + case 'a': /* OK for one allocation. */ + test_ofs++; + /* fallthrough */ + case '\0': /* EOS: OK for any further allocations. */ + break; + case 'h': /* Ignore one hint. */ + test_ofs++; + /* fallthrough */ + case 'H': /* Ignore any further hints. */ + hint = 0u; + break; + case 'r': /* Randomize one hint. */ + test_ofs++; + /* fallthrough */ + case 'R': /* Randomize any further hints. */ + hint = lj_prng_u64(&J2G(J)->prng) & ~(uintptr_t)0xffffu; + hint &= ((uintptr_t)1 << (LJ_64 ? 47 : 31)) - 1; + break; + case 'f': /* Fail one allocation. */ + test_ofs++; + /* fallthrough */ + default: /* 'F' or unknown: Fail any further allocations. 
*/ + return NULL; + } + return mcode_alloc_at(hint, sz, prot); +} +#define mcode_alloc_at(hint, sz, prot) mcode_alloc_at_TEST(J, hint, sz, prot) +#endif + /* -- MCode area protection ----------------------------------------------- */ #if LUAJIT_SECURITY_MCODE == 0 @@ -174,7 +232,7 @@ static int mcode_setprot(void *p, size_t sz, int prot) static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); + UNUSED(J); UNUSED(prot); } #else @@ -190,24 +248,11 @@ static void mcode_protect(jit_State *J, int prot) #define MCPROT_GEN MCPROT_RW #define MCPROT_RUN MCPROT_RX -/* Protection twiddling failed. Probably due to kernel security. */ -static LJ_NORET LJ_NOINLINE void mcode_protfail(jit_State *J) -{ - lua_CFunction panic = J2G(J)->panic; - if (panic) { - lua_State *L = J->L; - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_JITPROT)); - panic(L); - } - exit(EXIT_FAILURE); -} - /* Change protection of MCode area. */ static void mcode_protect(jit_State *J, int prot) { if (J->mcprot != prot) { - if (LJ_UNLIKELY(mcode_setprot(J->mcarea, J->szmcarea, prot))) - mcode_protfail(J); + mcode_setprot(J, J->mcarea, J->szmcarea, prot); J->mcprot = prot; } } @@ -216,47 +261,74 @@ static void mcode_protect(jit_State *J, int prot) /* -- MCode area allocation ----------------------------------------------- */ -#if LJ_64 -#define mcode_validptr(p) (p) -#else -#define mcode_validptr(p) ((p) && (uintptr_t)(p) < 0xffff0000) -#endif - #ifdef LJ_TARGET_JUMPRANGE -/* Get memory within relative jump distance of our code in 64 bit mode. */ -static void *mcode_alloc(jit_State *J, size_t sz) +#define MCODE_RANGE64 ((1u << LJ_TARGET_JUMPRANGE) - 0x10000u) + +/* Set a memory range for mcode allocation with addr in the middle. */ +static void mcode_setrange(jit_State *J, uintptr_t addr) { - /* Target an address in the static assembler code (64K aligned). - ** Try addresses within a distance of target-range/2+1MB..target+range/2-1MB. 
- ** Use half the jump range so every address in the range can reach any other. - */ #if LJ_TARGET_MIPS - /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & - ~(uintptr_t)0x0fffffffu) + 0x08000000u; + /* Use the whole 256MB-aligned region. */ + J->mcmin = addr & ~(uintptr_t)((1u << LJ_TARGET_JUMPRANGE) - 1); + J->mcmax = J->mcmin + (1u << LJ_TARGET_JUMPRANGE); #else - uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; + /* Every address in the 64KB-aligned range should be able to reach + ** any other, so MCODE_RANGE64 is only half the (signed) branch range. + */ + J->mcmin = (addr - (MCODE_RANGE64 >> 1) + 0xffffu) & ~(uintptr_t)0xffffu; + J->mcmax = J->mcmin + MCODE_RANGE64; #endif - const uintptr_t range = (1u << (LJ_TARGET_JUMPRANGE-1)) - (1u << 21); - /* First try a contiguous area below the last one. */ - uintptr_t hint = J->mcarea ? (uintptr_t)J->mcarea - sz : 0; - int i; - /* Limit probing iterations, depending on the available pool size. */ - for (i = 0; i < LJ_TARGET_JUMPRANGE; i++) { - if (mcode_validptr(hint)) { - void *p = mcode_alloc_at(J, hint, sz, MCPROT_GEN); - - if (mcode_validptr(p) && - ((uintptr_t)p + sz - target < range || target - (uintptr_t)p < range)) - return p; - if (p) mcode_free(J, p, sz); /* Free badly placed area. */ - } - /* Next try probing 64K-aligned pseudo-random addresses. */ + /* Avoid wrap-around and the 64KB corners. */ + if (addr < J->mcmin || !J->mcmin) J->mcmin = 0x10000u; + if (addr > J->mcmax) J->mcmax = ~(uintptr_t)0xffffu; +} + +/* Check if an address is in range of the mcode allocation range. */ +static LJ_AINLINE int mcode_inrange(jit_State *J, uintptr_t addr, size_t sz) +{ + /* Take care of unsigned wrap-around of addr + sz, too. */ + return addr >= J->mcmin && addr + sz >= J->mcmin && addr + sz <= J->mcmax; +} + +/* Get memory within a specific jump range in 64 bit mode. 
*/ +static void *mcode_alloc(jit_State *J, size_t sz) +{ + uintptr_t hint; + int i = 0, j; + if (!J->mcmin) /* Place initial range near the interpreter code. */ + mcode_setrange(J, (uintptr_t)(void *)lj_vm_exit_handler); + else if (!J->mcmax) /* Switch to a new range (already flushed). */ + goto newrange; + /* First try a contiguous area below the last one (if in range). */ + hint = (uintptr_t)J->mcarea - sz; + if (!mcode_inrange(J, hint, sz)) /* Also takes care of NULL J->mcarea. */ + goto probe; + for (; i < 16; i++) { + void *p = mcode_alloc_at(hint, sz, MCPROT_GEN); + if (mcode_inrange(J, (uintptr_t)p, sz)) + return p; /* Success. */ + else if (p) + mcode_free(p, sz); /* Free badly placed area. */ + probe: + /* Next try probing 64KB-aligned pseudo-random addresses. */ + j = 0; do { - hint = lj_prng_u64(&J2G(J)->prng) & ((1u<mcmin + (lj_prng_u64(&J2G(J)->prng) & MCODE_RANGE64); + if (++j > 15) goto fail; + } while (!mcode_inrange(J, hint, sz)); + } +fail: + if (!J->mcarea) { /* Switch to a new range now. */ + void *p; + newrange: + p = mcode_alloc_at(0, sz, MCPROT_GEN); + if (p) { + mcode_setrange(J, (uintptr_t)p + (sz >> 1)); + return p; /* Success. */ + } + } else { + J->mcmax = 0; /* Switch to a new range after the flush. */ } lj_trace_err(J, LJ_TRERR_MCODEAL); /* Give up. OS probably ignores hints? */ return NULL; @@ -269,15 +341,13 @@ static void *mcode_alloc(jit_State *J, size_t sz) { #if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. 
*/ - void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); - if (p && mcode_setprot(p, sz, MCPROT_GEN)) { - mcode_free(J, p, sz); - return NULL; - } - return p; + void *p = mcode_alloc_at(0, sz, MCPROT_RUN); + if (p) mcode_setprot(J, p, sz, MCPROT_GEN); #else - return mcode_alloc_at(J, 0, sz, MCPROT_GEN); + void *p = mcode_alloc_at(0, sz, MCPROT_GEN); #endif + if (!p) lj_trace_err(J, LJ_TRERR_MCODEAL); + return p; } #endif @@ -289,7 +359,6 @@ static void mcode_allocarea(jit_State *J) { MCode *oldarea = J->mcarea; size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10; - sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); J->mcarea = (MCode *)mcode_alloc(J, sz); J->szmcarea = sz; J->mcprot = MCPROT_GEN; @@ -311,7 +380,7 @@ void lj_mcode_free(jit_State *J) MCode *next = ((MCLink *)mc)->next; size_t sz = ((MCLink *)mc)->size; lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); - mcode_free(J, mc, sz); + mcode_free(mc, sz); mc = next; } } @@ -347,32 +416,25 @@ void lj_mcode_abort(jit_State *J) MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { if (finish) { -#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); - else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) - mcode_protfail(J); -#endif + else + mcode_setprot(J, ptr, ((MCLink *)ptr)->size, MCPROT_RUN); return NULL; } else { - MCode *mc = J->mcarea; + uintptr_t base = (uintptr_t)J->mcarea, addr = (uintptr_t)ptr; /* Try current area first to use the protection cache. */ - if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { -#if LUAJIT_SECURITY_MCODE + if (addr >= base && addr < base + J->szmcarea) { mcode_protect(J, MCPROT_GEN); -#endif - return mc; + return (MCode *)base; } /* Otherwise search through the list of MCode areas. 
*/ for (;;) { - mc = ((MCLink *)mc)->next; - lj_assertJ(mc != NULL, "broken MCode area chain"); - if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { -#if LUAJIT_SECURITY_MCODE - if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) - mcode_protfail(J); -#endif - return mc; + base = (uintptr_t)(((MCLink *)base)->next); + lj_assertJ(base != 0, "broken MCode area chain"); + if (addr >= base && addr < base + ((MCLink *)base)->size) { + mcode_setprot(J, (MCode *)base, ((MCLink *)base)->size, MCPROT_GEN); + return (MCode *)base; } } } @@ -384,7 +446,6 @@ void lj_mcode_limiterr(jit_State *J, size_t need) size_t sizemcode, maxmcode; lj_mcode_abort(J); sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10; - sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1); maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10; if (need * sizeof(MCode) > sizemcode) lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */ diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index d0bbc5a5fb..947545f821 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -190,6 +190,7 @@ typedef enum ARMIns { ARMI_LDRSB = 0xe01000d0, ARMI_LDRSH = 0xe01000f0, ARMI_LDRD = 0xe00000d0, + ARMI_LDRL = 0xe51f0000, ARMI_STR = 0xe4000000, ARMI_STRB = 0xe4400000, ARMI_STRH = 0xe00000b0, @@ -200,6 +201,7 @@ typedef enum ARMIns { ARMI_BL = 0xeb000000, ARMI_BLX = 0xfa000000, ARMI_BLXr = 0xe12fff30, + ARMI_BX = 0xe12fff10, /* ARMv6 */ ARMI_REV = 0xe6bf0f30, diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h index 30aff47882..3113d1410a 100644 --- a/src/lj_target_arm64.h +++ b/src/lj_target_arm64.h @@ -110,6 +110,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + if ((LJ_LE ? p[1] >> 28 : p[1] & 0xf) == 0xf) p++; /* Skip A64I_LDRx. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. 
*/ diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index 5a1b5a7cca..58f311884f 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -115,6 +115,7 @@ typedef struct { static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) { while (*p == 0x60000000) p++; /* Skip PPCI_NOP. */ + if (p[3] == 0x4e800421) p += 2; /* Indirect branch PPCI_BCTRL. */ return p + 3 + exitno; } /* Avoid dependence on lj_jit.h if only including lj_target.h. */ diff --git a/src/lj_trace.c b/src/lj_trace.c index 0e948e8d08..3e2cd0b393 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -343,6 +343,14 @@ void lj_trace_initstate(global_State *g) J->k32[LJ_K32_M2P64] = 0xdf800000; #endif #endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 + J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; + J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; +#endif +#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_VM_EXIT_HANDLER].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_handler, 0); + J->k64[LJ_K64_VM_EXIT_INTERP].u64 = (uintptr_t)lj_ptr_sign((void *)lj_vm_exit_interp, 0); +#endif } /* Free everything associated with the JIT compiler state. */ @@ -637,10 +645,15 @@ static int trace_abort(jit_State *J) J->cur.traceno = 0; } L->top--; /* Remove error object */ - if (e == LJ_TRERR_DOWNREC) + if (e == LJ_TRERR_DOWNREC) { return trace_downrec(J); - else if (e == LJ_TRERR_MCODEAL) + } else if (e == LJ_TRERR_MCODEAL) { + if (!J->mcarea) { /* Disable JIT compiler if first mcode alloc fails. */ + J->flags &= ~JIT_F_ON; + lj_dispatch_update(J2G(J)); + } lj_trace_flushall(L); + } return 0; } From 578c41ceb73bdf9d97f23c9e0342f8d027c08e77 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 8 Nov 2025 15:41:42 +0100 Subject: [PATCH 29/32] ARM64: Enable unaligned accesses if indicated by the toolchain. If you get a crash in jit_init() then you need to fix your toolchain. 
--- src/lib_jit.c | 9 +++++++++ src/lj_arch.h | 4 ++++ src/lj_asm_arm64.h | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/lib_jit.c b/src/lib_jit.c index 1b74d957b5..0f75c5ac64 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -723,7 +723,16 @@ static void jit_init(lua_State *L) jit_State *J = L2J(L); J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); +#if LJ_TARGET_UNALIGNED + G(L)->tmptv.u64 = U64x(0000504d,4d500000); +#endif lj_dispatch_update(G(L)); +#if LJ_TARGET_UNALIGNED + /* If you get a crash below then your toolchain indicates unaligned + ** accesses are OK, but your kernel disagrees. I.e. fix your toolchain. + */ + if (*(uint32_t *)((char *)&G(L)->tmptv + 2) != 0x504d4d50u) L->top = NULL; +#endif } #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index 799f9c6cc3..5f3880680b 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -304,6 +304,10 @@ #define LJ_PAGESIZE 16384 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#if __ARM_FEATURE_UNALIGNED +#define LJ_TARGET_UNALIGNED 1 +#endif + #define LJ_ARCH_VERSION 80 #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h index 085f935728..fdcff1db24 100644 --- a/src/lj_asm_arm64.h +++ b/src/lj_asm_arm64.h @@ -1056,7 +1056,8 @@ static void asm_fstore(ASMState *as, IRIns *ir) static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); } From fdf2379ccba1eb68ff07f8bc48541568f5bbdfbf Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Sat, 8 Nov 2025 16:54:00 +0100 Subject: [PATCH 30/32] macOS: Change Mach-O object file layout required by XCode 15.0. Reported by George Zhao. 
#1404 --- src/jit/bcsave.lua | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index a30a34b6be..e4ca19779d 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -465,9 +465,11 @@ typedef struct { mach_segment_command_64 seg; mach_section_64 sec; mach_symtab_command sym; +} mach_obj_64; +typedef struct { mach_nlist_64 sym_entry; uint8_t space[4096]; -} mach_obj_64; +} mach_obj_64_tail; ]] local symname = '_'..LJBC_PREFIX..ctx.modname local cputype, cpusubtype = 0x01000007, 3 @@ -479,7 +481,10 @@ typedef struct { -- Create Mach-O object and fill in header. local o = ffi.new("mach_obj_64") - local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, 8) + local t = ffi.new("mach_obj_64_tail") + local ofs_bc = ffi.sizeof(o) + local sz_bc = aligned(#s, 8) + local ofs_sym = ofs_bc + sz_bc -- Fill in sections and symbols. o.hdr.magic = 0xfeedfacf @@ -491,7 +496,7 @@ typedef struct { o.seg.cmd = 0x19 o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec) o.seg.vmsize = #s - o.seg.fileoff = mach_size + o.seg.fileoff = ofs_bc o.seg.filesize = #s o.seg.maxprot = 1 o.seg.initprot = 1 @@ -499,22 +504,23 @@ typedef struct { ffi.copy(o.sec.sectname, "__data") ffi.copy(o.sec.segname, "__DATA") o.sec.size = #s - o.sec.offset = mach_size + o.sec.offset = ofs_bc o.sym.cmd = 2 o.sym.cmdsize = ffi.sizeof(o.sym) - o.sym.symoff = ffi.offsetof(o, "sym_entry") + o.sym.symoff = ofs_sym o.sym.nsyms = 1 - o.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry) + o.sym.stroff = ofs_sym + ffi.offsetof(t, "space") o.sym.strsize = aligned(#symname+2, 8) - o.sym_entry.type = 0xf - o.sym_entry.sect = 1 - o.sym_entry.strx = 1 - ffi.copy(o.space+1, symname) + t.sym_entry.type = 0xf + t.sym_entry.sect = 1 + t.sym_entry.strx = 1 + ffi.copy(t.space+1, symname) -- Write Mach-O object file. 
local fp = savefile(output, "wb") - fp:write(ffi.string(o, mach_size)) - bcsave_tail(fp, output, s) + fp:write(ffi.string(o, ofs_bc)) + fp:write(s, ("\0"):rep(sz_bc - #s)) + bcsave_tail(fp, output, ffi.string(t, ffi.offsetof(t, "space") + o.sym.strsize)) end local function bcsave_obj(ctx, output, s) From 5c647754a687a910ef40a097fbf8f7415561c8aa Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Nov 2025 18:11:26 +0100 Subject: [PATCH 31/32] Run VM events and finalizers in separate state. Reported by Sergey Kaplun. #1403 --- src/lj_gc.c | 18 +++++----- src/lj_obj.h | 2 ++ src/lj_parse.c | 4 +-- src/lj_state.c | 1 + src/lj_trace.c | 91 +++++++++++++++++++++++------------------------- src/lj_vmevent.c | 5 +++ src/lj_vmevent.h | 22 ++++++------ 7 files changed, 76 insertions(+), 67 deletions(-) diff --git a/src/lj_gc.c b/src/lj_gc.c index d9581d20d3..c779d583e9 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -106,6 +106,7 @@ static void gc_mark_start(global_State *g) setgcrefnull(g->gc.weak); gc_markobj(g, mainthread(g)); gc_markobj(g, tabref(mainthread(g)->env)); + gc_markobj(g, vmthread(g)); gc_marktv(g, &g->registrytv); gc_mark_gcroot(g); g->gc.state = GCSpropagate; @@ -507,24 +508,25 @@ static void gc_call_finalizer(global_State *g, lua_State *L, uint8_t oldh = hook_save(g); GCSize oldt = g->gc.threshold; int errcode; + lua_State *VL = vmthread(g); TValue *top; lj_trace_abort(g); hook_entergc(g); /* Disable hooks and new traces during __gc. */ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. 
*/ - top = L->top; - copyTV(L, top++, mo); + top = VL->top; + copyTV(VL, top++, mo); if (LJ_FR2) setnilV(top++); - setgcV(L, top, o, ~o->gch.gct); - L->top = top+1; - errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcV(VL, top, o, ~o->gch.gct); + VL->top = top+1; + errcode = lj_vm_pcall(VL, top, 1+0, -1); /* Stack: |mo|o| -> | */ + setgcref(g->cur_L, obj2gco(L)); hook_restore(g, oldh); if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = oldt; /* Restore GC threshold. */ if (errcode) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */ - lj_vmevent_send(L, ERRFIN, - copyTV(L, L->top++, restorestack(L, errobj)); + lj_vmevent_send(g, ERRFIN, + copyTV(V, V->top++, L->top-1); ); L->top--; } diff --git a/src/lj_obj.h b/src/lj_obj.h index 855727bfab..73b186e256 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -647,6 +647,7 @@ typedef struct global_State { TValue tmptv, tmptv2; /* Temporary TValues. */ Node nilnode; /* Fallback 1-element hash part (nil key and value). */ TValue registrytv; /* Anchor for registry. */ + GCRef vmthref; /* Link to VM thread. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. */ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. 
*/ @@ -663,6 +664,7 @@ typedef struct global_State { } global_State; #define mainthread(g) (&gcref(g->mainthref)->th) +#define vmthread(g) (&gcref(g->vmthref)->th) #define niltv(L) \ check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val) #define niltvg(g) \ diff --git a/src/lj_parse.c b/src/lj_parse.c index e326432abb..181ce4d7e2 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1593,8 +1593,8 @@ static GCproto *fs_finish(LexState *ls, BCLine line) fs_fixup_line(fs, pt, (void *)((char *)pt + ofsli), numline); fs_fixup_var(ls, pt, (uint8_t *)((char *)pt + ofsdbg), ofsvar); - lj_vmevent_send(L, BC, - setprotoV(L, L->top++, pt); + lj_vmevent_send(G(L), BC, + setprotoV(V, V->top++, pt); ); L->top--; /* Pop table of constants. */ diff --git a/src/lj_state.c b/src/lj_state.c index fb6d41a5f9..9d4fdcee3a 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -202,6 +202,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) #endif lj_trace_initstate(g); lj_err_verify(); + setgcref(g->vmthref, obj2gco(lj_state_new(L))); return NULL; } diff --git a/src/lj_trace.c b/src/lj_trace.c index 3e2cd0b393..47d7faa5c9 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -296,8 +296,8 @@ int lj_trace_flushall(lua_State *L) /* Free the whole machine code and invalidate all exit stub groups. */ lj_mcode_free(J); memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup)); - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "flush")); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "flush")); ); return 0; } @@ -416,7 +416,6 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) /* Start tracing. */ static void trace_start(jit_State *J) { - lua_State *L; TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? 
*/ @@ -466,20 +465,19 @@ static void trace_start(jit_State *J) J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "start")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, proto_bcpos(J->pt, J->pc)); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "start")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, proto_bcpos(J->pt, J->pc)); if (J->parent) { - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); } else { BCOp op = bc_op(*J->pc); if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { - setintV(L->top++, J->exitno); /* Parent of stitched trace. */ - setintV(L->top++, -1); + setintV(V->top++, J->exitno); /* Parent of stitched trace. */ + setintV(V->top++, -1); } } ); @@ -494,7 +492,6 @@ static void trace_stop(jit_State *J) GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; GCtrace *T = J->curfinal; - lua_State *L; switch (op) { case BC_FORL: @@ -551,11 +548,10 @@ static void trace_stop(jit_State *J) J->postproc = LJ_POST_NONE; trace_save(J, T); - L = J->L; - lj_vmevent_send(L, TRACE, - setstrV(L, L->top++, lj_str_newlit(L, "stop")); - setintV(L->top++, traceno); - setfuncV(L, L->top++, J->fn); + lj_vmevent_send(J2G(J), TRACE, + setstrV(V, V->top++, lj_str_newlit(V, "stop")); + setintV(V->top++, traceno); + setfuncV(V, V->top++, J->fn); ); } @@ -610,18 +606,17 @@ static int trace_abort(jit_State *J) /* Is there anything to abort? */ traceno = J->cur.traceno; if (traceno) { - ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. 
*/ J->cur.link = 0; J->cur.linktype = LJ_TRLINK_NONE; - lj_vmevent_send(L, TRACE, + lj_vmevent_send(J2G(J), TRACE, cTValue *bot = tvref(L->stack)+LJ_FR2; cTValue *frame; const BCIns *pc; BCPos pos = 0; - setstrV(L, L->top++, lj_str_newlit(L, "abort")); - setintV(L->top++, traceno); + setstrV(V, V->top++, lj_str_newlit(V, "abort")); + setintV(V->top++, traceno); /* Find original Lua function call to generate a better error message. */ - for (frame = J->L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { + for (frame = L->base-1, pc = J->pc; ; frame = frame_prev(frame)) { if (isluafunc(frame_func(frame))) { pos = proto_bcpos(funcproto(frame_func(frame)), pc); break; @@ -633,10 +628,10 @@ static int trace_abort(jit_State *J) pc = frame_pc(frame) - 1; } } - setfuncV(L, L->top++, frame_func(frame)); - setintV(L->top++, pos); - copyTV(L, L->top++, restorestack(L, errobj)); - copyTV(L, L->top++, &J->errinfo); + setfuncV(V, V->top++, frame_func(frame)); + setintV(V->top++, pos); + copyTV(V, V->top++, L->top-1); + copyTV(V, V->top++, &J->errinfo); ); /* Drop aborted trace after the vmevent (which may still access it). */ setgcrefnull(J->trace[traceno]); @@ -692,16 +687,16 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); - lj_vmevent_send_(L, RECORD, + lj_vmevent_send_(J2G(J), RECORD, /* Save/restore state for trace recorder. */ TValue savetv = J2G(J)->tmptv; TValue savetv2 = J2G(J)->tmptv2; TraceNo parent = J->parent; ExitNo exitno = J->exitno; - setintV(L->top++, J->cur.traceno); - setfuncV(L, L->top++, J->fn); - setintV(L->top++, J->pt ? (int32_t)proto_bcpos(J->pt, J->pc) : -1); - setintV(L->top++, J->framedepth); + setintV(V->top++, J->cur.traceno); + setfuncV(V, V->top++, J->fn); + setintV(V->top++, J->pt ? 
(int32_t)proto_bcpos(J->pt, J->pc) : -1); + setintV(V->top++, J->framedepth); , J2G(J)->tmptv = savetv; J2G(J)->tmptv2 = savetv2; @@ -839,23 +834,23 @@ static TValue *trace_exit_cp(lua_State *L, lua_CFunction dummy, void *ud) #ifndef LUAJIT_DISABLE_VMEVENT /* Push all registers from exit state. */ -static void trace_exit_regs(lua_State *L, ExitState *ex) +static void trace_exit_regs(lua_State *V, ExitState *ex) { int32_t i; - setintV(L->top++, RID_NUM_GPR); - setintV(L->top++, RID_NUM_FPR); + setintV(V->top++, RID_NUM_GPR); + setintV(V->top++, RID_NUM_FPR); for (i = 0; i < RID_NUM_GPR; i++) { if (sizeof(ex->gpr[i]) == sizeof(int32_t)) - setintV(L->top++, (int32_t)ex->gpr[i]); + setintV(V->top++, (int32_t)ex->gpr[i]); else - setnumV(L->top++, (lua_Number)ex->gpr[i]); + setnumV(V->top++, (lua_Number)ex->gpr[i]); } #if !LJ_SOFTFP for (i = 0; i < RID_NUM_FPR; i++) { - setnumV(L->top, ex->fpr[i]); - if (LJ_UNLIKELY(tvisnan(L->top))) - setnanV(L->top); - L->top++; + setnumV(V->top, ex->fpr[i]); + if (LJ_UNLIKELY(tvisnan(V->top))) + setnanV(V->top); + V->top++; } #endif } @@ -897,6 +892,8 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) #ifdef EXITSTATE_PCREG J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); +#else + UNUSED(ex); #endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT @@ -917,11 +914,11 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. 
*/ if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) - lj_vmevent_send(L, TEXIT, - lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - trace_exit_regs(L, ex); + lj_vmevent_send(G(L), TEXIT, + lj_state_checkstack(V, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); + setintV(V->top++, J->parent); + setintV(V->top++, J->exitno); + trace_exit_regs(V, ex); ); pc = exd.pc; diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 070c6144aa..8913ead946 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c @@ -38,6 +38,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) { global_State *g = G(L); + lua_State *oldL = gco2th(gcref(g->cur_L)); uint8_t oldmask = g->vmevmask; uint8_t oldh = hook_save(g); int status; @@ -51,6 +52,10 @@ void lj_vmevent_call(lua_State *L, ptrdiff_t argbase) fputs(tvisstr(L->top) ? strVdata(L->top) : "?", stderr); fputc('\n', stderr); } + setgcref(g->cur_L, obj2gco(oldL)); +#if LJ_HASJIT + G2J(g)->L = oldL; +#endif hook_restore(g, oldh); if (g->vmevmask != VMEVENT_NOCACHE) g->vmevmask = oldmask; /* Restore event mask, but not if not modified. 
*/ diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h index 8a99536068..cdd4f75825 100644 --- a/src/lj_vmevent.h +++ b/src/lj_vmevent.h @@ -32,23 +32,25 @@ typedef enum { } VMEvent; #ifdef LUAJIT_DISABLE_VMEVENT -#define lj_vmevent_send(L, ev, args) UNUSED(L) -#define lj_vmevent_send_(L, ev, args, post) UNUSED(L) +#define lj_vmevent_send(g, ev, args) UNUSED(g) +#define lj_vmevent_send_(g, ev, args, post) UNUSED(g) #else -#define lj_vmevent_send(L, ev, args) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send(g, ev, args) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ } \ } -#define lj_vmevent_send_(L, ev, args, post) \ - if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ - ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \ +#define lj_vmevent_send_(g, ev, args, post) \ + if ((g)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \ + lua_State *V = vmthread(g); \ + ptrdiff_t argbase = lj_vmevent_prepare(V, LJ_VMEVENT_##ev); \ if (argbase) { \ args \ - lj_vmevent_call(L, argbase); \ + lj_vmevent_call(V, argbase); \ post \ } \ } From c94312d348e3530b369b4e517fce4c65df6cd270 Mon Sep 17 00:00:00 2001 From: Mike Pall Date: Mon, 10 Nov 2025 18:15:11 +0100 Subject: [PATCH 32/32] FFI: Avoid dangling cts->L. Reported by ZumiKua. #1405 --- src/lj_state.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lj_state.c b/src/lj_state.c index ecf1519865..18a55e7227 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -304,6 +304,10 @@ lua_State *lj_state_new(lua_State *L) void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) { lua_assert(L != mainthread(g)); +#if LJ_HASFFI + if (ctype_ctsG(g) && ctype_ctsG(g)->L == L) /* Avoid dangling cts->L. 
*/ + ctype_ctsG(g)->L = mainthread(g); +#endif lj_func_closeuv(L, tvref(L->stack)); lua_assert(gcref(L->openupval) == NULL); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);