Skip to content

Commit c5b2cb1

Browse files
committed
Lazily allocate and initialise JTs. The goal is to support very large numbers of threads (JTALIGNBDY=1<<31, say, or perhaps just 26 for the sake of the rwlocks) while still having minimal startup resource usage on normal-size machines. I haven't added any allowance for decommitting these; presumably, if you need O(big) threads then you need O(big) threads, so it just doesn't seem very important
1 parent 7e6989b commit c5b2cb1

File tree

18 files changed

+209
-150
lines changed

18 files changed

+209
-150
lines changed

dllsrc/jdll.c

Lines changed: 11 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -733,25 +733,15 @@ void getpath(HINSTANCE hi, C* path)
733733
}
734734
#endif
735735

736-
// create a memory heap of the given size, allocate a JST in it, and store the address
737-
// of the heap into jt->heap so that the JE can do memory allocations from it
738-
JS heapinit(int size)
736+
// create a skeletal JS--just good enough to do basic initialisation
737+
JS heapinit()
739738
{
740-
HANDLE h;
741-
JS jt;
742-
743-
h = HeapCreate(0, size, 0);
744-
if(!h) return 0;
745-
jt = HeapAlloc(h, 0, sizeof(JST)+JTALIGNBDY-1);
746-
if(!jt)
747-
{
748-
HeapDestroy(h);
749-
return 0;
750-
}
751-
jt = (JS)(((I)jt+JTALIGNBDY-1)&-JTALIGNBDY); // force to SDRAM page boundary
752-
mvc(sizeof(JST),jt,1,MEMSET00);
753-
JT(jt,heap) = h;
754-
return jt;
739+
JS jt=jmreserve(sizeof(JST),__builtin_ctz(JTALIGNBDY));
740+
if(!jt)R 0; //no address space
741+
I sz=(I)&jt->threaddata[2]-(I)jt; // #relevant bytes: just JS and the first JT
742+
if(!jmcommit(jt,sz)){jmrelease(jt,sizeof(JST));R 0;} //no memory
743+
mvc(sz,jt,1,MEMSET00);
744+
R jt;
755745
}
756746

757747
int WINAPI DllMain (HINSTANCE hDLL, DWORD dwReason, LPVOID lpReserved)
@@ -778,9 +768,9 @@ int WINAPI DllMain (HINSTANCE hDLL, DWORD dwReason, LPVOID lpReserved)
778768
// just enough to do GA(). The rest of jt is never used
779769
getpath(0, modulepath);
780770
getpath(hDLL, dllpath);
781-
g_jt=heapinit(10000); // just enough for a few allocations
771+
g_jt=heapinit()
782772
if(!g_jt) R 0; // abort if no memory
783-
if(!jtglobinit(g_jt)) {HeapDestroy(g_jt->heap); g_jt=0; R 0;}; // free & abort if initialization error
773+
if(!jtglobinit(g_jt)) {jmrelease(g_jt,sizeof(JST)); g_jt=0; R 0;}; // free & abort if initialization error
784774
// The g_jt heap MUST NOT be freed, because it holds the blocks pointed to by initialized globals.
785775
// g_jt itself, a JST struct, is not used. Perhaps it could be freed, as long as the rest of the heap remains.
786776
break;
@@ -792,7 +782,7 @@ int WINAPI DllMain (HINSTANCE hDLL, DWORD dwReason, LPVOID lpReserved)
792782
break;
793783

794784
case DLL_PROCESS_DETACH:
795-
if(g_jt) HeapDestroy(g_jt->heap);
785+
if(g_jt){jmrelease(g_jt,sizeof(JST));g_jt=0;}
796786
break;
797787
}
798788
return TRUE;

jsrc/ct.c

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ extern int numberOfCores;
88
// burn some time, approximately n nanoseconds
99
NOINLINE I johnson(I n){I johnson=0x1234; if(n<0)R n; do{johnson ^= (johnson<<1) ^ johnson>>(BW-1);}while(--n); R johnson&-256;} // return low byte 0
1010
#if PYXES
11-
#define delay(n) {if(__builtin_constant_p(n)){if(n>36)DONOUNROLL(n/36,_mm_pause();)else johnson(n);}else if(unlikely(n>36))DONOUNROLL((n-7)/36,_mm_pause();)else johnson(n);}
11+
#define delay(n) {if(__builtin_constant_p(n)){if(n>36)DONOUNROLL(n/36,_mm_pause();)else johnson(n);}else if(uncommon(n>36))DONOUNROLL((n-7)/36,_mm_pause();)else johnson(n);}
1212
#else
1313
#define delay(n)
1414
#endif
@@ -86,7 +86,7 @@ I jtextendunderlock(J jt, A *abuf, US *alock, I flags){A z;
8686
// wakeallct keeps track of the number of wakealls being processed. When this is nonzero, a waiter must not allow the block pointed to by futexwt to go away. And, it must
8787
// consider that wakeall may have sampled futexwt before it was cleared. So, the waiter must wait for wakeallct to go to 0 before exiting.
8888
// Only a couple of threads can call wakeall (the leader, and a JBreak), so 1 byte suffices for wakeallct.
89-
void wakeall(J jt){aadd(&JT(jt,wakeallct),1); UI4 *wta; DONOUNROLL(MAXTHREADS,if((wta=JTTHREAD0(jt)[i].futexwt)!=0){aadd(wta,0x10000); jfutex_wakea(wta);} ) aadd(&JT(jt,wakeallct),(UC)-1);}
89+
void wakeall(J jt){aadd(&JT(jt,wakeallct),1); UI4 *wta; DONOUNROLL(NALLTHREADS(jt),if((wta=JTTHREAD0(jt)[i].futexwt)!=0){aadd(wta,0x10000); jfutex_wakea(wta);} ) aadd(&JT(jt,wakeallct),(UC)-1);}
9090
#else
9191
void wakeall(J jt){}
9292
#endif
@@ -104,11 +104,11 @@ A jtsystemlock(J jt,I priority,A (*lockedfunction)(J)){A z;
104104
// Process the request. We don't know what the highest-priority request is until we have heard from all the
105105
// threads.  Thus, it is possible that our request will still be pending when we finish.  In that case, loop till it is satisfied
106106
while(priority!=0){
107-
S xxx=0; I leader=__atomic_compare_exchange_n(&JT(jt,systemlock), &xxx, (S)1, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); // go to state 1; set leader if we are the first to do so
107+
I leader=__atomic_compare_exchange_n(&JT(jt,systemlock), &(S){0}, (S)1, 0, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED); // go to state 1; set leader if we are the first to do so
108108
I nrunning=0; JTT *jjbase=JTTHREAD0(jt); // #running threads, base of thread blocks
109109
// In the leader task only, go through all tasks (including master), turning on the SYSLOCK task flag in each thread. Count how many are running after the flag is set
110110
// Also, wake up all tasks that are in a loop that needs interrupting on system action. Those loops will honor it when we are in state 1/2
111-
if(leader){DONOUNROLL(MAXTHREADS, nrunning+=(__atomic_fetch_or(&jjbase[i].taskstate,TASKSTATELOCKACTIVE,__ATOMIC_ACQ_REL)>>TASKSTATERUNNINGX)&1;) wakeall(jt);}
111+
if(leader){DONOUNROLL(NALLTHREADS(jt), nrunning+=(__atomic_fetch_or(&jjbase[i].taskstate,TASKSTATELOCKACTIVE,__ATOMIC_ACQ_REL)>>TASKSTATERUNNINGX)&1;) wakeall(jt);}
112112
// state 2: lock requesters indicate request priority and we wait for all tasks to come to a stop. We wake all threads that are waiting on pyx/mutex
113113
C oldpriority; DOINSTATE(leader,2,oldpriority=__atomic_fetch_or(&JT(jt,adbreak)[1],priority,__ATOMIC_ACQ_REL);) // remember priority before we made our request
114114
// state 3: all threads get the final request priorities
@@ -132,7 +132,7 @@ A jtsystemlock(J jt,I priority,A (*lockedfunction)(J)){A z;
132132
if(executor){
133133
__atomic_store_n(&((C*)&JT(jt,breakbytes))[1],0,__ATOMIC_RELEASE); // clear the error flag from the interrupt request
134134
// go through all threads, turning off SYSLOCK in each. This allows other tasks to run and new tasks to start
135-
DO(MAXTHREADS, __atomic_fetch_and(&jjbase[i].taskstate,~TASKSTATELOCKACTIVE,__ATOMIC_ACQ_REL);)
135+
DO(NALLTHREADS(jt), __atomic_fetch_and(&jjbase[i].taskstate,~TASKSTATELOCKACTIVE,__ATOMIC_ACQ_REL);)
136136
// set the systemlock to 0, completing the operation
137137
__atomic_store_n(&JT(jt,systemlock),0,__ATOMIC_RELEASE);
138138
}else{
@@ -339,7 +339,7 @@ typedef struct jobstruct {
339339
// we use the 6 LSBs of jobq->ht[0] as the lock, so that when we get the lock we also have the job pointer. The job is always on a cacheline boundary
340340
// We take JOBLOCK before taking the mutex, always. By measurement (20220516 SkylakeX, 4 cores) the job lock keeps contention low until the tasks are < 400ns
341341
// long, while using the mutex gives out at < 1000ns
342-
_Static_assert(MAXTHREADS<64,"JOBLOCK fails if > 63 threads");
342+
_Static_assert(MAXTHREADSINPOOL<64,"JOBLOCK fails if > 63 threads");
343343
#define JOBLOCK(jobq) ({I z; if(unlikely(((z=__atomic_fetch_add((I*)&jobq->ht[0],1,__ATOMIC_ACQ_REL))&(CACHELINESIZE-1))!=0))z=joblock(jobq); (JOB*)z; })
344344
#define JOBUNLOCK(jobq,oldh) __atomic_store_n(&jobq->ht[0],oldh,__ATOMIC_RELEASE);
345345
static NOINLINE I joblock(JOBQ *jobq){I z;
@@ -796,6 +796,10 @@ ASSERT(0,EVNONCE)
796796
WRITELOCK(JT(jt,flock)) // nwthreads is protected by flock
797797
resthread=THREADIDFORWORKER(JT(jt,nwthreads)); // number of current worker threads. Next worker is nwthreads; convert worker# to thread#
798798
ASSERTSUFF(resthread<MAXTHREADS,EVLIMIT,WRITEUNLOCK(JT(jt,flock)); R 0;); // error if new 0-origin thread# exceeds limit
799+
if(!jmcommit(JTFORTHREAD(jt,resthread),sizeof(JTT))){ // attempt to commit thread data (in case it's not already committed); if failed, then bail
800+
WRITEUNLOCK(JT(jt,flock));
801+
ASSERT(0,EVWSFULL);}
802+
if(unlikely(!jtjinitt(JTFORTHREAD(jt,resthread)))){WRITEUNLOCK(JT(jt,flock)); R 0;} // initialise thread-local state
799803
if(!(__atomic_load_n(&JTFORTHREAD(jt,resthread)->taskstate,__ATOMIC_ACQUIRE)&TASKSTATETERMINATE))break;
800804
WRITEUNLOCK(JT(jt,flock)) // release lock for next poll
801805
if(unlikely(lda(&JT(jt,adbreak)[1]))!=0){jtsystemlockaccept(jt,LOCKALL);}else{YIELD} // allow syslock if requested; otherwise let other threads run

jsrc/dsusp.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ F1(jtdbc){UC k;
328328
if(AN(w)){
329329
// turn debugging on/off in all threads
330330
JTT *jjbase=JTTHREAD0(jt); // base of thread blocks
331-
DONOUNROLL(MAXTHREADS, if(k&1)__atomic_fetch_or(&jjbase[i].uflags.trace,TRACEDB1,__ATOMIC_ACQ_REL);else __atomic_fetch_and(&jjbase[i].uflags.trace,~TRACEDB1,__ATOMIC_ACQ_REL);) JT(jt,dbuser)=k;
331+
DONOUNROLL(NALLTHREADS(jt), if(k&1)__atomic_fetch_or(&jjbase[i].uflags.trace,TRACEDB1,__ATOMIC_ACQ_REL);else __atomic_fetch_and(&jjbase[i].uflags.trace,~TRACEDB1,__ATOMIC_ACQ_REL);) JT(jt,dbuser)=k;
332332
#if USECSTACK
333333
jt->cstackmin=jt->cstackinit-((CSTACKSIZE-CSTACKRESERVE)>>k);
334334
#else

jsrc/dtoa.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3398,8 +3398,7 @@ d2a_Malloc
33983398
#endif
33993399

34003400
// called only at initialization, so no ras() needed
3401-
B jtecvtinit(JS jjt, I nthreads) {A x; struct dtoa_info *di;
3402-
I threadno; for(threadno=0;threadno<nthreads;++threadno){JJ jt=&jjt->threaddata[threadno];
3401+
B jtecvtinit(J jt) {A x; struct dtoa_info *di;
34033402
GATV0(x, LIT, sizeof(struct dtoa_info), 1);
34043403
di=(struct dtoa_info*)AV(x);
34053404
di->_p5s=0;
@@ -3408,7 +3407,6 @@ B jtecvtinit(JS jjt, I nthreads) {A x; struct dtoa_info *di;
34083407
mvc( sizeof(di->_freelist),di->_freelist,1,MEMSET00);
34093408
di->jt=jt; // remember thread pointer in case further ga() needed
34103409
jt->dtoa=di;
3411-
}
34123410
R 1;
34133411
}
34143412

jsrc/i.c

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ JS gjt=0; // JPF debug - convenience debug single process - points to shared are
5454
// The jt we are given is a throwaway, needed ONLY so we can allocate some A blocks here. Anything stored
5555
// into jt will never be used. jinit3 will later be called with the real jt, to initialize it
5656
B jtglobinit(JS jjt){A x,y;J jt=MTHREAD(jjt); // initialize in master thread
57-
jtmeminit(jjt,1); // init allocation queues & tpop stack, master thread only
57+
jtmeminits(jjt);
58+
jtmeminitt(jt); // init allocation queues & tpop stack, master thread only
5859
RZ(mnuvxynam[0]=makename("m",0));
5960
RZ(mnuvxynam[1]=makename("n",0));
6061
RZ(mnuvxynam[2]=makename("u",NAMEBYVALUE)); // uv (and thus u. v.) must be defined when used in an explicit def. Otherwise OK
@@ -81,7 +82,7 @@ B jtglobinit(JS jjt){A x,y;J jt=MTHREAD(jjt); // initialize in master thread
8182
R 1;
8283
}
8384

84-
static B jtevinit(JS jjt,I nthreads){A q,*v;JJ jt=MTHREAD(jjt);
85+
static B jtevinit(JS jjt){A q,*v;JJ jt=MTHREAD(jjt);
8586
GA10(q,BOX,1+NEVM); v=AAV(q);
8687
DO(AN(q), v[i]=mtv;);
8788
v[EVALLOC ]=INCORPNA(cstr("allocation error" ));
@@ -119,14 +120,13 @@ static B jtevinit(JS jjt,I nthreads){A q,*v;JJ jt=MTHREAD(jjt);
119120
v[EVTHROW ]=INCORPNA(cstr("uncaught throw." ));
120121
v[EVTIME ]=INCORPNA(cstr("time limit" ));
121122
v[EVVALUE ]=INCORPNA(cstr("value error" ));
122-
// no more v[EVCONCURRENCY]=INCORPNA(cstr("concurrency error" ));
123123
ACINITZAPRECUR(q,BOX); INITJT(jjt,evm)=q; // q and its contents are not on tstack; this way the contents are freed on assignment
124124
if(jt->jerr){printf("evinit failed; error %hhi\n", jt->jerr); R 0;} else R 1;
125125
}
126126

127127
/* static void sigflpe(int k){jsignal(EVDOMAIN); signal(SIGFPE,sigflpe);} */
128128

129-
static B jtconsinit(JS jjt,I nthreads){D y;JJ jt=MTHREAD(jjt);
129+
static B jtconsinits(JS jjt){D y;JJ jt=MTHREAD(jjt);
130130
// This is an initialization routine, so memory allocations performed here are NOT
131131
// automatically freed by tpop()
132132
#if AUDITCOMPILER
@@ -167,40 +167,53 @@ if(((-1) >> 1) != -1)*(I *)4 = 104;
167167
jt->fdepn=NFDEP;
168168
#endif
169169
MTHREAD(jjt)->threadpoolno=-1; // the master thread is in no pool, ever
170-
171-
I threadno; for(threadno=0;threadno<nthreads;++threadno){jt=&jjt->threaddata[threadno];
172-
RESETRANK; // init both ranks to RMAX
173-
jt->ppn=6; // default precision for printf
174-
jt->fcalln=NFCALL;
175-
jt->cct= 1.0-FUZZ;
176-
jt->xmode=XMEXACT;
170+
R 1;
171+
}
172+
173+
static B jtconsinitt(J jt){
174+
RESETRANK; // init both ranks to RMAX
175+
jt->ppn=6; // default precision for printf
176+
jt->fcalln=NFCALL;
177+
jt->cct= 1.0-FUZZ;
178+
jt->xmode=XMEXACT;
177179
// create an initial stack, so that stack[-1] can be used for saving error messages
178-
jt->parserstackframe.parserstkbgn=jt->parserstackframe.parserstkend1=&jt->initparserstack[1]; // ensure valid error stack after final return
179-
}
180+
jt->parserstackframe.parserstkbgn=jt->parserstackframe.parserstkend1=&jt->initparserstack[1]; // ensure valid error stack after final return
180181
R 1;
181182
}
182183

183-
static B jtbufferinit(JS jjt,I nthreads){
184-
INITJT(jjt,breakfn)=malloc(NPATH); memset(INITJT(jjt,breakfn),0,NPATH); // place to hold the break filename
185-
I threadno; for(threadno=0;threadno<nthreads;++threadno){JJ jt=&jjt->threaddata[threadno];
186-
jt->etx=malloc(1+NETX); // error-message buffer
187-
jt->callstack=(LS *)malloc(sizeof(LS)*(1+NFCALL)); // function-call stack
188-
jt->rngdata=(RNG*)(((I)malloc(sizeof(RNG)+CACHELINESIZE)+CACHELINESIZE-1)&-CACHELINESIZE); mvc(sizeof(RNG),jt->rngdata,1,MEMSET00); // place to hold RNG data, aligned to cacheline
189-
}
184+
// initialise shared buffers
185+
static B jtbufferinits(JS jjt){
186+
R !!(INITJT(jjt,breakfn)=calloc(1,NPATH)); // place to hold the break filename
187+
}
188+
189+
// initialise thread-local buffers for the thread whose data block is jt. Requires synchronisation
190+
B jtbufferinitt(J jt){
191+
RZ(jt->etx=malloc(1+NETX)); // error-message buffer
192+
RZ(jt->callstack=malloc(sizeof(LS)*(1+NFCALL))); // function-call stack
193+
RZ(jt->rngdata=aligned_malloc(sizeof(RNG),CACHELINESIZE)); // place to hold RNG data, aligned to cacheline
194+
memset(jt->rngdata,0,sizeof(RNG));
190195
R 1;
191196
}
192197

193-
// We have completed initial allocation. Everything allocated so far will not be freed by a tpop, because
194-
// tpop() isn't called during initialization. So, to keep the memory auditor happy, we reset ttop so that it doesn't
195-
// look like those symbols have a free outstanding.
196-
// This also has the effect that buffers allocated during init do not need ra() to protect them, since they have no free outstanding
197-
static B jtinitfinis(JS jjt,I nthreads){
198-
I threadno; for(threadno=0;threadno<nthreads;++threadno){JJ jt=&jjt->threaddata[threadno];
199-
jt->tnextpushp=(A*)(((I)jt->tstackcurr+NTSTACKBLOCK)&(-NTSTACKBLOCK))+1; // first store is to entry 1 of the first block
200-
}
198+
// We have completed initial allocation. Everything allocated so far will not be freed by a tpop, because
199+
// tpop() isn't called during initialization. So, to keep the memory auditor happy, we reset ttop so that it doesn't
200+
// look like those symbols have a free outstanding.
201+
// This also has the effect that buffers allocated during init do not need ra() to protect them, since they have no free outstanding
202+
static B jtinitfinis(J jt){
203+
jt->tnextpushp=(A*)(((I)jt->tstackcurr+NTSTACKBLOCK)&(-NTSTACKBLOCK))+1; // first store is to entry 1 of the first block
201204
R 1;
202205
}
203206

207+
// Initialise thread-specific data for jt. Idempotent. Requires synchronisation.
208+
C jtjinitt(J jt){
209+
if(jt->etx)R 1; // already initialised; ok
210+
RZ(jtbufferinitt(jt)); // init thread-local buffers
211+
RZ(jtmeminitt(jt));
212+
RZ(jtconsinitt(jt));
213+
RZ(jtrnginit(jt)); // thread only
214+
RZ(jtecvtinit(jt));
215+
RZ(jtinitfinis(jt));
216+
R 1;}
204217

205218
// initialize the master thread for a new instance. This fills in the JS block, which will remain
206219
// for the duration of the instance. It also fills in the JJ block for each thread
@@ -226,24 +239,26 @@ static C jtjinit3(JS jjt){S t;JJ jt=MTHREAD(jjt);
226239
#endif
227240
// only crashing on startup INITJT(jjt,peekdata)=1; // wake up auditing
228241
// Initialize subsystems in order. Each initializes all threads, if there are thread variables
229-
RZ(jtbufferinit(jjt,MAXTHREADS)); // init the buffers pointed to by jjt
230-
RZ(jtmeminit(jjt,MAXTHREADS));
231-
RZ(jtsesminit(jjt,MAXTHREADS)); // master only
232-
RZ(jtcdinit(jjt,MAXTHREADS)); // master only
233-
RZ(jtevinit(jjt,MAXTHREADS)); // master only
234-
RZ(jtconsinit(jjt,MAXTHREADS));
235-
RZ(jtxsinit(jjt,MAXTHREADS)); // must be before symbinit master only
236-
RZ(jtsymbinit(jjt,MAXTHREADS)); // must be after consinit master only - global/locsyms must init at start of op
237-
RZ(jtparseinit(jjt,MAXTHREADS));
238-
RZ(jtxoinit(jjt,MAXTHREADS)); // master only
239-
RZ(jtsbtypeinit(jjt,MAXTHREADS)); // master only
240-
RZ(jtrnginit(jjt,MAXTHREADS));
242+
RZ(jtbufferinits(jjt)); // init the buffers pointed to by jjt
243+
RZ(jtbufferinitt(jt)); // init thread-local buffers
244+
RZ(jtmeminits(jjt));
245+
RZ(jtmeminitt(jt));
246+
RZ(jtsesminit(jjt,1)); // master only. scaf pointless?
247+
RZ(jtcdinit(jjt)); // master only
248+
RZ(jtevinit(jjt)); // master only
249+
RZ(jtconsinits(jjt));
250+
RZ(jtconsinitt(jt));
251+
RZ(jtxsinit(jjt)); // must be before symbinit master only
252+
RZ(jtsymbinit(jjt)); // must be after consinit master only - global/locsyms must init at start of op
253+
RZ(jtxoinit(jjt)); // master only
254+
RZ(jtsbtypeinit(jjt)); // master only
255+
RZ(jtrnginit(jt));
241256
// #if (SYS & SYS_DOS+SYS_MACINTOSH+SYS_UNIX)
242257
#if (SYS & SYS_DOS+SYS_MACINTOSH)
243-
RZ(jtxlinit(jjt,MAXTHREADS)); // file info, master only
258+
RZ(jtxlinit(jjt)); // file info, master only
244259
#endif
245-
RZ(jtecvtinit(jjt,MAXTHREADS));
246-
RZ(jtinitfinis(jjt,MAXTHREADS));
260+
RZ(jtecvtinit(jt));
261+
RZ(jtinitfinis(jt));
247262
R 1;
248263
}
249264

jsrc/io.c

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -713,33 +713,30 @@ void jsto(JS jt,I type,C*s){C e;I ex;
713713
C dll_initialized= 0; // dll init sets to 1
714714

715715
// dll init on load - equivalent to windows DLLMAIN DLL_PROCESS_ATTACH
716-
__attribute__((constructor)) static void Initializer(int argc, char** argv, char** envp)
717-
{
716+
__attribute__((constructor)) static void Initializer(){
718717
// Initialize J globals. This is done only once. Many of the globals are in static memory, initialized
719718
// by the compiler; some must be initialized at run-time in static memory; some must be allocated into A blocks
720719
// pointed to by static names. Because of the A blocks, we have to perform a skeletal initialization of jt,
721720
// just enough to do ga(). The rest of jt is never used
722-
JS jtnobdy=malloc(sizeof(JST)+JTALIGNBDY-1);
723-
if(!jtnobdy) R;
724-
JS jt = (JS)(((I)jtnobdy+JTALIGNBDY-1)&-JTALIGNBDY); // force to SDRAM page boundary
725-
mvc(sizeof(JST),jt,1,MEMSET00);
726-
if(!jtglobinit(jt)){free(jtnobdy); R;}
727-
dll_initialized= 1; JT(jt,heap)=(void *)jtnobdy; // save allo address for later free
728-
// The g_jt heap MUST NOT be freed, because it holds the blocks pointed to by initialized globals.
729-
// g_jt itself, a JST struct, is not used. Perhaps it could be freed, as long as the rest of the heap remains.
721+
JS jt=jmreservea(sizeof(JST),__builtin_ctz(JTALIGNBDY));
722+
if(!jt)R;
723+
I sz=offsetof(JST,threaddata[1]); // #relevant bytes: just JS and the first JT
724+
if(!jmcommit(jt,sz)){jmrelease(jt,sizeof(JST));R;}
725+
if(!jtglobinit(jt)){jmrelease(jt,sizeof(JST)); R;}
726+
dll_initialized=1;
727+
jmrelease(jt,sizeof(JST)); //the jt block itself can be released; we effectively orphan any blocks it points to, because they are used by the globals we've just initialised
730728
}
731729

732730
// Init for a new J instance. Globals have already been initialized.
733731
// Create a new jt, which will be the one we use for the entirety of the instance.
734732
JS _stdcall JInit(void){
735-
if(!dll_initialized) R 0; // constructor failed
736-
JS jtnobdy;
737-
RZ(jtnobdy=malloc(sizeof(JST)+JTALIGNBDY-1));
738-
JS jt = (JS)(((I)jtnobdy+JTALIGNBDY-1)&-JTALIGNBDY); // force to SDRAM page boundary
739-
mvc(sizeof(JST),jt,1,MEMSET00);
733+
if(!dll_initialized)R 0; // constructor failed
734+
JS jt=jmreservea(sizeof(JST),__builtin_ctz(JTALIGNBDY));
735+
if(!jt)R 0;
736+
if(!jmcommit(jt,offsetof(JST,threaddata[1]))){jmrelease(jt,sizeof(JST));R 0;}
737+
mvc(offsetof(JST,threaddata[1]),jt,1,MEMSET00);
740738
// Initialize all the info for the shared region and the master thread
741-
if(!jtjinit2(jt,0,0)){free(jtnobdy); R 0;};
742-
JT(jt,heap)=(void *)jtnobdy; // save allo address for later free
739+
if(!jtjinit2(jt,0,0)){jmrelease(jt,sizeof(JST)); R 0;}
743740
R jt; // R (JS)MTHREAD(jt);
744741
}
745742

@@ -753,7 +750,7 @@ int _stdcall JFree(JS jt){
753750
#if PYXES
754751
aligned_free(JT(jt,jobqueue));
755752
#endif
756-
free(JT(jt,heap)); // free the initial allocation
753+
jmrelease(jt,sizeof(JST)); // free the initial allocation
757754
R 0;
758755
}
759756
#endif

0 commit comments

Comments
 (0)