Skip to content

Commit 7459ffa

Browse files
committed
pyx timeout works. futexes might work on windows
1 parent 96b87a3 commit 7459ffa

File tree

3 files changed

+115
-84
lines changed

3 files changed

+115
-84
lines changed

jsrc/ct.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -213,26 +213,26 @@ static A jtcreatepyx(J jt, I thread,D timeout){A pyx;
213213
}
214214

215215
// pyx is an A holding a pyx value. Return its value when it has been resolved. If the wait times out, fail with EVTIME
216-
A jtpyxval(J jt,A pyx){ UI4 state;
217-
if(PYXFULL==(state=lda(&((PYXBLOK*)AAV0(pyx))->state)))goto done;
218-
if(state!=PYXWAIT)if(!casa(&((PYXBLOK*)AAV0(pyx))->state,&state,PYXWAIT))goto done;
219-
UI ns=({D mwt=((PYXBLOK*)AAV0(pyx))->pyxmaxwt;mwt==inf?IMAX:(I)(mwt*1e9);});
216+
A jtpyxval(J jt,A pyx){ UI4 state;PYXBLOK *blok=(PYXBLOK*)AAV0(pyx);
217+
if(PYXFULL==(state=lda(&blok->state)))goto done;
218+
if(state!=PYXWAIT)if(!casa(&blok->state,&state,PYXWAIT))goto done;
219+
UI ns=({D mwt=blok->pyxmaxwt;mwt==inf?IMAX:(I)(mwt*1e9);});
220220
struct jtimespec end=jtmtil(ns); // get the time when we have to give up on this pyx
221221
while(1){ // repeat till defined
222-
_jfutex_waitn(&((PYXBLOK*)AAV0(pyx))->state,PYXWAIT,ns);
223-
if(lda(&((PYXBLOK*)AAV0(pyx))->state)==PYXFULL)break; // if pyx was filled, exit and return its value
222+
I wr=jfutex_waitn(&blok->state,PYXWAIT,ns);ASSERT(wr<=0,wr);
223+
if(lda(&blok->state)==PYXFULL)break; // if pyx was filled, exit and return its value
224224
I adbreak=lda((US*)&JT(jt,adbreak)[0]); // break requests
225225
// wait till the value is defined. We have to make one last check inside the lock to make sure the value is still unresolved
226226
// The wait may time out because another thread is requesting a system lock. If so, we accept it now
227227
if(unlikely(adbreak>>8)!=0){jtsystemlockaccept(jt,LOCKPRISYM+LOCKPRIPATH+LOCKPRIDEBUG); continue;} // process lock and keep waiting
228-
// or, the user may be requesting a BREAK interrupt for deadlock or other slow execution. In that case fail the pyx. It will not be deleted until the value has been stored
229-
if(unlikely(adbreak&0xff))ASSERT(0,adbreak&0xff); // JBREAK: fail the pyx and exit
230-
if(uncommon(-1==(ns=jtmdif(end)))){ //update timeout
231-
if(unlikely(inf==((PYXBLOK*)AAV0(pyx))->pyxmaxwt))ns=IMAX;
228+
// or, the user may be requesting a BREAK interrupt for deadlock or other slow execution
229+
if(unlikely((adbreak&0xff)!=0))ASSERT(0,adbreak&0xff); // JBREAK: give up on the pyx and exit
230+
if(uncommon(-1ull==(ns=jtmdif(end)))){ //update timeout
231+
if(unlikely(inf==blok->pyxmaxwt))ns=IMAX;
232232
else ASSERT(0,EVTIME);}} // fail the pyx and exit
233233
done:
234-
if(likely(!!((PYXBLOK*)AAV0(pyx))->pyxvalue))R ((PYXBLOK*)AAV0(pyx))->pyxvalue; // valid value, use it
235-
ASSERT(0,((PYXBLOK*)AAV0(pyx))->errcode);} // if error, return the error code
234+
if(likely(blok->pyxvalue!=NULL))R blok->pyxvalue; // valid value, use it
235+
ASSERT(0,blok->errcode);} // if error, return the error code
236236

237237
// ************************************* Locks **************************************
238238
// take a readlock on *alock. We come here only if a writelock was requested or running. We have incremented the readlock

jsrc/mt.c

Lines changed: 97 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,100 @@
1818

1919
#include"j.h"
2020

21+
// timing shenanigans
2122
struct jtimespec jtmtil(UI ns){
2223
struct jtimespec r=jmtclk();
23-
r.tv_sec+=ns/1000000000;r.tv_nsec+=ns%1000000000;
24-
if(r.tv_nsec>=1000000000){r.tv_sec++;r.tv_nsec-=1000000000;}
24+
r.tv_sec+=ns/1000000000ull;r.tv_nsec+=ns%1000000000ull;
25+
if(r.tv_nsec>=1000000000ll){r.tv_sec++;r.tv_nsec-=1000000000ll;}
2526
R r;}
2627
I jtmdif(struct jtimespec w){
2728
struct jtimespec t=jmtclk();
2829
if(t.tv_sec>w.tv_sec||t.tv_sec==w.tv_sec&&t.tv_nsec>=w.tv_nsec)R -1;
29-
R (w.tv_sec-t.tv_sec)*1000000000+w.tv_nsec-t.tv_nsec;}
30+
R (w.tv_sec-t.tv_sec)*1000000000ull+w.tv_nsec-t.tv_nsec;}
31+
32+
#ifdef __APPLE__
33+
void jfutex_wake1(UI4 *p){__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,0);}
34+
void jfutex_wakea(UI4 *p){__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO|ULF_WAKE_ALL,p,0);}
35+
C jfutex_wait(UI4 *p,UI4 v){
36+
I r=__ulock_wait(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,0);
37+
if(r>=0)R 0;
38+
if(r==-EINTR||r==-EFAULT)R 0; //EFAULT means the address needed to be paged in, not that it wasn't mapped?
39+
R EVFACE;} //should never happen?
40+
#if __arm64__
41+
// wait2 takes an ns timeout, but it's only available from macos 11 onward; coincidentally, arm macs only support macos 11+
42+
// so we can count on having this
43+
I jfutex_waitn(UI4 *p,UI4 v,UI ns){
44+
I r=R __ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,ns,0);
45+
if(r>=0)R 0;
46+
if(r==-ETIMEDOUT)R -1;
47+
if(r==-EINTR||r==-EFAULT)R 0;
48+
if(r==-ENOMEM)R EVWSFULL;//lol
49+
R EVFACE;}
50+
#else
51+
// but for the x86 case, we keep compatibility with older macos. Revisit in the future
52+
// deal with >32 bits; 2^32us is just a little over an hour; just too close for comfort
53+
I jfutex_waitn(UI4 *p,UI4 v,UI ns){I r;
54+
UI us=ns/1000;
55+
while(us>0xfffffff){
56+
r=__ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,0xffffffff,0);
57+
if(r!=-ETIMEDOUT)goto out;
58+
us-=0xffffffff;}
59+
r=__ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,us,0);
60+
out:
61+
if(r>=0)R 0;
62+
if(r==-ETIMEDOUT)R -1;
63+
if(r==-EINTR||r==-EFAULT)R 0;
64+
if(r==-ENOMEM)R EVWSFULL;
65+
R EVFACE;}
66+
#endif
67+
#elif defined(__linux__)
68+
//glibc 'syscall': stupid errno
69+
void jfutex_wake1(UI4 *p){
70+
__asm__ volatile("syscall" :: "a" (SYS_futex), //eax: syscall#
71+
"D" (p), //rdi: ptr
72+
"S" (FUTEX_WAKE), //rsi: op
73+
"d" (1));} //rdx: count
74+
void jfutex_wakea(UI4 *p){
75+
__asm__ volatile("syscall" :: "a" (SYS_futex), //eax: syscall#
76+
"D" (p), //rdi: ptr
77+
"S" (FUTEX_WAKE), //rsi: op
78+
"d" (0xffffffff));} //rdx: count
79+
C jfutex_wait(UI4 *p,UI4 v){
80+
register struct timespec *pts asm("r10") = 0;
81+
int r;__asm__ volatile("syscall" : "=a"(r) //result in rax
82+
: "a" (SYS_futex), //eax: syscall#
83+
"D" (p), //rdi: ptr
84+
"S" (FUTEX_WAIT), //rsi: op
85+
"d" (v), //rdx: expected
86+
"r" (pts)); //r10: timeout (null=no timeout)
87+
if(r>=0)R 0;
88+
if(r==-EAGAIN||r==-EINTR)R 0;
89+
R EVFACE;}
90+
I jfutex_waitn(UI4 *p,UI4 v,UI ns){
91+
struct timespec ts={.tv_sec=ns/1000000000ull, .tv_nsec=ns%1000000000ull};
92+
register struct timespec *pts asm("r10") = &ts;
93+
int r;__asm__ volatile("syscall" : "=a"(r) //result in rax
94+
: "a" (SYS_futex), //eax: syscall#
95+
"D" (p), //rdi: ptr
96+
"S" (FUTEX_WAIT), //rsi: op
97+
"d" (v), //rdx: expected
98+
"r" (pts)); //r10: timeout (relative!)
99+
if(r>=0)R 0;
100+
if(r==-ETIMEDOUT)R -1;
101+
if(r==-EAGAIN||r==-EINTR)R 0;
102+
R EVFACE;}
103+
#elif defined(_WIN32)
104+
#define WIN32_LEAN_AND_MEAN
105+
#include <windows.h>
106+
void jfutex_wake1(UI4 *p){WakeByAddressSingle(p);}
107+
void jfutex_wakea(UI4 *p){WakeByAddressAll(p);}
108+
C jfutex_wait(UI4 *p,UI4 v){R WaitOnAddress(p,&v,4,INFINITE)?0:EVFACE;}
109+
I jfutex_waitn(UI4 *p,UI4 v,UI ns){
110+
if(WaitOnAddress(p,&v,4,ns/1000000))R 0;
111+
if(GetLastError()==ERROR_TIMEOUT)R -1;
112+
//is there EINTR on windows? Does it manifest as a spurious wake with no error?
113+
R EVFACE;}
114+
#endif
30115

31116
#if defined(__APPLE__) || defined(__linux__)
32117
enum{FREE=0,LOCK=1,WAIT=2};//values for mutex->v
@@ -45,17 +130,14 @@ C jtpthread_mutex_lock(J jt,jtpthread_mutex_t *m,I self){
45130
UI4 e;if(likely((!(e=lda(&m->v)))&&((e=FREE),casa(&m->v,&e,LOCK))))goto success; //fast path. test-and-test-and-set is from glibc, mildly optimises the case when many threads swarm a locked mutex. Not sure if this is for the best, but after waffling for a bit I think it is
46131
if(e!=WAIT)e=xchga(&m->v,WAIT); //penalise the multi-waiters case, since it's slower anyway
47132
while(e!=FREE){
133+
if(JT(jt,adbreakr)[0])R EVATTN;
48134
#if __linux__
49-
I i=_jfutex_waitn(&m->v,WAIT,(UI)-1);
135+
I i=jfutex_waitn(&m->v,WAIT,(UI)-1);
50136
//bug? futex wait doesn't get interrupted by signals on linux if timeout is null
51137
#else
52138
I i=jfutex_wait(&m->v,WAIT);
53139
#endif
54-
if(uncommon(i<0)){
55-
if(i==-EINTR){if(JT(jt,adbreakr)[0])R EVATTN;}
56-
else if(i==-EOWNERDEAD)R EVCONCURRENCY;
57-
else if(i==-ENOMEM)R EVWSFULL;//lol
58-
else R EVFACE;}
140+
if(i>0)R i;
59141
e=xchga(&m->v,WAIT);} //exit when e==FREE; i.e., _we_ successfully installed WAIT in place of FREE
60142
success:m->ct+=m->recursive;m->owner=self; R 0;}
61143
I jtpthread_mutex_timedlock(J jt,jtpthread_mutex_t *m,UI ns,I self){
@@ -64,17 +146,13 @@ I jtpthread_mutex_timedlock(J jt,jtpthread_mutex_t *m,UI ns,I self){
64146
struct timespec tgt=jtmtil(ns);
65147
if(common(e!=WAIT)){e=xchga(&m->v,WAIT);if(e==FREE)goto success;} //penalise the multi-waiters case, since it's slower anyway
66148
while(1){
67-
I i=_jfutex_waitn(&m->v,WAIT,ns);
68-
if(uncommon(i==-ETIMEDOUT)); //don't penalise this case too harshly
69-
else if(unlikely(i<0)){
70-
if(i==-EINTR){if(JT(jt,adbreakr)[0])R EVATTN;}
71-
else if(i==-EOWNERDEAD)R EVCONCURRENCY;
72-
else if(i==-ENOMEM)R EVWSFULL;
73-
else R EVFACE;}
149+
if(JT(jt,adbreakr)[0])R EVATTN;
150+
I i=jfutex_waitn(&m->v,WAIT,ns);
151+
if(unlikely(i>0))R i;
74152
e=xchga(&m->v,WAIT);
75153
if(e==FREE)goto success; //exit when e==FREE; i.e., _we_ successfully installed WAIT in place of FREE
76-
if(i==-ETIMEDOUT)R -1; //if the kernel says we timed out, trust it rather than doing another syscall to check the time
77-
if(-1==(ns=jtmdif(tgt)))R -1;} //update delta, abort if timed out
154+
if(i==-1)R -1; //if the kernel says we timed out, trust it rather than doing another syscall to check the time
155+
if(-1ull==(ns=jtmdif(tgt)))R -1;} //update delta, abort if timed out
78156
success:m->ct+=m->recursive;m->owner=self; R 0;}
79157
I jtpthread_mutex_trylock(jtpthread_mutex_t *m,I self){
80158
if(uncommon(m->recursive)&&m->owner){if(m->owner!=self)R -1; m->ct++;R 0;}
@@ -87,6 +165,7 @@ C jtpthread_mutex_unlock(jtpthread_mutex_t *m,I self){
87165
if(!casa(&m->v,&(UI4){LOCK},FREE)){sta(&m->v,FREE);jfutex_wake1(&m->v);}
88166
//below is what drepper does; I think the above is always faster, but it should definitely be faster without xadd
89167
//agner sez lock xadd has one cycle better latency vs lock cmpxchg on intel ... ??
168+
//(probably that's only in the uncontended case)
90169
//if(adda(&m->v,-1)){sta(&m->v,FREE);jfutex_wake1(&m->v);}
91170
R 0;}
92171
#endif //__APPLE__

jsrc/mt.h

Lines changed: 6 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ struct jtimespec jtmtil(UI ns); //returns a time ns ns in the future
66
I jtmdif(struct jtimespec when); //returns the time in ns between now and when. If when is not in the future, the result will be -1
77
//both of these are implemented in terms of mtclk and use its clock
88

9+
__attribute__((cold)) C jfutex_wait(UI4 *p,UI4 v); //atomically, compare v to *p and go to sleep if they are equal. Return 0 if the wait ended normally, or an error code
10+
__attribute__((cold)) I jfutex_waitn(UI4 *p,UI4 v,UI ns); //ditto, but wake up after at most ns ns. Result 0 means the wait ended without timing out (possibly a spurious wakeup); -1 means timeout definitely exceeded; a positive result is an error code
11+
__attribute__((cold)) void jfutex_wake1(UI4 *p); //wake 1 thread waiting on p
12+
__attribute__((cold)) void jfutex_wakea(UI4 *p); //wake all threads waiting on p
13+
914
#if !defined(__APPLE__) && !defined(__linux__)
1015
#include <pthread.h>
1116
typedef pthread_mutex_t jtpthread_mutex_t;
@@ -65,36 +70,6 @@ C jtpthread_mutex_unlock(jtpthread_mutex_t*,I self); //0 or error code
6570
#if defined(__linux__)
6671
#include <linux/futex.h>
6772
#include <sys/syscall.h>
68-
//glibc 'syscall': stupid errno
69-
static inline void jfutex_wake1(UI4 *p){
70-
__asm__ volatile("syscall" :: "a" (SYS_futex), //eax: syscall#
71-
"D" (p), //rdi: ptr
72-
"S" (FUTEX_WAKE), //rsi: op
73-
"d" (1));} //rdx: count
74-
static inline void jfutex_wakea(UI4 *p){
75-
__asm__ volatile("syscall" :: "a" (SYS_futex), //eax: syscall#
76-
"D" (p), //rdi: ptr
77-
"S" (FUTEX_WAKE), //rsi: op
78-
"d" (0xffffffff));} //rdx: count
79-
static inline int jfutex_wait(UI4 *p,UI4 v){
80-
register struct timespec *pts asm("r10") = 0;
81-
int r;__asm__ volatile("syscall" : "=a"(r) //result in rax
82-
: "a" (SYS_futex), //eax: syscall#
83-
"D" (p), //rdi: ptr
84-
"S" (FUTEX_WAIT), //rsi: op
85-
"d" (v), //rdx: expected
86-
"r" (pts)); //r10: timeout (null=no timeout)
87-
R r;}
88-
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
89-
struct timespec ts={.tv_sec=ns/1000000000, .tv_nsec=ns%1000000000};
90-
register struct timespec *pts asm("r10") = &ts;
91-
int r;__asm__ volatile("syscall" : "=a"(r) //result in rax
92-
: "a" (SYS_futex), //eax: syscall#
93-
"D" (p), //rdi: ptr
94-
"S" (FUTEX_WAIT), //rsi: op
95-
"d" (v), //rdx: expected
96-
"r" (pts)); //r10: timeout (relative!)
97-
R r;}
9873
#elif defined(__APPLE__)
9974
// ulock (~futex) junk from xnu. timeout=0 means wait forever
10075
extern int __ulock_wait(uint32_t operation, void *addr, uint64_t value, uint32_t timeout); // timeout in us
@@ -128,32 +103,9 @@ extern int __ulock_wake(uint32_t operation, void *addr, uint64_t wake_value);
128103

129104
//positive (or just 1?) result from wait means someone else is waiting on this too?
130105

131-
static inline void jfutex_wake1(UI4 *p){__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,0);}
132-
static inline void jfutex_wakea(UI4 *p){__ulock_wake(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO|ULF_WAKE_ALL,p,0);}
133-
static inline int jfutex_wait(UI4 *p,UI4 v){R __ulock_wait(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,0);}
134-
#if __arm64__
135-
// wait2 takes an ns timeout, but it's only available from macos 11 onward; coincidentally, arm macs only support macos 11+
136-
// so we can count on having this
137-
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){R __ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,ns,0);}
138-
#else
139-
// but for the x86 case, we keep compatibility with older macos. Revisit in the future
140-
// deal with >32 bits; 2^32us is just a little over an hour; just too close for comfort
141-
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){
142-
UI us=ns/1000;
143-
while(us>0xfffffff){
144-
I4 r=__ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,0xffffffff,0);
145-
if(r!=-ETIMEDOUT)R r;
146-
us-=0xffffffff;}
147-
R __ulock_wait2(UL_COMPARE_AND_WAIT|ULF_NO_ERRNO,p,v,us,0);}
148-
#endif
149106
#elif defined(_WIN32)
150107
// untested windows path; make henry test it when he gets back from vacation
151-
#define WIN32_LEAN_AND_MEAN
152-
#include <windows.h>
153-
static inline int jfutex_wait(UI4 *p,UI4 v){R WaitOnAddress(p,&v,4,INFINITE);} //todo return wrong
154-
static inline int _jfutex_waitn(UI4 *p,UI4 v,UI ns){R WaitOnAddress(p,&v,4,ns/1000000);} //ditto
155-
static inline void jfutex_wake1(UI4 *p){WakeByAddressSingle(p);}
156-
static inline void jfutex_wakea(UI4 *p){WakeByAddressAll(p);}
108+
// don't pollute everybody with windows.h. win api is fairly basic anyway, so there is not much to take advantage of
157109
#endif //_WIN32
158110
#endif //__APPLE__ || __linux__
159111
#endif //PYXES

0 commit comments

Comments
 (0)