@@ -151,14 +151,14 @@ bool IsStackTopMinus1InRegister (IM3Compilation o)
151151}
152152
153153
154- void MarkExecSlotAllocated (IM3Compilation o , u16 i_slot )
154+ void MarkSlotAllocated (IM3Compilation o , u16 i_slot )
155155{ d_m3Assert (o -> m3Slots [i_slot ] == 0 ); // shouldn't be already allocated
156156 o -> m3Slots [i_slot ] = 1 ;
157157 o -> numAllocatedExecSlots ++ ;
158158}
159159
160160
161- bool AllocateExecSlot (IM3Compilation o , u16 * o_execSlot )
161+ bool AllocateSlot (IM3Compilation o , u16 * o_execSlot )
162162{
163163 bool found = false;
164164
@@ -168,7 +168,7 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
168168 {
169169 if (o -> m3Slots [i ] == 0 )
170170 {
171- MarkExecSlotAllocated (o , i );
171+ MarkSlotAllocated (o , i );
172172 * o_execSlot = i ;
173173
174174 found = true;
@@ -177,16 +177,32 @@ bool AllocateExecSlot (IM3Compilation o, u16 * o_execSlot)
177177
178178 ++ i ;
179179 }
180- // printf ("allocate %d\n", (i32) i);
181180
182181 return found ;
183182}
184183
185184
185+ M3Result IncrementSlotUsageCount (IM3Compilation o , u16 i_slot )
186+ { d_m3Assert (i_slot < d_m3MaxFunctionStackHeight );
187+ M3Result result = m3Err_none ; d_m3Assert (o -> m3Slots [i_slot ] > 0 );
188+
189+ // OPTZ (memory): 'm3Slots' could still be fused with 'typeStack' if 4 bits were used to indicate: [0,1,2,many]. The many-case
190+ // would scan 'wasmStack' to determine the actual usage count
191+ if (o -> m3Slots [i_slot ] < 0xFF )
192+ {
193+ o -> m3Slots [i_slot ]++ ;
194+ }
195+ else result = "slot usage count overflow" ;
196+
197+ return result ;
198+ }
199+
200+
186201void DeallocateSlot (IM3Compilation o , i16 i_slotIndex )
187202{ d_m3Assert (i_slotIndex >= o -> firstSlotIndex );
188- o -> numAllocatedExecSlots -- ; d_m3Assert (o -> m3Slots [i_slotIndex ]);
189- o -> m3Slots [i_slotIndex ] -- ;
203+ d_m3Assert (o -> m3Slots [i_slotIndex ]);
204+ if (-- o -> m3Slots [i_slotIndex ] == 0 )
205+ o -> numAllocatedExecSlots -- ;
190206}
191207
192208
@@ -259,7 +275,7 @@ M3Result PreserveRegisterIfOccupied (IM3Compilation o, u8 i_registerType)
259275
260276 // and point to an exec slot
261277 u16 slot ;
262- if (AllocateExecSlot (o , & slot ))
278+ if (AllocateSlot (o , & slot ))
263279 {
264280 o -> wasmStack [stackIndex ] = slot ;
265281
@@ -408,7 +424,7 @@ M3Result _PushAllocatedSlotAndEmit (IM3Compilation o, u8 i_m3Type, bool i_doEm
408424
409425 u16 slot ;
410426
411- if (AllocateExecSlot (o , & slot ))
427+ if (AllocateSlot (o , & slot ))
412428 {
413429_ (Push (o , i_m3Type , slot ));
414430
@@ -647,7 +663,7 @@ M3Result ReturnStackTop (IM3Compilation o)
647663
648664
649665// if local is unreferenced, o_preservedSlotIndex will be equal to localIndex on return
650- M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o , u16 * o_preservedSlotIndex , u32 i_localIndex )
666+ M3Result FindReferencedLocalsWithCurrentBlock (IM3Compilation o , u16 * o_preservedSlotIndex , u32 i_localIndex )
651667{
652668 M3Result result = m3Err_none ;
653669
@@ -671,13 +687,11 @@ M3Result IsLocalReferencedWithCurrentBlock (IM3Compilation o, u16 * o_preserve
671687 {
672688 if (* o_preservedSlotIndex == i_localIndex )
673689 {
674- if (not AllocateExecSlot (o , o_preservedSlotIndex )) {
690+ if (not AllocateSlot (o , o_preservedSlotIndex ))
675691 _throw (m3Err_functionStackOverflow );
676- }
677- } else {
678- o -> m3Slots [* o_preservedSlotIndex ] += 1 ;
679- o -> numAllocatedExecSlots ++ ;
680692 }
693+ else
694+ _ (IncrementSlotUsageCount (o , * o_preservedSlotIndex ));
681695
682696 o -> wasmStack [i ] = * o_preservedSlotIndex ;
683697 }
@@ -842,7 +856,7 @@ _ (ReadLEB_u32 (& localSlot, & o->wasm, o->wasmEnd)); // printf (
842856 if (localSlot < GetFunctionNumArgsAndLocals (o -> function ))
843857 {
844858 u16 preserveSlot ;
845- _ (IsLocalReferencedWithCurrentBlock (o , & preserveSlot , localSlot )); // preserve will be different than local, if referenced
859+ _ (FindReferencedLocalsWithCurrentBlock (o , & preserveSlot , localSlot )); // preserve will be different than local, if referenced
846860
847861 if (preserveSlot == localSlot )
848862_ (CopyTopSlot (o , localSlot ))
@@ -1130,9 +1144,7 @@ _ (Pop (o));
11301144
11311145 if (numReturns )
11321146 {
1133- o -> m3Slots [execTop ] = 1 ;
1134- o -> numAllocatedExecSlots ++ ;
1135-
1147+ MarkSlotAllocated (o , execTop );
11361148_ (Push (o , i_type -> returnType , execTop ));
11371149 }
11381150
@@ -1266,62 +1278,33 @@ _ (NormalizeType (o_blockType, type)); if (* o_
12661278// (versus the COW strategy that happens in SetLocal within a block). Initially, I thought I'd have to be clever and
12671279// retroactively insert preservation code to avoid impacting general performance, but this compilation pattern doesn't
12681280// really occur in compiled Wasm code, so PreserveArgsAndLocals generally does nothing. Still waiting on a real-world case!
1269- M3Result PreserveArgsAndLocals (IM3Compilation o ) {
1281+ M3Result PreserveArgsAndLocals (IM3Compilation o )
1282+ {
12701283 M3Result result = m3Err_none ;
12711284
1272- if (o -> block .initStackIndex >= o -> stackIndex ) // return if block stack is empty.
1273- return result ;
1274-
1275- bool needed = false;
1276- u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o -> function );
1277-
1278- for (u32 i = o -> block .initStackIndex ; i < o -> stackIndex ; ++ i )
1279- {
1280- if (o -> wasmStack [i ] < numArgsAndLocals )
1281- {
1282- needed = true;
1283- break ;
1284- }
1285- }
1286-
1287- if (!needed ) // return if no references to locals.
1288- return result ;
1289-
1290- #if defined(M3_COMPILER_MSVC )
1291- u16 preservedStackIndex [128 ]; // hmm, heap allocate?...
1292-
1293- if (numArgsAndLocals > 128 )
1294- _throw ("argument/local count overflow" );
1295- #else
1296- u16 preservedStackIndex [numArgsAndLocals ];
1297- #endif
1298-
1299- memset (preservedStackIndex , 0xff , numArgsAndLocals * sizeof (u16 ));
1300-
1301- for (u32 i = o -> block .initStackIndex ; i < o -> stackIndex ; ++ i )
1285+ if (o -> stackIndex > o -> firstSlotIndex )
13021286 {
1303- if (o -> wasmStack [i ] < numArgsAndLocals )
1287+ u32 numArgsAndLocals = GetFunctionNumArgsAndLocals (o -> function );
1288+
1289+ for (u32 i = 0 ; i < numArgsAndLocals ; ++ i )
13041290 {
1305- u16 localSlot = o -> wasmStack [i ];
1306-
1307- if (preservedStackIndex [localSlot ] == 0xffff )
1291+ u16 preservedSlotIndex ;
1292+ _ (FindReferencedLocalsWithCurrentBlock (o , & preservedSlotIndex , i ));
1293+
1294+ if (preservedSlotIndex != i )
13081295 {
1309- if (not AllocateExecSlot (o , & preservedStackIndex [localSlot ]))
1310- _throw (m3Err_functionStackOverflow );
1311-
1312- _ (EmitOp (o , op_CopySlot_64 ));
1313- EmitConstant (o , preservedStackIndex [localSlot ]);
1314- EmitConstant (o , localSlot );
1315- } else {
1316- o -> m3Slots [preservedStackIndex [localSlot ]] += 1 ;
1317- o -> numAllocatedExecSlots ++ ;
1296+ u8 type = GetStackType (o , i );
1297+ IM3Operation op = Is64BitType (type ) ? op_CopySlot_64 : op_CopySlot_32 ;
1298+
1299+ EmitOp (o , op );
1300+ EmitSlotOffset (o , preservedSlotIndex );
1301+ EmitSlotOffset (o , i );
13181302 }
1319-
1320- o -> wasmStack [i ] = preservedStackIndex [localSlot ];
13211303 }
13221304 }
1323-
1324- _catch : return result ;
1305+
1306+ _catch :
1307+ return result ;
13251308}
13261309
13271310
0 commit comments