@@ -407,41 +407,13 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
407407 Some ( ( width, signed) ) => match name {
408408 sym:: ctlz => self . count_leading_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
409409
410- sym:: cttz => {
411- let func = self . current_func ( ) ;
412- let then_block = func. new_block ( "then" ) ;
413- let else_block = func. new_block ( "else" ) ;
414- let after_block = func. new_block ( "after" ) ;
415-
416- let arg = args[ 0 ] . immediate ( ) ;
417- let result = func. new_local ( None , self . u32_type , "zeros" ) ;
418- let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
419- let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
420- self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
421-
422- let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
423- then_block. add_assignment ( None , result, zero_result) ;
424- then_block. end_with_jump ( None , after_block) ;
425-
426- // NOTE: since jumps were added in a place
427- // count_leading_zeroes() does not expect, the current block
428- // in the state need to be updated.
429- self . switch_to_block ( else_block) ;
430-
431- let zeros = self . count_trailing_zeroes ( width, arg) ;
432- self . llbb ( ) . add_assignment ( None , result, zeros) ;
433- self . llbb ( ) . end_with_jump ( None , after_block) ;
434-
435- // NOTE: since jumps were added in a place rustc does not
436- // expect, the current block in the state need to be updated.
437- self . switch_to_block ( after_block) ;
438-
439- result. to_rvalue ( )
440- }
441410 sym:: ctlz_nonzero => {
442411 self . count_leading_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
443412 }
444- sym:: cttz_nonzero => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
413+ sym:: cttz => self . count_trailing_zeroes ( width, args[ 0 ] . immediate ( ) ) ,
414+ sym:: cttz_nonzero => {
415+ self . count_trailing_zeroes_nonzero ( width, args[ 0 ] . immediate ( ) )
416+ }
445417 sym:: ctpop => self . pop_count ( args[ 0 ] . immediate ( ) ) ,
446418 sym:: bswap => {
447419 if width == 8 {
@@ -984,16 +956,46 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
984956 self . context . new_cast ( self . location , res, result_type)
985957 }
986958
987- fn count_trailing_zeroes ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
988- let arg_type = arg. get_type ( ) ;
959+ fn count_trailing_zeroes ( & mut self , width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
960+ // if arg is 0, early return width, else call count_trailing_zeroes_nonzero to compute trailing zeros
961+ let func = self . current_func ( ) ;
962+ let then_block = func. new_block ( "then" ) ;
963+ let else_block = func. new_block ( "else" ) ;
964+ let after_block = func. new_block ( "after" ) ;
965+
966+ let result = func. new_local ( None , self . u32_type , "zeros" ) ;
967+ let zero = self . cx . gcc_zero ( arg. get_type ( ) ) ;
968+ let cond = self . gcc_icmp ( IntPredicate :: IntEQ , arg, zero) ;
969+ self . llbb ( ) . end_with_conditional ( None , cond, then_block, else_block) ;
970+
971+ let zero_result = self . cx . gcc_uint ( self . u32_type , width) ;
972+ then_block. add_assignment ( None , result, zero_result) ;
973+ then_block. end_with_jump ( None , after_block) ;
974+
975+ // NOTE: since jumps were added in a place count_trailing_zeroes_nonzero() does not expect,
976+ // the current block in the state need to be updated.
977+ self . switch_to_block ( else_block) ;
978+
979+ let zeros = self . count_trailing_zeroes_nonzero ( width, arg) ;
980+ self . llbb ( ) . add_assignment ( None , result, zeros) ;
981+ self . llbb ( ) . end_with_jump ( None , after_block) ;
982+
983+ // NOTE: since jumps were added in a place rustc does not
984+ // expect, the current block in the state need to be updated.
985+ self . switch_to_block ( after_block) ;
986+
987+ result. to_rvalue ( )
988+ }
989+
990+ fn count_trailing_zeroes_nonzero ( & mut self , _width : u64 , arg : RValue < ' gcc > ) -> RValue < ' gcc > {
989991 let result_type = self . u32_type ;
992+ let arg_type = arg. get_type ( ) ;
990993 let arg = if arg_type. is_signed ( self . cx ) {
991- let new_type = arg_type. to_unsigned ( self . cx ) ;
992- self . gcc_int_cast ( arg, new_type )
994+ arg_type = arg_type. to_unsigned ( self . cx ) ;
995+ self . gcc_int_cast ( arg, arg_type )
993996 } else {
994997 arg
995998 } ;
996- let arg_type = arg. get_type ( ) ;
997999 let ( count_trailing_zeroes, expected_type) =
9981000 // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here
9991001 // instead of using is_uint().
@@ -1008,50 +1010,44 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
10081010 ( "__builtin_ctzll" , self . cx . ulonglong_type )
10091011 }
10101012 else if arg_type. is_u128 ( self . cx ) {
1011- // Adapted from the algorithm to count leading zeroes from: https://stackoverflow.com/a/28433850/389119
1012- let array_type = self . context . new_array_type ( None , arg_type, 3 ) ;
1013+ // arg is guaranteed not to be 0, so either its 64 high bits or its 64 low bits are nonzero.
1014+ // __builtin_ctzll is UB when called with 0, so call it on the 64 low bits if they are not 0,
1015+ // else call it on the 64 high bits and add 64. In the else case, the 64 high bits can't be 0
1016+ // because arg is not 0.
1017+
10131018 let result = self . current_func ( )
1014- . new_local ( None , array_type, "count_loading_zeroes_results" ) ;
1019+ . new_local ( None , result_type, "count_trailing_zeroes_results" ) ;
1020+
1021+ let ctlz_then_block = self . current_func ( ) . new_block ( "cttz_then" ) ;
1022+ let ctlz_else_block = self . current_func ( ) . new_block ( "cttz_else" ) ;
1023+ let ctlz_after_block = self . current_func ( ) . new_block ( "cttz_after" ) ;
1024+ let ctzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
10151025
1016- let sixty_four = self . gcc_int ( arg_type, 64 ) ;
1017- let shift = self . gcc_lshr ( arg, sixty_four) ;
1018- let high = self . gcc_int_cast ( shift, self . u64_type ) ;
10191026 let low = self . gcc_int_cast ( arg, self . u64_type ) ;
1027+ let sixty_four = self . const_uint ( arg_type, 64 ) ;
1028+ let shift = self . lshr ( arg, sixty_four) ;
1029+ let high = self . gcc_int_cast ( shift, self . u64_type ) ;
1030+ let zero_low = self . const_uint ( low. get_type ( ) , 0 ) ;
1031+ let cond = self . gcc_icmp ( IntPredicate :: IntNE , low, zero_low) ;
1032+ self . llbb ( ) . end_with_conditional ( self . location , cond, ctlz_then_block, ctlz_else_block) ;
1033+ self . switch_to_block ( ctlz_then_block) ;
10201034
1021- let zero = self . context . new_rvalue_zero ( self . usize_type ) ;
1022- let one = self . context . new_rvalue_one ( self . usize_type ) ;
1023- let two = self . context . new_rvalue_from_long ( self . usize_type , 2 ) ;
1035+ let result_128 =
1036+ self . gcc_int_cast ( self . context . new_call ( None , ctzll, & [ low] ) , result_type) ;
10241037
1025- let ctzll = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
1038+ ctlz_then_block. add_assignment ( self . location , result, result_128) ;
1039+ ctlz_then_block. end_with_jump ( self . location , ctlz_after_block) ;
10261040
1027- let first_elem = self . context . new_array_access ( self . location , result, zero) ;
1028- let first_value = self . gcc_int_cast ( self . context . new_call ( self . location , ctzll, & [ low] ) , arg_type) ;
1029- self . llbb ( )
1030- . add_assignment ( self . location , first_elem, first_value) ;
1031-
1032- let second_elem = self . context . new_array_access ( self . location , result, one) ;
1033- let second_value = self . gcc_add ( self . gcc_int_cast ( self . context . new_call ( self . location , ctzll, & [ high] ) , arg_type) , sixty_four) ;
1034- self . llbb ( )
1035- . add_assignment ( self . location , second_elem, second_value) ;
1036-
1037- let third_elem = self . context . new_array_access ( self . location , result, two) ;
1038- let third_value = self . gcc_int ( arg_type, 128 ) ;
1039- self . llbb ( )
1040- . add_assignment ( self . location , third_elem, third_value) ;
1041-
1042- let not_low = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , low) ;
1043- let not_high = self . context . new_unary_op ( self . location , UnaryOp :: LogicalNegate , self . u64_type , high) ;
1044- let not_low_and_not_high = not_low & not_high;
1045- let index = not_low + not_low_and_not_high;
1046- // NOTE: the following cast is necessary to avoid a GIMPLE verification failure in
1047- // gcc.
1048- // TODO(antoyo): do the correct verification in libgccjit to avoid an error at the
1049- // compilation stage.
1050- let index = self . context . new_cast ( self . location , index, self . i32_type ) ;
1051-
1052- let res = self . context . new_array_access ( self . location , result, index) ;
1053-
1054- return self . gcc_int_cast ( res. to_rvalue ( ) , result_type) ;
1041+ self . switch_to_block ( ctlz_else_block) ;
1042+ let high_trailing_zeroes =
1043+ self . gcc_int_cast ( self . context . new_call ( None , ctzll, & [ high] ) , result_type) ;
1044+
1045+ let sixty_four_result_type = self . const_uint ( result_type, 64 ) ;
1046+ let result_128 = self . add ( high_trailing_zeroes, sixty_four_result_type) ;
1047+ ctlz_else_block. add_assignment ( self . location , result, result_128) ;
1048+ ctlz_else_block. end_with_jump ( self . location , ctlz_after_block) ;
1049+ self . switch_to_block ( ctlz_after_block) ;
1050+ return result. to_rvalue ( ) ;
10551051 }
10561052 else {
10571053 let count_trailing_zeroes = self . context . get_builtin_function ( "__builtin_ctzll" ) ;
0 commit comments