@@ -20,10 +20,10 @@ where
2020 /// No other assumptions should be made on the ordering of the
2121 /// elements after this computation.
2222 ///
23- /// Complexity ([quickselect](https://en.wikipedia.org/wiki/Quickselect)):
24- /// - average case: O(`n`);
25- /// - worst case: O(`n`^2);
26- /// where n is the number of elements in the array.
23+ /// This method performs a variant of [Sesquickselect] with pivot samples of 5 equally spaced
24+ /// elements around the center.
25+ ///
26+ /// [Sesquickselect]: https://www.wild-inter.net/publications/martinez-nebel-wild-2019.pdf
2727 ///
2828 /// **Panics** if `i` is greater than or equal to `n`.
2929 fn get_from_sorted_mut ( & mut self , i : usize ) -> A
4848 S : DataMut ,
4949 S2 : Data < Elem = usize > ;
5050
51- /// Partitions the array in increasing order at two skewed pivot values as 1st and 3rd element
52- /// of a sorted sample of 5 equally spaced elements around the center and returns their indexes.
53- /// For arrays of less than 42 elements the outermost elements serve as sample for pivot values.
51+ /// Sorts a sample of 5 equally spaced elements around the center and returns their indexes.
52+ ///
53+ /// **Panics** for arrays of less than 7 elements.
54+ fn sample_mut ( & mut self ) -> [ usize ; 5 ]
55+ where
56+ A : Ord + Clone ,
57+ S : DataMut ;
58+
59+ /// Partitions the array in increasing order based on the value initially
60+ /// located at `pivot_index` and returns the new index of the value.
61+ ///
62+ /// The elements are rearranged in such a way that the value initially
63+ /// located at `pivot_index` is moved to the position it would be in an
64+ /// array sorted in increasing order. The return value is the new index of
65+ /// the value after rearrangement. All elements smaller than the value are
66+ /// moved to its left and all elements equal or greater than the value are
67+ /// moved to its right. The ordering of the elements in the two partitions
68+ /// is undefined.
69+ ///
70+ /// `self` is shuffled **in place** to operate the desired partition:
71+ /// no copy of the array is allocated.
72+ ///
73+ /// The method uses Hoare's partition algorithm.
74+ /// Complexity: O(`n`), where `n` is the number of elements in the array.
75+ /// Average number of element swaps: n/6 - 1/3 (see
76+ /// [link](https://cs.stackexchange.com/questions/11458/quicksort-partitioning-hoare-vs-lomuto/11550))
77+ ///
78+ /// **Panics** if `pivot_index` is greater than or equal to `n`.
79+ ///
80+ /// # Example
81+ ///
82+ /// ```
83+ /// use ndarray::array;
84+ /// use ndarray_stats::Sort1dExt;
85+ ///
86+ /// let mut data = array![3, 1, 4, 5, 2];
87+ /// let pivot_index = 2;
88+ /// let pivot_value = data[pivot_index];
89+ ///
90+ /// // Partition by the value located at `pivot_index`.
91+ /// let new_index = data.partition_mut(pivot_index);
92+ /// // The pivot value is now located at `new_index`.
93+ /// assert_eq!(data[new_index], pivot_value);
94+ /// // Elements less than that value are moved to the left.
95+ /// for i in 0..new_index {
96+ /// assert!(data[i] < pivot_value);
97+ /// }
98+ /// // Elements greater than or equal to that value are moved to the right.
99+ /// for i in (new_index + 1)..data.len() {
100+ /// assert!(data[i] >= pivot_value);
101+ /// }
102+ /// ```
103+ fn partition_mut ( & mut self , pivot_index : usize ) -> usize
104+ where
105+ A : Ord + Clone ,
106+ S : DataMut ;
107+
108+ /// Partitions the array in increasing order based on the values initially located at the two
109+ /// pivot indexes `lower` and `upper` and returns the new indexes of their values.
54110 ///
55111 /// The elements are rearranged in such a way that the two pivot values are moved to the indexes
56112 /// they would be in an array sorted in increasing order. The return values are the new indexes
@@ -60,22 +116,24 @@ where
60116 ///
61117 /// The array is shuffled **in place**, no copy of the array is allocated.
62118 ///
63- /// This method performs [dual-pivot partitioning] with skewed pivot sampling .
119+ /// This method performs [dual-pivot partitioning].
64120 ///
65121 /// [dual-pivot partitioning]: https://www.wild-inter.net/publications/wild-2018b.pdf
66122 ///
123+ /// **Panics** if `lower` or `upper` is out of bound.
124+ ///
67125 /// # Example
68126 ///
69127 /// ```
70128 /// use ndarray::array;
71129 /// use ndarray_stats::Sort1dExt;
72130 ///
73131 /// let mut data = array![3, 1, 4, 5, 2];
74- /// // Sorted pivot values.
75- /// let (lower_value, upper_value) = (data[data.len() - 1], data[0] );
132+ /// // Skewed pivot values.
133+ /// let (lower_value, upper_value) = (1, 5 );
76134 ///
77- /// // Partitions by the values located at `0 ` and `data.len() - 1 `.
78- /// let (lower_index, upper_index) = data.partition_mut( );
135+ /// // Partitions by the values located at `1 ` and `3 `.
136+ /// let (lower_index, upper_index) = data.dual_partition_mut(1, 3 );
79137 /// // The pivot values are now located at `lower_index` and `upper_index`.
80138 /// assert_eq!(data[lower_index], lower_value);
81139 /// assert_eq!(data[upper_index], upper_value);
94152 /// assert!(upper_value <= data[i]);
95153 /// }
96154 /// ```
97- fn partition_mut ( & mut self ) -> ( usize , usize )
155+ fn dual_partition_mut ( & mut self , lower : usize , upper : usize ) -> ( usize , usize )
98156 where
99157 A : Ord + Clone ,
100158 S : DataMut ;
@@ -112,23 +170,67 @@ where
112170 S : DataMut ,
113171 {
114172 let n = self . len ( ) ;
115- if n == 1 {
116- self [ 0 ] . clone ( )
173+ // Recursion cutoff at integer multiple of sample space divider of 7 elements.
174+ if n < 21 {
175+ for mut index in 1 ..n {
176+ while index > 0 && self [ index - 1 ] > self [ index] {
177+ self . swap ( index - 1 , index) ;
178+ index -= 1 ;
179+ }
180+ }
181+ self [ i] . clone ( )
117182 } else {
118- let ( lower_index, upper_index) = self . partition_mut ( ) ;
119- if i < lower_index {
120- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..lower_index) )
121- . get_from_sorted_mut ( i)
122- } else if i == lower_index {
123- self [ i] . clone ( )
124- } else if i < upper_index {
125- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( lower_index + 1 ..upper_index) )
126- . get_from_sorted_mut ( i - ( lower_index + 1 ) )
127- } else if i == upper_index {
128- self [ i] . clone ( )
183+ // Adapt pivot sampling to relative sought rank and switch from dual-pivot to
184+ // single-pivot partitioning for extreme sought ranks.
185+ let sought_rank = i as f64 / n as f64 ;
186+ if ( 0.036 ..=0.964 ) . contains ( & sought_rank) {
187+ let ( lower_index, upper_index) = if sought_rank <= 0.5 {
188+ if sought_rank <= 0.153 {
189+ ( 0 , 1 ) // (0, 0, 3)
190+ } else {
191+ ( 0 , 2 ) // (0, 1, 2)
192+ }
193+ } else {
194+ if sought_rank <= 0.847 {
195+ ( 2 , 4 ) // (2, 1, 0)
196+ } else {
197+ ( 3 , 4 ) // (3, 0, 0)
198+ }
199+ } ;
200+ let sample = self . sample_mut ( ) ;
201+ let ( lower_index, upper_index) =
202+ self . dual_partition_mut ( sample[ lower_index] , sample[ upper_index] ) ;
203+ if i < lower_index {
204+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..lower_index) )
205+ . get_from_sorted_mut ( i)
206+ } else if i == lower_index {
207+ self [ i] . clone ( )
208+ } else if i < upper_index {
209+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( lower_index + 1 ..upper_index) )
210+ . get_from_sorted_mut ( i - ( lower_index + 1 ) )
211+ } else if i == upper_index {
212+ self [ i] . clone ( )
213+ } else {
214+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( upper_index + 1 ..) )
215+ . get_from_sorted_mut ( i - ( upper_index + 1 ) )
216+ }
129217 } else {
130- self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( upper_index + 1 ..) )
131- . get_from_sorted_mut ( i - ( upper_index + 1 ) )
218+ let sample = self . sample_mut ( ) ;
219+ let pivot_index = if sought_rank <= 0.5 {
220+ 0 // (0, 4)
221+ } else {
222+ 4 // (4, 0)
223+ } ;
224+ let pivot_index = self . partition_mut ( sample[ pivot_index] ) ;
225+ if i < pivot_index {
226+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( ..pivot_index) )
227+ . get_from_sorted_mut ( i)
228+ } else if i == pivot_index {
229+ self [ i] . clone ( )
230+ } else {
231+ self . slice_axis_mut ( Axis ( 0 ) , Slice :: from ( pivot_index + 1 ..) )
232+ . get_from_sorted_mut ( i - ( pivot_index + 1 ) )
233+ }
132234 }
133235 }
134236 }
@@ -146,38 +248,80 @@ where
146248 get_many_from_sorted_mut_unchecked ( self , & deduped_indexes)
147249 }
148250
149- fn partition_mut ( & mut self ) -> ( usize , usize )
251+ fn sample_mut ( & mut self ) -> [ usize ; 5 ]
150252 where
151253 A : Ord + Clone ,
152254 S : DataMut ,
153255 {
154- let lowermost = 0 ;
155- let uppermost = self . len ( ) - 1 ;
156- if self . len ( ) < 42 {
157- // Sort outermost elements and use them as pivots.
158- if self [ lowermost] > self [ uppermost] {
159- self . swap ( lowermost, uppermost) ;
256+ // Space between sample indexes.
257+ let space = self . len ( ) / 7 ;
258+ assert ! ( space > 0 , "Cannot sample array of less then 7 elements" ) ;
259+ // Initialize sample indexes with their lowermost index.
260+ let mut samples = [ self . len ( ) / 2 - 2 * space; 5 ] ;
261+ // Equally space sample indexes and sort by their values by looking up their indexes.
262+ for mut index in 1 ..samples. len ( ) {
263+ // Equally space sample indexes based on their lowermost index.
264+ samples[ index] += index * space;
265+ // Insertion sort looking up only the already equally spaced sample indexes.
266+ while index > 0 && self [ samples[ index - 1 ] ] > self [ samples[ index] ] {
267+ self . swap ( samples[ index - 1 ] , samples[ index] ) ;
268+ index -= 1 ;
160269 }
161- } else {
162- // Sample indexes of 5 evenly spaced elements around the center element.
163- let mut samples = [ 0 ; 5 ] ;
164- // Assume array of at least 7 elements.
165- let seventh = self . len ( ) / ( samples. len ( ) + 2 ) ;
166- samples[ 2 ] = self . len ( ) / 2 ;
167- samples[ 1 ] = samples[ 2 ] - seventh;
168- samples[ 0 ] = samples[ 1 ] - seventh;
169- samples[ 3 ] = samples[ 2 ] + seventh;
170- samples[ 4 ] = samples[ 3 ] + seventh;
171- // Use insertion sort for sample elements by looking up their indexes.
172- for mut index in 1 ..samples. len ( ) {
173- while index > 0 && self [ samples[ index - 1 ] ] > self [ samples[ index] ] {
174- self . swap ( samples[ index - 1 ] , samples[ index] ) ;
175- index -= 1 ;
270+ }
271+ samples
272+ }
273+
274+ fn partition_mut ( & mut self , pivot_index : usize ) -> usize
275+ where
276+ A : Ord + Clone ,
277+ S : DataMut ,
278+ {
279+ let pivot_value = self [ pivot_index] . clone ( ) ;
280+ self . swap ( pivot_index, 0 ) ;
281+ let n = self . len ( ) ;
282+ let mut i = 1 ;
283+ let mut j = n - 1 ;
284+ loop {
285+ loop {
286+ if i > j {
287+ break ;
176288 }
289+ if self [ i] >= pivot_value {
290+ break ;
291+ }
292+ i += 1 ;
293+ }
294+ while pivot_value <= self [ j] {
295+ if j == 1 {
296+ break ;
297+ }
298+ j -= 1 ;
299+ }
300+ if i >= j {
301+ break ;
302+ } else {
303+ self . swap ( i, j) ;
304+ i += 1 ;
305+ j -= 1 ;
177306 }
178- // Use 1st and 3rd element of sorted sample as skewed pivots.
179- self . swap ( lowermost, samples[ 0 ] ) ;
180- self . swap ( uppermost, samples[ 2 ] ) ;
307+ }
308+ self . swap ( 0 , i - 1 ) ;
309+ i - 1
310+ }
311+
312+ fn dual_partition_mut ( & mut self , lower : usize , upper : usize ) -> ( usize , usize )
313+ where
314+ A : Ord + Clone ,
315+ S : DataMut ,
316+ {
317+ let lowermost = 0 ;
318+ let uppermost = self . len ( ) - 1 ;
319+ // Swap pivots with outermost elements.
320+ self . swap ( lowermost, lower) ;
321+ self . swap ( uppermost, upper) ;
322+ if self [ lowermost] > self [ uppermost] {
323+ // Sort pivots instead of panicking via assertion.
324+ self . swap ( lowermost, uppermost) ;
181325 }
182326 // Increasing running and partition index starting after lower pivot.
183327 let mut index = lowermost + 1 ;
@@ -274,16 +418,26 @@ fn _get_many_from_sorted_mut_unchecked<A>(
274418 return ;
275419 }
276420
277- // At this point, `n >= 1` since `indexes.len() >= 1`.
278- if n == 1 {
279- // We can only reach this point if `indexes.len() == 1`, so we only
280- // need to assign the single value, and then we're done.
281- debug_assert_eq ! ( indexes. len( ) , 1 ) ;
282- values[ 0 ] = array[ 0 ] . clone ( ) ;
421+ // Recursion cutoff at integer multiple of sample space divider of 7 elements.
422+ if n < 21 {
423+ for mut index in 1 ..n {
424+ while index > 0 && array[ index - 1 ] > array[ index] {
425+ array. swap ( index - 1 , index) ;
426+ index -= 1 ;
427+ }
428+ }
429+ for ( value, index) in values. iter_mut ( ) . zip ( indexes. iter ( ) ) {
430+ * value = array[ * index] . clone ( ) ;
431+ }
283432 return ;
284433 }
285434
286- // We partition the array with respect to the two pivot values. The pivot values move to
435+ // Since there is no single sought rank to adapt pivot sampling to, the recommended skewed pivot
436+ // sampling of dual-pivot Quicksort is used.
437+ let sample = array. sample_mut ( ) ;
438+ let ( lower_index, upper_index) = ( sample[ 0 ] , sample[ 2 ] ) ; // (0, 1, 2)
439+
440+ // We partition the array with respect to the two pivot values. The pivot values move to the new
287441 // `lower_index` and `upper_index`.
288442 //
289443 // Elements strictly less than the lower pivot value have indexes < `lower_index`.
@@ -292,7 +446,7 @@ fn _get_many_from_sorted_mut_unchecked<A>(
292446 // value have indexes > `lower_index` and < `upper_index`.
293447 //
294448 // Elements less than or equal the upper pivot value have indexes > `upper_index`.
295- let ( lower_index, upper_index) = array. partition_mut ( ) ;
449+ let ( lower_index, upper_index) = array. dual_partition_mut ( lower_index , upper_index ) ;
296450
297451 // We use a divide-and-conquer strategy, splitting the indexes we are searching for (`indexes`)
298452 // and the corresponding portions of the output slice (`values`) into partitions with respect to
0 commit comments