11use std:: ops:: RangeInclusive ;
22
3+ use rustc_hash:: FxHashMap ;
34use smallvec:: { SmallVec , smallvec} ;
45
56use crate :: compaction:: interval_map:: IntervalMap ;
@@ -12,6 +13,12 @@ pub trait Compactable {
1213
1314 /// The size of the compactable database segment in bytes.
1415 fn size ( & self ) -> u64 ;
16+
17+ /// The category of the compactable. Overlap between different categories is not considered for
18+ /// compaction.
///
/// The default implementation returns `0`, so implementors that do not override this method
/// all report the same category.
19+ fn category ( & self ) -> u8 {
20+ 0
21+ }
1522}
1623
1724fn is_overlapping ( a : & RangeInclusive < u64 > , b : & RangeInclusive < u64 > ) -> bool {
@@ -157,7 +164,7 @@ impl DuplicationInfo {
157164 /// Get a value in the range `0..=u64::MAX` that represents the estimated amount of duplication
158165 /// across the given range. The units are arbitrary, but linear.
159166 fn duplication ( & self , range : & RangeInclusive < u64 > ) -> u64 {
160- if self . total_size == 0 {
167+ if self . max_size == self . total_size {
161168 return 0 ;
162169 }
163170 // the maximum numerator value is `u64::MAX + 1`
@@ -193,11 +200,14 @@ impl DuplicationInfo {
193200 }
194201}
195202
/// Sums the estimated duplication over every range stored in `duplication`.
///
/// In the updated version each range holds a map from a `u8` key to a `DuplicationInfo`; the
/// callers visible in this file populate that key with the compactable's category. For each
/// range, `DuplicationInfo::duplication` is evaluated for every entry and the results are added
/// together, then the per-range totals are added up. In the removed version each range held at
/// most one optional `DuplicationInfo`, and ranges without one were skipped.
///
/// NOTE(review): both levels use a plain `u64` `sum`, so a large enough total overflows —
/// confirm the values stay in range for realistic inputs.
196- fn total_duplication_size ( duplication : & IntervalMap < Option < DuplicationInfo > > ) -> u64 {
203+ fn total_duplication_size ( duplication : & IntervalMap < FxHashMap < u8 , DuplicationInfo > > ) -> u64 {
197204 duplication
198205 . iter ( )
199- . flat_map ( |( range, info) | Some ( ( range, info. as_ref ( ) ?) ) )
200- . map ( |( range, info) | info. duplication ( & range) )
// Per range: add up the duplication of every entry stored for that range.
206+ . map ( |( range, info) | {
207+ info. values ( )
208+ . map ( |info| info. duplication ( & range) )
209+ . sum :: < u64 > ( )
210+ } )
// Across ranges: add the per-range totals into the final result.
201211 . sum ( )
202212}
203213
@@ -241,16 +251,18 @@ pub fn get_merge_segments<T: Compactable>(
241251 }
242252 let start_compactable_range = start_compactable. range ( ) ;
243253 let start_compactable_size = start_compactable. size ( ) ;
254+ let start_compactable_category = start_compactable. category ( ) ;
244255 let mut current_range = start_compactable_range. clone ( ) ;
245256
246257 // We might need to restart the search if we need to extend the range.
247258 ' search: loop {
248259 let mut current_set = smallvec ! [ start_index] ;
249260 let mut current_size = start_compactable_size;
250- let mut duplication = IntervalMap :: < Option < DuplicationInfo > > :: new ( ) ;
261+ let mut duplication = IntervalMap :: < FxHashMap < u8 , DuplicationInfo > > :: new ( ) ;
251262 duplication. update ( start_compactable_range. clone ( ) , |dup_info| {
252263 dup_info
253- . get_or_insert_default ( )
264+ . entry ( start_compactable_category)
265+ . or_default ( )
254266 . add ( start_compactable_size, & start_compactable_range) ;
255267 } ) ;
256268 let mut current_skip = 0 ;
@@ -340,8 +352,9 @@ pub fn get_merge_segments<T: Compactable>(
340352 // set.
341353 current_set. push ( next_index) ;
342354 current_size += size;
355+ let category = compactable. category ( ) ;
343356 duplication. update ( range. clone ( ) , |dup_info| {
344- dup_info. get_or_insert_default ( ) . add ( size, & range) ;
357+ dup_info. entry ( category ) . or_default ( ) . add ( size, & range) ;
345358 } ) ;
346359 }
347360 }
0 commit comments