Skip to content

Commit a2a19e0

Browse files
committed
fix compaction for cold and warm files
1 parent be0c1de commit a2a19e0

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

turbopack/crates/turbo-persistence/src/compaction/selector.rs

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use std::ops::RangeInclusive;
22

3+
use rustc_hash::FxHashMap;
34
use smallvec::{SmallVec, smallvec};
45

56
use crate::compaction::interval_map::IntervalMap;
@@ -12,6 +13,12 @@ pub trait Compactable {
1213

1314
/// The size of the compactable database segment in bytes.
1415
fn size(&self) -> u64;
16+
17+
/// The category of the compactable. Overlap between different categories is not considered for
18+
/// compaction.
19+
fn category(&self) -> u8 {
20+
0
21+
}
1522
}
1623

1724
fn is_overlapping(a: &RangeInclusive<u64>, b: &RangeInclusive<u64>) -> bool {
@@ -157,7 +164,7 @@ impl DuplicationInfo {
157164
/// Get a value in the range `0..=u64::MAX` that represents the estimated amount of duplication
158165
/// across the given range. The units are arbitrary, but linear.
159166
fn duplication(&self, range: &RangeInclusive<u64>) -> u64 {
160-
if self.total_size == 0 {
167+
if self.max_size == self.total_size {
161168
return 0;
162169
}
163170
// the maximum numerator value is `u64::MAX + 1`
@@ -193,11 +200,14 @@ impl DuplicationInfo {
193200
}
194201
}
195202

196-
fn total_duplication_size(duplication: &IntervalMap<Option<DuplicationInfo>>) -> u64 {
203+
fn total_duplication_size(duplication: &IntervalMap<FxHashMap<u8, DuplicationInfo>>) -> u64 {
197204
duplication
198205
.iter()
199-
.flat_map(|(range, info)| Some((range, info.as_ref()?)))
200-
.map(|(range, info)| info.duplication(&range))
206+
.map(|(range, info)| {
207+
info.values()
208+
.map(|info| info.duplication(&range))
209+
.sum::<u64>()
210+
})
201211
.sum()
202212
}
203213

@@ -241,16 +251,18 @@ pub fn get_merge_segments<T: Compactable>(
241251
}
242252
let start_compactable_range = start_compactable.range();
243253
let start_compactable_size = start_compactable.size();
254+
let start_compactable_category = start_compactable.category();
244255
let mut current_range = start_compactable_range.clone();
245256

246257
// We might need to restart the search if we need to extend the range.
247258
'search: loop {
248259
let mut current_set = smallvec![start_index];
249260
let mut current_size = start_compactable_size;
250-
let mut duplication = IntervalMap::<Option<DuplicationInfo>>::new();
261+
let mut duplication = IntervalMap::<FxHashMap<u8, DuplicationInfo>>::new();
251262
duplication.update(start_compactable_range.clone(), |dup_info| {
252263
dup_info
253-
.get_or_insert_default()
264+
.entry(start_compactable_category)
265+
.or_default()
254266
.add(start_compactable_size, &start_compactable_range);
255267
});
256268
let mut current_skip = 0;
@@ -340,8 +352,9 @@ pub fn get_merge_segments<T: Compactable>(
340352
// set.
341353
current_set.push(next_index);
342354
current_size += size;
355+
let category = compactable.category();
343356
duplication.update(range.clone(), |dup_info| {
344-
dup_info.get_or_insert_default().add(size, &range);
357+
dup_info.entry(category).or_default().add(size, &range);
345358
});
346359
}
347360
}

turbopack/crates/turbo-persistence/src/db.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,7 @@ impl<S: ParallelScheduler, const FAMILIES: usize> TurboPersistence<S, FAMILIES>
831831
seq: u32,
832832
range: StaticSortedFileRange,
833833
size: u64,
834+
flags: MetaEntryFlags,
834835
}
835836

836837
impl Compactable for SstWithRange {
@@ -841,6 +842,10 @@ impl<S: ParallelScheduler, const FAMILIES: usize> TurboPersistence<S, FAMILIES>
841842
fn size(&self) -> u64 {
842843
self.size
843844
}
845+
846+
fn category(&self) -> u8 {
847+
if self.flags.cold() { 1 } else { 0 }
848+
}
844849
}
845850

846851
let ssts_with_ranges = meta_files
@@ -856,6 +861,7 @@ impl<S: ParallelScheduler, const FAMILIES: usize> TurboPersistence<S, FAMILIES>
856861
seq: entry.sequence_number(),
857862
range: entry.range(),
858863
size: entry.size(),
864+
flags: entry.flags(),
859865
})
860866
})
861867
.collect::<Vec<_>>();

0 commit comments

Comments
 (0)