Skip to content

Commit 67c1324

Browse files
authored
Reduce downtime during shutdown (#742)
1 parent 33f33b7 commit 67c1324

File tree

10 files changed

+85
-191
lines changed

10 files changed

+85
-191
lines changed

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,8 @@ features = ["lz4"]
219219
solana-account = { git = "https://github.com/magicblock-labs/solana-account.git", rev = "1beed4c" }
220220
solana-storage-proto = { path = "./storage-proto" }
221221
solana-svm = { git = "https://github.com/magicblock-labs/magicblock-svm.git", rev = "3e9456ec4" }
222-
# using parent commit d4e9e16 of rocksdb 0.23.0
222+
# Fork is used to enable `disable_manual_compaction` usage
223+
# Fork is based on commit d4e9e16 of rocksdb (parent commit of 0.23.0 release)
223224
# Without patching, an update isn't possible due to a conflict with solana deps
224-
# TODO(edwin): remove once solana version is raised to 2.3
225-
rocksdb = { git = "https://github.com/rust-rocksdb/rust-rocksdb.git", rev = "d4e9e16" }
225+
# TODO(edwin): remove once solana deps are upgraded and use rust-rocksdb 0.25.0 (likely)
226+
rocksdb = { git = "https://github.com/magicblock-labs/rust-rocksdb.git", rev = "6d975197" }

magicblock-api/src/magic_validator.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,6 @@ impl MagicValidator {
662662
committor_service.stop();
663663
}
664664

665-
self.ledger_truncator.stop();
666665
self.claim_fees_task.stop();
667666

668667
if self.config.chain_operation.is_some()
@@ -687,6 +686,15 @@ impl MagicValidator {
687686
pub fn ledger(&self) -> &Ledger {
688687
&self.ledger
689688
}
689+
690+
/// Prepares RocksDB for shutdown by cancelling all manual compactions.
691+
/// This speeds up `stop`, as it no longer has to wait for compaction cancellation.
692+
/// The DB can still be read from and written to after calling this.
693+
pub fn prepare_ledger_for_shutdown(&mut self) {
694+
self.ledger_truncator.stop();
695+
// Cancels manual compactions and waits until the cancellation completes
696+
self.ledger.cancel_manual_compactions();
697+
}
690698
}
691699

692700
fn programs_to_load(programs: &[LoadableProgram]) -> Vec<(Pubkey, PathBuf)> {

magicblock-config/src/validator.rs

Lines changed: 0 additions & 169 deletions
This file was deleted.

magicblock-ledger/src/store/api.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ use std::{
1111
use bincode::{deserialize, serialize};
1212
use log::*;
1313
use magicblock_core::link::blocks::BlockHash;
14+
use magicblock_metrics::metrics::{
15+
start_ledger_disable_compactions_timer, start_ledger_shutdown_timer,
16+
HistogramTimer,
17+
};
1418
use rocksdb::{Direction as IteratorDirection, FlushOptions};
1519
use scc::HashCache;
1620
use solana_clock::{Slot, UnixTimestamp};
@@ -1262,12 +1266,28 @@ impl Ledger {
12621266

12631267
/// Graceful db shutdown
12641268
pub fn shutdown(&self, wait: bool) -> LedgerResult<()> {
1269+
let _guard = MeasureGuard {
1270+
measure: Measure::start("Ledger shutdown"),
1271+
_timer: start_ledger_shutdown_timer(),
1272+
};
12651273
self.flush()?;
12661274
self.db.backend.db.cancel_all_background_work(wait);
12671275

12681276
Ok(())
12691277
}
12701278

1279+
/// Cancels manual compaction
1280+
/// Here we rely on the internal behavior of `disable_manual_compaction`
1281+
/// Which not only disables future manual compaction,
1282+
/// but also cancels all the running ones
1283+
pub fn cancel_manual_compactions(&self) {
1284+
let _guard = MeasureGuard {
1285+
measure: Measure::start("Compaction cancellation"),
1286+
_timer: start_ledger_disable_compactions_timer(),
1287+
};
1288+
self.db.backend.db.disable_manual_compaction();
1289+
}
1290+
12711291
/// Cached latest block data
12721292
pub fn latest_block(&self) -> &LatestBlock {
12731293
&self.latest_block
@@ -1311,6 +1331,19 @@ impl_has_column!(TransactionMemos, transaction_memos_cf);
13111331
impl_has_column!(PerfSamples, perf_samples_cf);
13121332
impl_has_column!(AccountModDatas, account_mod_datas_cf);
13131333

1334+
struct MeasureGuard {
1335+
measure: Measure,
1336+
_timer: HistogramTimer,
1337+
}
1338+
1339+
impl Drop for MeasureGuard {
1340+
fn drop(&mut self) {
1341+
self.measure.stop();
1342+
// We also log it in case the metrics don't have time to be scraped
1343+
info!("{}", self.measure);
1344+
}
1345+
}
1346+
13141347
// -----------------
13151348
// Tests
13161349
// -----------------

magicblock-ledger/tests/test_ledger_truncator.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ async fn test_truncator_not_purged_size() {
6363
signature.unwrap()
6464
})
6565
.collect::<Vec<_>>();
66+
ledger.flush().unwrap();
6667

6768
ledger_truncator.start();
6869
tokio::time::sleep(Duration::from_millis(10)).await;
@@ -87,6 +88,7 @@ async fn test_truncator_non_empty_ledger() {
8788
})
8889
.collect::<Vec<_>>();
8990

91+
ledger.flush().unwrap();
9092
let mut ledger_truncator =
9193
LedgerTruncator::new(ledger.clone(), TEST_TRUNCATION_TIME_INTERVAL, 0);
9294

magicblock-metrics/src/metrics/mod.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,25 @@ lazy_static::lazy_static! {
118118
),
119119
).unwrap();
120120

121+
static ref LEDGER_DISABLE_COMPACTIONS_TIME: Histogram = Histogram::with_opts(
122+
HistogramOpts::new(
123+
"ledger_disable_compactions_time",
124+
"Time in seconds spent on disabling manual compaction"
125+
)
126+
.buckets(
127+
vec![0.1, 3.0, 10.0, 60.0, (10 * 60) as f64, (30 * 60) as f64]
128+
),
129+
).unwrap();
130+
131+
static ref LEDGER_SHUTDOWN_TIME: Histogram = Histogram::with_opts(
132+
HistogramOpts::new(
133+
"ledger_shutdown_time",
134+
"Time taken for ledger to shutdown"
135+
)
136+
.buckets(
137+
vec![0.1, 1.0, 2.0, 3.0, 10.0, 60.0]
138+
),
139+
).unwrap();
121140

122141
// -----------------
123142
// Accounts
@@ -377,6 +396,8 @@ pub(crate) fn register() {
377396
register!(LEDGER_COLUMNS_COUNT_DURATION_SECONDS);
378397
register!(LEDGER_TRUNCATOR_COMPACTION_SECONDS);
379398
register!(LEDGER_TRUNCATOR_DELETE_SECONDS);
399+
register!(LEDGER_DISABLE_COMPACTIONS_TIME);
400+
register!(LEDGER_SHUTDOWN_TIME);
380401
register!(ACCOUNTS_SIZE_GAUGE);
381402
register!(ACCOUNTS_COUNT_GAUGE);
382403
register!(PENDING_ACCOUNT_CLONES_GAUGE);
@@ -483,6 +504,14 @@ pub fn observe_ledger_truncator_delete<T, F: FnOnce() -> T>(f: F) -> T {
483504
LEDGER_TRUNCATOR_DELETE_SECONDS.observe_closure_duration(f)
484505
}
485506

507+
pub fn start_ledger_disable_compactions_timer() -> HistogramTimer {
508+
LEDGER_DISABLE_COMPACTIONS_TIME.start_timer()
509+
}
510+
511+
pub fn start_ledger_shutdown_timer() -> HistogramTimer {
512+
LEDGER_SHUTDOWN_TIME.start_timer()
513+
}
514+
486515
pub fn set_accounts_size(value: i64) {
487516
ACCOUNTS_SIZE_GAUGE.set(value)
488517
}

magicblock-validator/src/main.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,5 +138,7 @@ async fn run() {
138138
if let Err(err) = Shutdown::wait().await {
139139
error!("Failed to gracefully shutdown: {}", err);
140140
}
141+
142+
api.prepare_ledger_for_shutdown();
141143
api.stop().await;
142144
}

0 commit comments

Comments
 (0)