From 6e327377533e1217bc7084f7df183452860d2f68 Mon Sep 17 00:00:00 2001 From: John Spray Date: Tue, 2 May 2023 13:13:47 +0100 Subject: [PATCH] application: syncfs on storage during startup If Redpanda crashes during storage writes, there may be non-persistent metadata left visible in the filesystem. To provide the guarantee that everything the storage layer sees on disk during recovery is persistent, call syncfs during startup. The strength of this guarantee depends on the underlying filesystem, as some filesystems may lie about persistence if there has previously been an I/O error on a device, by marking pages clean even though they were not successfully flushed. This does not help in all cases, but it is a cheap thing to do that may help us reason about storage consistency issues across crash/restart cycles if they are seen in the field. --- src/v/redpanda/application.cc | 22 ++++++++++++++++++++++ src/v/redpanda/application.h | 1 + src/v/utils/file_sanitizer.h | 7 +++++++ 3 files changed, 30 insertions(+) diff --git a/src/v/redpanda/application.cc b/src/v/redpanda/application.cc index d3c3ad5d44d36..463e540dcd923 100644 --- a/src/v/redpanda/application.cc +++ b/src/v/redpanda/application.cc @@ -351,6 +351,7 @@ int application::run(int ac, char** av) { }); // must initialize configuration before services hydrate_config(cfg); + sync_disks().get(); initialize(); check_environment(); check_for_crash_loop(); @@ -375,6 +376,27 @@ int application::run(int ac, char** av) { }); } +ss::future<> application::sync_disks() { + /* + * The last running instance of redpanda might have left un-flushed + * data or metadata in the filesystem. + * + * This does not help us in the case of e.g. IO-failed fsync() calls, which + * generally mark pages as clean anyway[1], but it does provide a more + * consistent basis for reasoning about persistence of metadata changes + * that might not have been fsync'd by a previous run of Redpanda. + * + * 1. https://dl.acm.org/doi/pdf/10.1145/3450338 + */ + vlog(_log.info, "Running syncfs on data directory..."); + auto data_dir = co_await ss::open_directory(config::node().data_directory().as_sstring()); + co_await data_dir.syncfs(); + + vlog(_log.info, "Running syncfs on cache directory..."); + auto cache_dir = co_await ss::open_directory(config::node().cloud_storage_cache_path().string()); + co_await cache_dir.syncfs(); +} + void application::initialize( std::optional proxy_cfg, std::optional proxy_client_cfg, diff --git a/src/v/redpanda/application.h b/src/v/redpanda/application.h index 69fce650960c2..36e42d236b827 100644 --- a/src/v/redpanda/application.h +++ b/src/v/redpanda/application.h @@ -70,6 +70,7 @@ class application { public: int run(int, char**); + ss::future<> sync_disks(); void initialize( std::optional proxy_cfg = std::nullopt, std::optional proxy_client_cfg = std::nullopt, diff --git a/src/v/utils/file_sanitizer.h b/src/v/utils/file_sanitizer.h index e61e29a067522..4b2f9f00aaae9 100644 --- a/src/v/utils/file_sanitizer.h +++ b/src/v/utils/file_sanitizer.h @@ -134,6 +134,13 @@ class file_io_sanitizer final : public ss::file_impl { get_file_impl(_file)->flush()); } + ss::future<> syncfs() final { + assert_file_not_closed(); + return with_op( + ssx::sformat("ss::future<> syncfs(void)"), + get_file_impl(_file)->syncfs()); + } + ss::future stat() final { assert_file_not_closed(); return with_op(