diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 6234b96cdc7b9..e05ef685759ea 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -1555,3 +1555,27 @@ options: - mds flags: - runtime +- name: mds_damage_log_to_file + type: bool + level: advanced + desc: send mds damage lines to a file + fmt_desc: Determines if damages should appear in a file. + default: false + see_also: + - log_file + with_legacy: true +- name: mds_damage_log_file + type: str + level: advanced + desc: path to log file where damage will be written + fmt_desc: The location of the logging file for where damage of mds will be written. + daemon_default: /var/log/ceph/$cluster-$name-damages.log + with_legacy: true +- name: mds_force_hard_link_scrubbing + type: bool + level: advanced + desc: force scrubbing hard link + default: false + services: + - mds + with_legacy: true \ No newline at end of file diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index a8aaf11c0512c..4a089dc61d06e 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1585,8 +1585,8 @@ void CDir::fetch(std::string_view dname, snapid_t last, mdcache->mds->balancer->hit_dir(this, META_POP_FETCH); } -void CDir::fetch_keys(const std::vector& keys, MDSContext *c) -{ +void CDir::fetch_keys(const std::vector &keys, MDSContext *c, + bool from_scrub) { dout(10) << __func__ << " " << keys.size() << " keys on " << *this << dendl; ceph_assert(is_auth()); ceph_assert(!is_complete()); @@ -1643,7 +1643,7 @@ void CDir::fetch_keys(const std::vector& keys, MDSContext *c) } auth_pin(this); - _omap_fetch(&str_keys, c); + _omap_fetch(&str_keys, c, from_scrub); if (mdcache->mds->logger) mdcache->mds->logger->inc(l_mds_dir_fetch_keys); @@ -1698,6 +1698,7 @@ class C_IO_Dir_OMAP_Fetched : public CDirIOContext { map omap; bufferlist btbl; int ret1, ret2, ret3; + bool from_scrub = false; C_IO_Dir_OMAP_Fetched(CDir *d, MDSContext *f) : CDirIOContext(d), fin(f), @@ -1719,7 +1720,7 @@ class C_IO_Dir_OMAP_Fetched : public CDirIOContext { return; } - dir->_omap_fetched(hdrbl, omap, complete, keys, r); + dir->_omap_fetched(hdrbl, omap, complete, keys, r, from_scrub); if (fin) fin->complete(r); } @@ -1728,8 +1729,7 @@ class C_IO_Dir_OMAP_Fetched : public CDirIOContext { } }; -void CDir::_omap_fetch(std::set *keys, MDSContext *c) -{ +void CDir::_omap_fetch(std::set *keys, MDSContext *c, bool from_scrub) { C_IO_Dir_OMAP_Fetched *fin = new C_IO_Dir_OMAP_Fetched(this, c); object_t oid = get_ondisk_object(); object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); @@ -1737,6 +1737,7 @@ void CDir::_omap_fetch(std::set *keys, MDSContext *c) rd.omap_get_header(&fin->hdrbl, &fin->ret1); if (keys) { fin->complete = false; + fin->from_scrub = from_scrub; fin->keys.swap(*keys); rd.omap_get_vals_by_keys(fin->keys, &fin->omap, &fin->ret2); } else { @@ -1989,9 +1990,9 @@ CDentry *CDir::_load_dentry( return dn; } -void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, - bool complete, const std::set& keys, int r) -{ +void CDir::_omap_fetched(bufferlist &hdrbl, map &omap, + bool complete, const std::set &keys, int r, + bool from_scrub) { LogChannelRef clog = mdcache->mds->clog; dout(10) << "_fetched header " << hdrbl.length() << " bytes " << omap.size() << " keys for " << *this << dendl; @@ -2006,7 +2007,7 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, clog->error() << "dir " << dirfrag() << " object missing on disk; some " "files may be lost (" << get_path() << ")"; - go_bad(complete); + go_bad(complete || from_scrub); return; } @@ -2020,14 +2021,14 @@ void CDir::_omap_fetched(bufferlist& hdrbl, map& omap, << ": " << err.what() << dendl; clog->warn() << "Corrupt fnode header in " << dirfrag() << ": " << err.what() << " (" << get_path() << ")"; - go_bad(complete); + go_bad(complete || from_scrub); return; } if (!p.end()) { clog->warn() << "header buffer of dir " << dirfrag() << " has " << hdrbl.length() - p.get_off() << " extra bytes (" << get_path() << ")"; - go_bad(complete); + go_bad(complete || from_scrub); return; } } diff --git a/src/mds/CDir.h b/src/mds/CDir.h index 7cc4dc7ffcf83..2c124350d0f9a 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -483,7 +483,8 @@ class CDir : public MDSCacheObject, public Counter { void fetch(MDSContext *c, bool ignore_authpinnability=false) { fetch("", CEPH_NOSNAP, c, ignore_authpinnability); } - void fetch_keys(const std::vector& keys, MDSContext *c); + void fetch_keys(const std::vector &keys, MDSContext *c, + bool from_scrub = false); #if 0 // unused? void wait_for_commit(Context *c, version_t v=0); @@ -653,7 +654,8 @@ class CDir : public MDSCacheObject, public Counter { friend class C_IO_Dir_Committed; friend class C_IO_Dir_Commit_Ops; - void _omap_fetch(std::set *keys, MDSContext *fin=nullptr); + void _omap_fetch(std::set *keys, MDSContext *fin = nullptr, + bool from_scrub = false); void _omap_fetch_more(version_t omap_version, bufferlist& hdrbl, std::map& omap, MDSContext *fin); CDentry *_load_dentry( @@ -671,8 +673,10 @@ class CDir : public MDSCacheObject, public Counter { */ void go_bad(bool complete); - void _omap_fetched(ceph::buffer::list& hdrbl, std::map& omap, - bool complete, const std::set& keys, int r); + void _omap_fetched(ceph::buffer::list &hdrbl, + std::map &omap, + bool complete, const std::set &keys, int r, + bool from_scrub = false); // -- commit -- void _commit(version_t want, int op_prio); diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 71b6081be7de2..6fdc20d7f2f9a 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -5198,7 +5198,6 @@ void CInode::scrub_info_create() const { dout(25) << __func__ << dendl; ceph_assert(!scrub_infop); - // break out of const-land to set up implicit initial state CInode *me = const_cast(this); const auto& pi = me->get_projected_inode(); @@ -5231,23 +5230,46 @@ void CInode::scrub_initialize(ScrubHeaderRef& header) // right now we don't handle remote inodes } +void CInode::set_forward_scrub(bool forward_scrub) { + scrub_infop->forward_scrub = forward_scrub; +} + +void CInode::scrub_add_remote_link( + std::vector> &&remote_links) { + + for (auto &p : remote_links) { + scrub_infop->remote_links.emplace_back(std::move(p)); + } +} + +void CInode::scrub_reset_remote_links() { + scrub_infop->remote_links.clear(); +} + +std::vector> && +CInode::scrub_move_remote_links() { + return std::move(scrub_infop->remote_links); +} + void CInode::scrub_aborted() { dout(20) << __func__ << dendl; ceph_assert(scrub_is_in_progress()); - scrub_infop->scrub_in_progress = false; scrub_infop->header->dec_num_pending(); + scrub_infop->remote_links.clear(); + scrub_infop->forward_scrub = true; scrub_maybe_delete_info(); } void CInode::scrub_finished() { dout(20) << __func__ << dendl; ceph_assert(scrub_is_in_progress()); - scrub_infop->last_scrub_version = get_version(); scrub_infop->last_scrub_stamp = ceph_clock_now(); scrub_infop->last_scrub_dirty = true; scrub_infop->scrub_in_progress = false; + scrub_infop->remote_links.clear(); + scrub_infop->forward_scrub = true; scrub_infop->header->dec_num_pending(); } diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 6f965bffa8ea9..71d57a452f8e7 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -305,6 +305,8 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter> remote_links; + bool forward_scrub = true; fragset_t queued_frags; @@ -458,6 +460,15 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter> &&remote_links); + + void scrub_reset_remote_links(); + + std::vector> &&scrub_move_remote_links(); + + void set_forward_scrub(bool forward_scrub); + fragset_t& scrub_queued_frags() { ceph_assert(scrub_infop); return scrub_infop->queued_frags; diff --git a/src/mds/DamageTable.cc b/src/mds/DamageTable.cc index 2079d23333a83..944cba980188e 100644 --- a/src/mds/DamageTable.cc +++ b/src/mds/DamageTable.cc @@ -29,6 +29,12 @@ namespace { * Record damage to a particular dirfrag, implicitly affecting * any dentries within it. */ +inline std::ostream& operator<<(std::ostream& os, const DamageEntry& entry) +{ + entry.print(os); + return os; +} + class DirFragDamage : public DamageEntry { public: @@ -123,6 +129,28 @@ class BacktraceDamage : public DamageEntry f->close_section(); } }; + +class RemoteLinkDamage : public DamageEntry { +public: + inodeno_t ino; + std::string head_path; + RemoteLinkDamage(inodeno_t ino_, const std::string &head_path_ = "") + : ino(ino_), head_path(head_path_) {} + + damage_entry_type_t get_type() const override { + return DAMAGE_ENTRY_REMOTE_LINK; + } + + void dump(Formatter *f) const override { + f->open_object_section("remote_link_damage"); + f->dump_string("damage_type", "remote_link"); + f->dump_int("id", id); + f->dump_int("ino", ino); + f->dump_string("path", path); + f->dump_string("head_path", head_path); + f->close_section(); + } +}; } DamageEntry::~DamageEntry() @@ -132,28 +160,34 @@ bool DamageTable::notify_dentry( inodeno_t ino, frag_t frag, snapid_t snap_id, std::string_view dname, std::string_view path) { - if (oversized()) { + bool over_sized = oversized(); + if (!log_to_file && over_sized) { return true; } // Special cases: damage to these dirfrags is considered fatal to // the MDS rank that owns them. - if ( - (MDS_INO_IS_MDSDIR(ino) && MDS_INO_MDSDIR_OWNER(ino) == rank) - || - (MDS_INO_IS_STRAY(ino) && MDS_INO_STRAY_OWNER(ino) == rank) - ) { + if ((MDS_INO_IS_MDSDIR(ino) && MDS_INO_MDSDIR_OWNER(ino) == rank) || + (MDS_INO_IS_STRAY(ino) && MDS_INO_STRAY_OWNER(ino) == rank)) { derr << "Damage to dentries in fragment " << frag << " of ino " << ino << "is fatal because it is a system directory for this rank" << dendl; return true; } - auto& df_dentries = dentries[DirFragIdent(ino, frag)]; - if (auto [it, inserted] = df_dentries.try_emplace(DentryIdent(dname, snap_id)); inserted) { - auto entry = std::make_shared(ino, frag, dname, snap_id); - entry->path = path; - it->second = entry; - by_id[entry->id] = std::move(entry); + auto entry = std::make_shared(ino, frag, dname, snap_id); + entry->path = path; + if (log_to_file && log_file_opened) { + fout << *entry << std::endl; + } + + if (!over_sized) { + auto &df_dentries = dentries[DirFragIdent(ino, frag)]; + if (auto [it, inserted] = + df_dentries.try_emplace(DentryIdent(dname, snap_id)); + inserted) { + it->second = entry; + by_id[entry->id] = std::move(entry); + } } return false; @@ -171,15 +205,24 @@ bool DamageTable::notify_dirfrag(inodeno_t ino, frag_t frag, return true; } - if (oversized()) { + bool over_sized = oversized(); + + if (!log_to_file && over_sized) { return true; } - if (auto [it, inserted] = dirfrags.try_emplace(DirFragIdent(ino, frag)); inserted) { - DamageEntryRef entry = std::make_shared(ino, frag); - entry->path = path; - it->second = entry; - by_id[entry->id] = std::move(entry); + DamageEntryRef entry = std::make_shared(ino, frag); + entry->path = path; + if (log_to_file && log_file_opened) { + fout << *entry << std::endl; + } + + if (!over_sized) { + if (auto [it, inserted] = dirfrags.try_emplace(DirFragIdent(ino, frag)); + inserted) { + it->second = entry; + by_id[entry->id] = std::move(entry); + } } return false; @@ -187,15 +230,47 @@ bool DamageTable::notify_dirfrag(inodeno_t ino, frag_t frag, bool DamageTable::notify_remote_damaged(inodeno_t ino, std::string_view path) { - if (oversized()) { + bool over_sized = oversized(); + if (!log_to_file && over_sized) { return true; } - if (auto [it, inserted] = remotes.try_emplace(ino); inserted) { - auto entry = std::make_shared(ino); - entry->path = path; - it->second = entry; - by_id[entry->id] = std::move(entry); + auto entry = std::make_shared(ino); + entry->path = path; + if (log_to_file && log_file_opened) { + fout << *entry << std::endl; + } + + if (!over_sized) { + if (auto [it, inserted] = remotes.try_emplace(ino); inserted) { + it->second = entry; + by_id[entry->id] = std::move(entry); + } + } + + return false; +} + +bool DamageTable::notify_remote_link_damaged(inodeno_t ino, + const std::string &path, + const std::string &head_path) { + bool over_sized = oversized(); + if (!log_to_file && over_sized) { + return true; + } + + auto entry = std::make_shared(ino, head_path); + entry->path = path; + if (log_to_file && log_file_opened) { + fout << *entry << std::endl; + } + + if (!over_sized) { + auto& df_remote_links = remote_links[ino]; + if (auto [it, inserted] = df_remote_links.try_emplace(path); inserted) { + it->second = entry; + by_id[entry->id] = std::move(entry); + } } return false; @@ -263,6 +338,10 @@ bool DamageTable::is_remote_damaged( return remotes.count(ino) > 0; } +bool DamageTable::is_remote_link_damaged(const inodeno_t ino) const { + return remote_links.count(ino) > 0; +} + void DamageTable::dump(Formatter *f) const { f->open_array_section("damage_table"); @@ -293,6 +372,15 @@ void DamageTable::erase(damage_entry_id_t damage_id) } else if (type == DAMAGE_ENTRY_BACKTRACE) { auto backtrace_entry = std::static_pointer_cast(entry); remotes.erase(backtrace_entry->ino); + } else if (type == DAMAGE_ENTRY_REMOTE_LINK) { + auto remote_link_entry = std::static_pointer_cast(entry); + auto df_remote_link_it = remote_links.find(remote_link_entry->ino); + if (df_remote_link_it != remote_links.end()) { + df_remote_link_it->second.erase(entry->path); + if(df_remote_link_it->second.empty()) { + remote_links.erase(df_remote_link_it); + } + } } else { derr << "Invalid type " << type << dendl; ceph_abort(); @@ -301,3 +389,53 @@ void DamageTable::erase(damage_entry_id_t damage_id) by_id.erase(by_id_entry); } +void DamageTable::open_damage_log_file(std::ofstream &fout, + const std::filesystem::path &file_path) { + namespace fs = std::filesystem; + close_damage_log_file(); + + const fs::path dir = file_path.parent_path(); + + if (!dir.empty()) { + std::error_code ec; + + if (!fs::exists(dir, ec)) { + if (ec) { + derr << "error checking existence of damage dir: " << dir << " (" + << ec.message() << ")" << dendl; + return; + } + + if (!fs::create_directories(dir, ec)) { + derr << "failed to create directories for damage file: " << dir << " (" + << ec.message() << ")" << dendl; + return; + } + } + } + + fout.open(file_path, std::ios::out | std::ios::app); + + if (!fout.is_open()) { + derr << "failed to open damage file: " << file_path << dendl; + return; + } + + log_file_opened = true; +} + +void DamageTable::close_damage_log_file() { + if (fout.is_open()) { + fout.close(); + } + fout.clear(); + log_file_opened = false; +} + +void DamageTable::clear() { + dirfrags.clear(); + dentries.clear(); + remotes.clear(); + remote_links.clear(); + by_id.clear(); +} diff --git a/src/mds/DamageTable.h b/src/mds/DamageTable.h index a1b96fe221864..5524828278463 100644 --- a/src/mds/DamageTable.h +++ b/src/mds/DamageTable.h @@ -16,8 +16,12 @@ #ifndef DAMAGE_TABLE_H_ #define DAMAGE_TABLE_H_ +#include +#include +#include #include +#include "common/Formatter.h" #include "mdstypes.h" #include "include/random.h" @@ -30,7 +34,8 @@ typedef enum { DAMAGE_ENTRY_DIRFRAG, DAMAGE_ENTRY_DENTRY, - DAMAGE_ENTRY_BACKTRACE + DAMAGE_ENTRY_BACKTRACE, + DAMAGE_ENTRY_REMOTE_LINK } damage_entry_type_t; @@ -47,6 +52,11 @@ class DamageEntry virtual damage_entry_type_t get_type() const = 0; virtual void dump(Formatter *f) const = 0; + void print(std::ostream &os) const { + JSONFormatter jf; + dump(&jf); + jf.flush(os); + } damage_entry_id_t id; utime_t reported_at; @@ -121,10 +131,13 @@ class DentryIdent class DamageTable { public: - explicit DamageTable(const mds_rank_t rank_) - : rank(rank_) - { + explicit DamageTable(const mds_rank_t rank_, bool log_to_file_, + const std::string &log_file_) + : rank(rank_), log_to_file(log_to_file_), log_file(log_file_) { ceph_assert(rank_ != MDS_RANK_NONE); + if (log_to_file) { + open_damage_log_file(fout, log_file); + } } /** @@ -156,6 +169,9 @@ class DamageTable */ bool notify_remote_damaged(inodeno_t ino, std::string_view path); + bool notify_remote_link_damaged(inodeno_t ino, const std::string &path, + const std::string &head_path = ""); + void remove_dentry_damage_entry(CDir *dir); void remove_dirfrag_damage_entry(CDir *dir); @@ -171,10 +187,36 @@ class DamageTable bool is_remote_damaged(const inodeno_t ino) const; + bool is_remote_link_damaged(const inodeno_t ino) const; + void dump(Formatter *f) const; void erase(damage_entry_id_t damage_id); + void set_log_to_file(bool _log_to_file) { + log_to_file = _log_to_file; + if (log_to_file) { + open_damage_log_file(fout, log_file); + } else { + close_damage_log_file(); + } + } + + void set_log_file(const std::string &_log_file) { + log_file = _log_file; + if (log_to_file) { + open_damage_log_file(fout, log_file); + } + } + + void clear(); + + private: + void open_damage_log_file(std::ofstream &fout, + const std::filesystem::path &file_path); + void close_damage_log_file(); + std::ofstream fout; + protected: // I need to know my MDS rank so that I can check if // metadata items are part of my mydir. @@ -194,10 +236,17 @@ class DamageTable // (i.e. have probably/possibly missing backtraces) std::map remotes; + // Map of all links which could not be resolved + // (i.e. have probably/possibly missing primary inodes) + std::map> remote_links; + // All damage, by ID. This is a secondary index // to the dirfrag, dentry, remote maps. It exists // to enable external tools to unambiguously operate // on particular entries. std::map by_id; + bool log_to_file = false; + std::string log_file = ""; + bool log_file_opened = false; }; #endif // DAMAGE_TABLE_H_ diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 5480e6dcd5efe..6675a28d96908 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -8493,6 +8493,12 @@ int MDCache::path_traverse(MDRequestRef& mdr, MDSContextFactory& cf, if (mds->damage_table.is_remote_damaged(dnl->get_remote_ino())) { dout(4) << "traverse: remote dentry points to damaged ino " << *dn << dendl; + std::string path; + dn->get_dir()->get_inode()->make_path_string(path); + path += "/"; + path += dn->get_name(); + mds->damage_table.notify_remote_link_damaged(dnl->get_remote_ino(), + path); return -CEPHFS_EIO; } open_remote_dentry(dn, true, cf.build(), @@ -8817,7 +8823,7 @@ void MDCache::_open_remote_dentry_finish(CDentry *dn, inodeno_t ino, MDSContext path += dn->get_name(); } - bool fatal = mds->damage_table.notify_remote_damaged(ino, path); + bool fatal = mds->damage_table.notify_remote_link_damaged(ino, path); if (fatal) { mds->damaged(); ceph_abort(); // unreachable, damaged() respawns us diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index e97fd2cf83f8f..67718aae08947 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -402,6 +402,9 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "Remove a damage table entry"); ceph_assert(r == 0); + r = admin_socket->register_command("damage clear", asok_hook, + "clear the damage list"); + ceph_assert(r == 0); r = admin_socket->register_command("osdmap barrier name=target_epoch,type=CephInt", asok_hook, "Wait until the MDS has this OSD map epoch"); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index aa6a8c162f4f5..462125216e4d0 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -498,7 +498,9 @@ MDSRank::MDSRank( cct(msgr->cct), mds_lock(mds_lock_), clog(clog_), timer(timer_), mdsmap(mdsmap_), objecter(new Objecter(g_ceph_context, msgr, monc_, ioc)), - damage_table(whoami_), sessionmap(this), + damage_table(whoami_, g_conf()->mds_damage_log_to_file, + g_conf()->mds_damage_log_file), + sessionmap(this), op_tracker(g_ceph_context, g_conf()->mds_enable_op_tracker, g_conf()->osd_num_op_tracker_shard), progress_thread(this), whoami(whoami_), @@ -2929,6 +2931,9 @@ void MDSRankDispatcher::handle_asok_command( goto out; } damage_table.erase(id); + } else if (command == "damage clear") { + std::lock_guard l(mds_lock); + damage_table.clear(); } else { r = -CEPHFS_ENOSYS; } @@ -3866,6 +3871,8 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "mds_inject_rename_corrupt_dentry_first", "mds_inject_journal_corrupt_dentry_first", "mds_session_metadata_threshold", + "mds_damage_log_to_file", + "mds_damage_log_file", NULL }; return KEYS; @@ -3936,6 +3943,14 @@ void MDSRankDispatcher::handle_conf_change(const ConfigProxy& conf, const std::s if (changed.count("mds_inject_journal_corrupt_dentry_first")) { inject_journal_corrupt_dentry_first = g_conf().get_val("mds_inject_journal_corrupt_dentry_first"); } + if (changed.count("mds_damage_log_to_file")) { + damage_table.set_log_to_file( + g_conf().get_val("mds_damage_log_to_file")); + } + if (changed.count("mds_damage_log_file")) { + damage_table.set_log_file( + g_conf().get_val("mds_damage_log_file")); + } finisher->queue(new LambdaContext([this, changed](int) { std::scoped_lock lock(mds_lock); diff --git a/src/mds/ScrubHeader.h b/src/mds/ScrubHeader.h index a5d35f61ce428..0a27ab5ee4871 100644 --- a/src/mds/ScrubHeader.h +++ b/src/mds/ScrubHeader.h @@ -64,6 +64,18 @@ class ScrubHeader { } unsigned get_num_pending() const { return num_pending; } + void inc_scrubbed_inode_count() { ++scrubbed_inode_count; } + + uint64_t get_scrubbed_inode_count() const { return scrubbed_inode_count; } + + void inc_scrubbed_remote_link_count(uint64_t val = 1) { + scrubbed_remote_link_count += val; + } + + uint64_t get_scrubbed_remote_link_count() const { + return scrubbed_remote_link_count; + } + protected: const std::string tag; bool is_tag_internal; @@ -76,6 +88,8 @@ class ScrubHeader { bool repaired = false; // May be set during scrub if repairs happened unsigned epoch_last_forwarded = 0; unsigned num_pending = 0; + uint64_t scrubbed_inode_count = 0; + uint64_t scrubbed_remote_link_count = 0; }; typedef std::shared_ptr ScrubHeaderRef; diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index 742c464f4d37a..753c790c66918 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -59,12 +59,18 @@ void ScrubStack::dequeue(MDSCacheObject *obj) stack_size--; } -int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top) -{ +int ScrubStack::_enqueue( + MDSCacheObject *obj, ScrubHeaderRef &header, bool top, bool *added, + std::vector> &&remote_links) { ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock)); if (CInode *in = dynamic_cast(obj)) { if (in->scrub_is_in_progress()) { dout(10) << __func__ << " with {" << *in << "}" << ", already in scrubbing" << dendl; + if (!remote_links.empty()) { + in->scrub_add_remote_link(std::move(remote_links)); + } else { + in->set_forward_scrub(true); + } return -CEPHFS_EBUSY; } if(in->state_test(CInode::STATE_PURGING)) { @@ -75,6 +81,11 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top) dout(10) << __func__ << " with {" << *in << "}" << ", top=" << top << dendl; in->scrub_initialize(header); + if (!remote_links.empty()) { + in->scrub_add_remote_link(std::move(remote_links)); + in->set_forward_scrub(false); + } + } else if (CDir *dir = dynamic_cast(obj)) { if (dir->scrub_is_in_progress()) { dout(10) << __func__ << " with {" << *dir << "}" << ", already in scrubbing" << dendl; @@ -103,6 +114,11 @@ int ScrubStack::_enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top) scrub_stack.push_front(&obj->item_scrub); else scrub_stack.push_back(&obj->item_scrub); + + if (added) { + *added = true; + } + return 0; } @@ -209,49 +225,53 @@ void ScrubStack::kick_off_scrubs() if (scrubs_in_progress == 0) { set_state(STATE_IDLE); } - return; } assert(state == STATE_RUNNING || state == STATE_IDLE); set_state(STATE_RUNNING); - if (CInode *in = dynamic_cast(*it)) { + if (CInode *in = dynamic_cast(*it)) { dout(20) << __func__ << " examining " << *in << dendl; ++it; if (!validate_inode_auth(in)) - continue; + continue; if (!in->is_dir()) { - // it's a regular file, symlink, or hard link - dequeue(in); // we only touch it this once, so remove from stack - - scrub_file_inode(in); + // it's a regular file, symlink, or hard link + dequeue(in); // we only touch it this once, so remove from stack + scrub_file_inode(in); + } else if (in->scrub_info()->forward_scrub) { + bool added_children = false; + bool done = false; // it's done, so pop it off the stack + scrub_dir_inode(in, &added_children, &done); + if (done) { + dout(20) << __func__ << " dir inode, done" << dendl; + in->set_forward_scrub(false); + dequeue(in); + } + if (added_children) { + // dirfrags were queued at top of stack + it = scrub_stack.begin(); + } + } else if (!in->scrub_info()->remote_links.empty()){ + dequeue(in); + scrub_dir_inode_final(in); } else { - bool added_children = false; - bool done = false; // it's done, so pop it off the stack - scrub_dir_inode(in, &added_children, &done); - if (done) { - dout(20) << __func__ << " dir inode, done" << dendl; - dequeue(in); - } - if (added_children) { - // dirfrags were queued at top of stack - it = scrub_stack.begin(); - } + dequeue(in); } - } else if (CDir *dir = dynamic_cast(*it)) { - auto next = it; - ++next; + } else if (CDir *dir = dynamic_cast(*it)) { + ++it; + bool added_children = false; bool done = false; // it's done, so pop it off the stack - scrub_dirfrag(dir, &done); + scrub_dirfrag(dir, &added_children, &done); if (done) { - dout(20) << __func__ << " dirfrag, done" << dendl; - ++it; // child inodes were queued at bottom of stack - dequeue(dir); - } else { - it = next; + dout(20) << __func__ << " dirfrag, done" << dendl; + dequeue(dir); + } + if (added_children) { + it = scrub_stack.begin(); } } else { ceph_assert(0 == "dentry in scrub stack"); @@ -341,7 +361,7 @@ void ScrubStack::scrub_dir_inode(CInode *in, bool *added_children, bool *done) dir->add_waiter(CDir::WAIT_UNFREEZE, gather.new_sub()); } else if (dir->get_version() == 0) { dout(20) << __func__ << " barebones " << *dir << dendl; - dir->fetch_keys({}, gather.new_sub()); + dir->fetch_keys({}, gather.new_sub(), true); } else { _enqueue(dir, header, true); queued.insert_raw(dir->get_frag()); @@ -392,9 +412,10 @@ class C_InodeValidated : public MDSInternalContext ScrubStack *stack; CInode::validated_data result; CInode *target; + MDCache* mdcache; C_InodeValidated(MDSRank *mds, ScrubStack *stack_, CInode *target_) - : MDSInternalContext(mds), stack(stack_), target(target_) + : MDSInternalContext(mds), stack(stack_), target(target_), mdcache(mds->mdcache) { stack->scrubs_in_progress++; } @@ -408,16 +429,112 @@ class C_InodeValidated : public MDSInternalContext void ScrubStack::scrub_dir_inode_final(CInode *in) { dout(20) << __func__ << " " << *in << dendl; + ScrubHeaderRef header = in->scrub_info()->header; + if (!in->scrub_info()->forward_scrub && + !in->scrub_info()->remote_links.empty()) { + auto parent = in->get_projected_parent_dn(); + if (mdcache->mds->damage_table.is_remote_damaged(in->ino()) || + (parent && mdcache->mds->damage_table.is_dentry_damaged( + parent->get_dir(), parent->get_name(), parent->last))) { + for (auto &[remote_link_path, remote_ino] : + in->scrub_info()->remote_links) { + add_remote_link_damage(remote_link_path, remote_ino, header); + } + in->scrub_finished(); + return; + } + } C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); in->validate_disk_state(&fin->result, fin); return; } -void ScrubStack::scrub_dirfrag(CDir *dir, bool *done) +void ScrubStack::add_remote_link_damage(const std::string &path, inodeno_t ino, + ScrubHeaderRef &header) { + CInode* remote_inode = mdcache->get_inode(ino); + std::string head_path = ""; + if (remote_inode) { + remote_inode->make_path_string(head_path); + } + mdcache->mds->damage_table.notify_remote_link_damaged(ino, path, head_path); + header->inc_scrubbed_remote_link_count(); +} + +class C_RemoteInodeOpened : public MDSInternalContext { +public: + ScrubStack *stack; + CDentry *dn; + ScrubHeaderRef header; + inodeno_t ino; + MDCache* mdcache; + C_RemoteInodeOpened(MDSRank *mds, ScrubStack *stack_, + ScrubHeaderRef &header_, CDentry *dn_, inodeno_t ino_) + : MDSInternalContext(mds), stack(stack_), header(header_), dn(dn_), + ino(ino_), mdcache(stack_->mdcache) { + stack->scrubs_in_progress++; + header->inc_num_pending(); + dn->get(MDSCacheObject::PIN_SCRUBQUEUE); + } + void finish(int r) override { + std::string path; + CDir *dir = dn->get_dir(); + CInode *remote_inode = nullptr; + + stack->scrubs_in_progress--; + CDentry::linkage_t *dnl = dn->get_projected_linkage(); + if (r < 0 || !(dnl->is_remote() && dnl->get_remote_ino() == ino)) { + goto safe_exit; + } + remote_inode = mds->mdcache->get_inode(dnl->get_remote_ino()); + if (!remote_inode) { + std::string path; + if (dir) { + dir->get_inode()->make_path_string(path); + path += "/"; + path += dn->get_name(); + } + stack->add_remote_link_damage(path, ino, header); + goto safe_exit; + } + stack->_enqueue(remote_inode, header, true, nullptr, + {std::make_pair(std::move(path), ino)}); + stack->kick_off_scrubs(); + safe_exit: + dn->put(MDSCacheObject::PIN_SCRUBQUEUE); + header->dec_num_pending(); + } +}; + +CInode *ScrubStack::remote_link_checkup(CDentry *dn, ScrubHeaderRef &header) { + + CDentry::linkage_t *dnl = dn->get_linkage(); + CInode *remote_inode = mdcache->get_inode(dnl->get_remote_ino()); + if (!remote_inode) { + if (mdcache->mds->damage_table.is_remote_damaged(dnl->get_remote_ino())) { + dout(4) << "scrub: remote dentry points to damaged ino " << *dn << dendl; + std::string path; + dn->get_dir()->get_inode()->make_path_string(path); + path += "/"; + path += dn->get_name(); + mdcache->mds->damage_table.notify_remote_link_damaged( + dnl->get_remote_ino(), path); + return nullptr; + } + MDSContext *ctx = + (!header->get_repair() && g_conf()->mds_force_hard_link_scrubbing) + ? (MDSContext *)(new C_RemoteInodeOpened( + mdcache->mds, this, header, dn, dnl->get_remote_ino())) + : (MDSContext *)(new C_MDSInternalNoop()); + + mdcache->open_remote_dentry(dn, true, ctx); + } + return remote_inode; +} + +void ScrubStack::scrub_dirfrag(CDir *dir, bool *added_children, bool *done) { ceph_assert(dir != NULL); - dout(10) << __func__ << " " << *dir << dendl; if (!dir->is_complete()) { @@ -455,9 +572,19 @@ void ScrubStack::scrub_dirfrag(CDir *dir, bool *done) continue; } if (dnl->is_primary()) { - _enqueue(dnl->get_inode(), header, false); + _enqueue(dnl->get_inode(), header, true, added_children); } else if (dnl->is_remote()) { - // TODO: check remote linkage + auto remote_ino = dnl->get_remote_ino(); + CInode *remote_inode = remote_link_checkup(dn, header); + if (remote_inode && !header->get_repair() && + g_conf()->mds_force_hard_link_scrubbing) { + std::string remote_path; + dir->get_inode()->make_path_string(remote_path); + remote_path += "/"; + remote_path += dn->get_name(); + _enqueue(remote_inode, header, true, added_children, + {std::make_pair(std::move(remote_path), remote_ino)}); + } } } } @@ -479,6 +606,22 @@ void ScrubStack::scrub_dirfrag(CDir *dir, bool *done) void ScrubStack::scrub_file_inode(CInode *in) { + ScrubHeaderRef header = in->scrub_info()->header; + if (!in->scrub_info()->forward_scrub && + !in->scrub_info()->remote_links.empty()) { + auto parent = in->get_projected_parent_dn(); + if (mdcache->mds->damage_table.is_remote_damaged(in->ino()) || + (parent && mdcache->mds->damage_table.is_dentry_damaged( + parent->get_dir(), parent->get_name(), parent->last))) { + for (auto &[remote_link_path, remote_ino] : + in->scrub_info()->remote_links) { + add_remote_link_damage(remote_link_path, remote_ino, header); + } + in->scrub_finished(); + return; + } + } + C_InodeValidated *fin = new C_InodeValidated(mdcache->mds, this, in); // At this stage the DN is already past scrub_initialize, so // it's in the cache, it has PIN_SCRUBQUEUE and it is authpinned @@ -489,7 +632,7 @@ void ScrubStack::_validate_inode_done(CInode *in, int r, const CInode::validated_data &result) { LogChannelRef clog = mdcache->mds->clog; - const ScrubHeaderRefConst header = in->scrub_info()->header; + ScrubHeaderRef header = in->scrub_info()->header; std::string path; if (!result.passed_validation) { @@ -537,7 +680,34 @@ void ScrubStack::_validate_inode_done(CInode *in, int r, dout(10) << __func__ << " scrub passed on inode " << *in << dendl; } - in->scrub_finished(); + if (!in->scrub_info()->remote_links.empty()) { + if (!result.passed_validation) { + for (auto &[remote_link_path, remote_ino] : + in->scrub_info()->remote_links) { + add_remote_link_damage(remote_link_path, remote_ino, header); + } + } else { + CDentry *pdn = in->get_parent_dn(); + if (pdn) { + CInode *diri = pdn->get_dir()->get_inode(); + _enqueue(diri, header, true, nullptr, + std::move(in->scrub_move_remote_links())); + } else { + header->inc_scrubbed_remote_link_count( + in->scrub_info()->remote_links.size()); + } + } + } + in->scrub_reset_remote_links(); + + if (in->scrub_info()->forward_scrub && in->is_dir()) { + in->scrub_finished(); + _enqueue(in, header, true); + } else { + in->scrub_finished(); + } + + header->inc_scrubbed_inode_count(); } void ScrubStack::complete_control_contexts(int r) { @@ -637,7 +807,8 @@ void ScrubStack::scrub_status(Formatter *f) { if (scrubbing_map.empty()) *css << "no active scrubs running"; else - *css << state << " (waiting for more scrubs)"; + *css << state << " (waiting for more scrubs, " << stack_size + << "inodes in the stack)"; } else if (state == STATE_RUNNING) { if (clear_stack) { *css << "ABORTING"; @@ -676,6 +847,10 @@ void ScrubStack::scrub_status(Formatter *f) { f->dump_stream("path") << "#" << header->get_origin(); f->dump_string("tag", header->get_tag()); + f->dump_unsigned("scrubbed_inode_count", + header->get_scrubbed_inode_count()); + f->dump_unsigned("scrubbed_remote_link_count", + header->get_scrubbed_remote_link_count()); CachedStackStringStream optcss; if (header->get_recursive()) { diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h index 756ebd9cb0e95..85412b07b39c4 100644 --- a/src/mds/ScrubStack.h +++ b/src/mds/ScrubStack.h @@ -154,8 +154,13 @@ class ScrubStack { friend std::ostream &operator<<(std::ostream &os, const State &state); friend class C_InodeValidated; + friend class C_RemoteInodeOpened; + friend class C_RemoteLinkCheckFinished; - int _enqueue(MDSCacheObject *obj, ScrubHeaderRef& header, bool top); + int _enqueue( + MDSCacheObject *obj, ScrubHeaderRef &header, bool top, + bool *added = nullptr, + std::vector> &&remote_links = {}); /** * Remove the inode/dirfrag from the stack. */ @@ -188,6 +193,12 @@ class ScrubStack { */ void scrub_file_inode(CInode *in); + /** + * Scrub a file inode. + * @param dn The remote dentry to identify + */ + CInode *remote_link_checkup(CDentry *dn, ScrubHeaderRef &header); + /** * Callback from completion of CInode::validate_disk_state * @param in The inode we were validating @@ -211,9 +222,10 @@ class ScrubStack { * scrub of the dirfrag. * * @param dir The dirfrag to scrub (must be auth) + * @param added_children set to true if we pushed some of our children * @param done set to true if we started to do final scrub */ - void scrub_dirfrag(CDir *dir, bool *done); + void scrub_dirfrag(CDir *dir, bool *added_children, bool *done); /** * Scrub a directory-representing dentry. * @@ -267,6 +279,8 @@ class ScrubStack { void handle_scrub(const cref_t &m); void handle_scrub_stats(const cref_t &m); + void add_remote_link_damage(const std::string &path, inodeno_t ino, + ScrubHeaderRef &header); State state = STATE_IDLE; bool clear_stack = false;