Diffstat (limited to 'src/mds/MDSRank.cc')
-rw-r--r-- | src/mds/MDSRank.cc | 314 |
1 file changed, 152 insertions, 162 deletions
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 91e7d4a7d55..52ed930d71b 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -12,6 +12,7 @@
  *
  */
 
+#include <array>
 #include <string_view>
 #include <typeinfo>
 #include "common/debug.h"
@@ -70,7 +71,7 @@ public:
   }
 
   void send() {
-    ceph_assert(ceph_mutex_is_locked(mds->mds_lock));
+    ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
 
     dout(20) << __func__ << dendl;
 
@@ -96,11 +97,12 @@ private:
 
     // I need to seal off the current segment, and then mark all
     // previous segments for expiry
-    auto sle = mdcache->create_subtree_map();
+    auto* sle = mdcache->create_subtree_map();
     mdlog->submit_entry(sle);
+    seq = sle->get_seq();
 
     Context *ctx = new LambdaContext([this](int r) {
-      handle_flush_mdlog(r);
+      handle_clear_mdlog(r);
     });
 
     // Flush initially so that all the segments older than our new one
@@ -109,34 +111,8 @@ private:
     mdlog->wait_for_safe(new MDSInternalContextWrapper(mds, ctx));
   }
 
-  void handle_flush_mdlog(int r) {
-    dout(20) << __func__ << ": r=" << r << dendl;
-
-    if (r != 0) {
-      *ss << "Error " << r << " (" << cpp_strerror(r) << ") while flushing journal";
-      complete(r);
-      return;
-    }
-
-    clear_mdlog();
-  }
-
-  void clear_mdlog() {
-    dout(20) << __func__ << dendl;
-
-    Context *ctx = new LambdaContext([this](int r) {
-      handle_clear_mdlog(r);
-    });
-
-    // Because we may not be the last wait_for_safe context on MDLog,
-    // and subsequent contexts might wake up in the middle of our
-    // later trim_all and interfere with expiry (by e.g. marking
-    // dirs/dentries dirty on previous log segments), we run a second
-    // wait_for_safe here. See #10368
-    mdlog->wait_for_safe(new MDSInternalContextWrapper(mds, ctx));
-  }
-
   void handle_clear_mdlog(int r) {
+    ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
     dout(20) << __func__ << ": r=" << r << dendl;
 
     if (r != 0) {
@@ -152,7 +128,7 @@ private:
     // Put all the old log segments into expiring or expired state
     dout(5) << __func__ << ": beginning segment expiry" << dendl;
 
-    int ret = mdlog->trim_all();
+    int ret = mdlog->trim_to(seq);
     if (ret != 0) {
       *ss << "Error " << ret << " (" << cpp_strerror(ret) << ") while trimming log";
       complete(ret);
@@ -176,60 +152,38 @@ private:
             << " segments to expire" << dendl;
 
     if (!expiry_gather.has_subs()) {
-      trim_segments();
+      trim_expired_segments();
       return;
     }
 
-    Context *ctx = new LambdaContext([this](int r) {
-      handle_expire_segments(r);
-    });
+    /* Because this context may be finished with the MDLog::submit_mutex held,
+     * complete it in the MDS finisher thread.
+     */
+    Context *ctx = new C_OnFinisher(new LambdaContext([this,mds=mds](int r) {
+      ceph_assert(r == 0); // MDLog is not allowed to raise errors via
+                           // wait_for_expiry
+      std::lock_guard locker(mds->mds_lock);
+      trim_expired_segments();
+    }), mds->finisher);
     expiry_gather.set_finisher(new MDSInternalContextWrapper(mds, ctx));
     expiry_gather.activate();
   }
 
-  void handle_expire_segments(int r) {
-    dout(20) << __func__ << ": r=" << r << dendl;
-
-    ceph_assert(r == 0); // MDLog is not allowed to raise errors via
-                         // wait_for_expiry
-    trim_segments();
-  }
-
-  void trim_segments() {
-    dout(20) << __func__ << dendl;
-
-    Context *ctx = new C_OnFinisher(new LambdaContext([this](int) {
-      std::lock_guard locker(mds->mds_lock);
-      trim_expired_segments();
-    }), mds->finisher);
-    ctx->complete(0);
-  }
-
   void trim_expired_segments() {
+    ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
     dout(5) << __func__ << ": expiry complete, expire_pos/trim_pos is now "
             << std::hex << mdlog->get_journaler()->get_expire_pos() << "/"
             << mdlog->get_journaler()->get_trimmed_pos() << dendl;
 
     // Now everyone I'm interested in is expired
-    mdlog->trim_expired_segments();
+    auto* ctx = new MDSInternalContextWrapper(mds, new LambdaContext([this](int r) {
+      handle_write_head(r);
+    }));
+    mdlog->trim_expired_segments(ctx);
 
-    dout(5) << __func__ << ": trim complete, expire_pos/trim_pos is now "
+    dout(5) << __func__ << ": trimming is complete; wait for journal head write. Journal expire_pos/trim_pos is now "
             << std::hex << mdlog->get_journaler()->get_expire_pos() << "/"
             << mdlog->get_journaler()->get_trimmed_pos() << dendl;
-
-    write_journal_head();
-  }
-
-  void write_journal_head() {
-    dout(20) << __func__ << dendl;
-
-    Context *ctx = new LambdaContext([this](int r) {
-      std::lock_guard locker(mds->mds_lock);
-      handle_write_head(r);
-    });
-    // Flush the journal header so that readers will start from after
-    // the flushed region
-    mdlog->get_journaler()->write_head(ctx);
   }
 
   void handle_write_head(int r) {
@@ -243,12 +197,17 @@ private:
   }
 
   void finish(int r) override {
+    /* We don't need the mds_lock but MDLog::write_head takes an MDSContext so
+     * we are expected to have it.
+     */
+    ceph_assert(ceph_mutex_is_locked_by_me(mds->mds_lock));
    dout(20) << __func__ << ": r=" << r << dendl;
     on_finish->complete(r);
   }
 
   MDCache *mdcache;
   MDLog *mdlog;
+  SegmentBoundary::seq_t seq = 0;
   std::ostream *ss;
   Context *on_finish;
 
@@ -537,7 +496,7 @@ MDSRank::MDSRank(
 
   objecter->unset_honor_pool_full();
 
-  finisher = new Finisher(cct, "MDSRank", "MR_Finisher");
+  finisher = new Finisher(cct, "MDSRank", "mds-rank-fin");
 
   mdcache = new MDCache(this, purge_queue);
   mdlog = new MDLog(this);
@@ -622,7 +581,7 @@ void MDSRankDispatcher::init()
   // who is interested in it.
   handle_osd_map();
 
-  progress_thread.create("mds_rank_progr");
+  progress_thread.create("mds-rank-progr");
 
   purge_queue.init();
 
@@ -781,8 +740,10 @@ void MDSRankDispatcher::tick()
       }
     }
 
-    if (whoami == 0)
+    if (whoami == 0) {
       scrubstack->advance_scrub_status();
+      scrubstack->purge_old_scrub_counters();
+    }
   }
 
   if (is_active() || is_stopping()) {
@@ -2808,6 +2769,9 @@ void MDSRankDispatcher::handle_asok_command(
     if (!op_tracker.dump_historic_ops(f, true)) {
       *css << "op_tracker disabled; set mds_enable_op_tracker=true to enable";
     }
+  } else if (command == "dump_export_states") {
+    std::lock_guard l(mds_lock);
+    mdcache->migrator->dump_export_states(f);
   } else if (command == "osdmap barrier") {
     int64_t target_epoch = 0;
     bool got_val = cmd_getval(cmdmap, "target_epoch", target_epoch);
@@ -2964,6 +2928,15 @@
     command_scrub_resume(f);
   } else if (command == "scrub status") {
     command_scrub_status(f);
+  } else if (command == "scrub purge_status") {
+    if (whoami != 0) {
+      *css << "Not rank 0";
+      r = -CEPHFS_EXDEV;
+      goto out;
+    }
+    string tag;
+    cmd_getval(cmdmap, "tag", tag);
+    command_scrub_purge_status(tag);
   } else if (command == "tag path") {
     if (whoami != 0) {
       *css << "Not rank 0";
@@ -3137,7 +3110,7 @@ void MDSRankDispatcher::evict_clients(
   dout(20) << __func__ << " matched " << victims.size() << " sessions" << dendl;
 
   if (victims.empty()) {
-    on_finish(-ESRCH, "no hosts match", outbl);
+    on_finish(0, "no hosts match", outbl);
     return;
   }
 
@@ -3224,6 +3197,11 @@
   scrubstack->scrub_status(f);
 }
 
+void MDSRank::command_scrub_purge_status(std::string_view tag) {
+  std::lock_guard l(mds_lock);
+  scrubstack->purge_scrub_counters(tag);
+}
+
 void MDSRank::command_get_subtrees(Formatter *f)
 {
   ceph_assert(f != NULL);
@@ -4053,95 +4031,106 @@ epoch_t MDSRank::get_osd_epoch() const
 
 const char** MDSRankDispatcher::get_tracked_conf_keys() const
 {
-  static const char* KEYS[] = {
-    "clog_to_graylog",
-    "clog_to_graylog_host",
-    "clog_to_graylog_port",
-    "clog_to_monitors",
-    "clog_to_syslog",
-    "clog_to_syslog_facility",
-    "clog_to_syslog_level",
-    "fsid",
-    "host",
-    "mds_alternate_name_max",
-    "mds_bal_export_pin",
-    "mds_bal_fragment_dirs",
-    "mds_bal_fragment_fast_factor",
-    "mds_bal_fragment_interval",
-    "mds_bal_fragment_size_max",
-    "mds_bal_interval",
-    "mds_bal_max",
-    "mds_bal_max_until",
-    "mds_bal_merge_size",
-    "mds_bal_mode",
-    "mds_bal_replicate_threshold",
-    "mds_bal_sample_interval",
-    "mds_bal_split_bits",
-    "mds_bal_split_rd",
-    "mds_bal_split_size",
-    "mds_bal_split_wr",
-    "mds_bal_unreplicate_threshold",
-    "mds_cache_memory_limit",
-    "mds_cache_mid",
-    "mds_cache_reservation",
-    "mds_cache_quiesce_decay_rate",
-    "mds_cache_quiesce_threshold",
-    "mds_cache_quiesce_sleep",
-    "mds_cache_trim_decay_rate",
-    "mds_cap_acquisition_throttle_retry_request_time",
-    "mds_cap_revoke_eviction_timeout",
-    "mds_debug_subtrees",
-    "mds_dir_max_entries",
-    "mds_dump_cache_threshold_file",
-    "mds_server_dispatch_client_request_delay",
-    "mds_server_dispatch_killpoint_random",
-    "mds_dump_cache_threshold_formatter",
-    "mds_enable_op_tracker",
-    "mds_export_ephemeral_distributed",
-    "mds_export_ephemeral_random",
-    "mds_export_ephemeral_random_max",
-    "mds_extraordinary_events_dump_interval",
-    "mds_forward_all_requests_to_auth",
-    "mds_health_cache_threshold",
-    "mds_heartbeat_grace",
-    "mds_heartbeat_reset_grace",
-    "mds_inject_journal_corrupt_dentry_first",
-    "mds_inject_migrator_session_race",
-    "mds_inject_rename_corrupt_dentry_first",
-    "mds_kill_dirfrag_at",
-    "mds_kill_shutdown_at",
-    "mds_log_event_large_threshold",
-    "mds_log_events_per_segment",
-    "mds_log_major_segment_event_ratio",
-    "mds_log_max_events",
-    "mds_log_max_segments",
-    "mds_log_pause",
-    "mds_log_skip_corrupt_events",
-    "mds_log_skip_unbounded_events",
-    "mds_max_caps_per_client",
-    "mds_max_export_size",
-    "mds_max_purge_files",
-    "mds_max_purge_ops",
-    "mds_max_purge_ops_per_pg",
-    "mds_max_snaps_per_dir",
-    "mds_op_complaint_time",
-    "mds_op_history_duration",
-    "mds_op_history_size",
-    "mds_op_log_threshold",
-    "mds_recall_max_decay_rate",
-    "mds_recall_warning_decay_rate",
-    "mds_request_load_average_decay_rate",
-    "mds_session_cache_liveness_decay_rate",
-    "mds_session_cap_acquisition_decay_rate",
-    "mds_session_cap_acquisition_throttle",
-    "mds_session_max_caps_throttle_ratio",
-    "mds_symlink_recovery",
-    "mds_session_metadata_threshold",
-    "mds_log_trim_threshold",
-    "mds_log_trim_decay_rate",
-    NULL
-  };
-  return KEYS;
+#define KEYS \
+  "clog_to_graylog", \
+  "clog_to_graylog_host", \
+  "clog_to_graylog_port", \
+  "clog_to_monitors", \
+  "clog_to_syslog", \
+  "clog_to_syslog_facility", \
+  "clog_to_syslog_level", \
+  "fsid", \
+  "host", \
+  "mds_alternate_name_max", \
+  "mds_bal_export_pin", \
+  "mds_bal_fragment_dirs", \
+  "mds_bal_fragment_fast_factor", \
+  "mds_bal_fragment_interval", \
+  "mds_bal_fragment_size_max", \
+  "mds_bal_interval", \
+  "mds_bal_max", \
+  "mds_bal_max_until", \
+  "mds_bal_merge_size", \
+  "mds_bal_mode", \
+  "mds_bal_replicate_threshold", \
+  "mds_bal_sample_interval", \
+  "mds_bal_split_bits", \
+  "mds_bal_split_rd", \
+  "mds_bal_split_size", \
+  "mds_bal_split_wr", \
+  "mds_bal_unreplicate_threshold", \
+  "mds_cache_memory_limit", \
+  "mds_cache_mid", \
+  "mds_cache_quiesce_decay_rate", \
+  "mds_cache_quiesce_sleep", \
+  "mds_cache_quiesce_threshold", \
+  "mds_cache_reservation", \
+  "mds_cache_trim_decay_rate", \
+  "mds_cap_acquisition_throttle_retry_request_time", \
+  "mds_cap_revoke_eviction_timeout", \
+  "mds_debug_subtrees", \
+  "mds_dir_max_entries", \
+  "mds_dump_cache_threshold_file", \
+  "mds_dump_cache_threshold_formatter", \
+  "mds_enable_op_tracker", \
+  "mds_export_ephemeral_distributed", \
+  "mds_export_ephemeral_random", \
+  "mds_export_ephemeral_random_max", \
+  "mds_extraordinary_events_dump_interval", \
+  "mds_forward_all_requests_to_auth", \
+  "mds_health_cache_threshold", \
+  "mds_heartbeat_grace", \
+  "mds_heartbeat_reset_grace", \
+  "mds_inject_journal_corrupt_dentry_first", \
+  "mds_inject_migrator_session_race", \
+  "mds_inject_rename_corrupt_dentry_first", \
+  "mds_kill_dirfrag_at", \
+  "mds_kill_shutdown_at", \
+  "mds_log_event_large_threshold", \
+  "mds_log_events_per_segment", \
+  "mds_log_major_segment_event_ratio", \
+  "mds_log_max_events", \
+  "mds_log_max_segments", \
+  "mds_log_pause", \
+  "mds_log_skip_corrupt_events", \
+  "mds_log_skip_unbounded_events", \
+  "mds_log_trim_decay_rate", \
+  "mds_log_trim_threshold", \
+  "mds_max_caps_per_client", \
+  "mds_max_export_size", \
+  "mds_max_purge_files", \
+  "mds_max_purge_ops", \
+  "mds_max_purge_ops_per_pg", \
+  "mds_max_snaps_per_dir", \
+  "mds_op_complaint_time", \
+  "mds_op_history_duration", \
+  "mds_op_history_size", \
+  "mds_op_log_threshold", \
+  "mds_recall_max_decay_rate", \
+  "mds_recall_warning_decay_rate", \
+  "mds_request_load_average_decay_rate", \
+  "mds_server_dispatch_client_request_delay", \
+  "mds_server_dispatch_killpoint_random", \
+  "mds_session_cache_liveness_decay_rate", \
+  "mds_session_cap_acquisition_decay_rate", \
+  "mds_session_cap_acquisition_throttle", \
+  "mds_session_max_caps_throttle_ratio", \
+  "mds_session_metadata_threshold", \
+  "mds_symlink_recovery"
+
+  constexpr bool is_sorted = [] () constexpr {
+    constexpr auto arr = std::to_array<std::string_view>({KEYS});
+    for (unsigned long i = 0; i < arr.size()-1; ++i) {
+      if (arr[i] > arr[i+1]) {
+        return false;
+      }
+    }
+    return true;
+  }();
+  static_assert(is_sorted, "keys are not sorted!");
+
+  static char const* keys[] = {KEYS, nullptr};
+  return keys;
 }
 
 void MDSRankDispatcher::handle_conf_change(const ConfigProxy& conf, const std::set<std::string>& changed)
@@ -4221,6 +4210,7 @@ void MDSRankDispatcher::handle_conf_change(const ConfigProxy& conf, const std::s
     mdcache->handle_conf_change(changed, *mdsmap);
     mdlog->handle_conf_change(changed, *mdsmap);
     purge_queue.handle_conf_change(changed, *mdsmap);
+    scrubstack->handle_conf_change(changed);
   }));
 }