diff options
Diffstat (limited to 'src/mon')
-rw-r--r-- | src/mon/FSCommands.cc | 5 | ||||
-rw-r--r-- | src/mon/MDSMonitor.cc | 8 | ||||
-rw-r--r-- | src/mon/MonCommands.h | 5 | ||||
-rw-r--r-- | src/mon/MonMap.cc | 11 | ||||
-rw-r--r-- | src/mon/Monitor.cc | 44 | ||||
-rw-r--r-- | src/mon/Monitor.h | 5 | ||||
-rw-r--r-- | src/mon/MonmapMonitor.cc | 36 | ||||
-rwxr-xr-x | src/mon/NVMeofGwMap.cc | 84 | ||||
-rwxr-xr-x | src/mon/NVMeofGwMap.h | 5 | ||||
-rw-r--r-- | src/mon/NVMeofGwMon.cc | 20 | ||||
-rw-r--r-- | src/mon/NVMeofGwMon.h | 2 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 61 | ||||
-rw-r--r-- | src/mon/OSDMonitor.h | 14 |
13 files changed, 289 insertions, 11 deletions
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 6220a357ff0..cc53d2869f7 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -1211,6 +1211,11 @@ class RemoveFilesystemHandler : public FileSystemCommandHandler fsmap.erase_filesystem(fsp->get_fscid()); + ss << "If there are active snapshot schedules associated with this " + << "file-system, you might see EIO errors in the mgr logs or at the " + << "snap-schedule command-line due to the missing file-system. " + << "However, these errors are transient and will get auto-resolved."; + return 0; } }; diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index d8cca4ceb61..f742303c6e9 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -758,6 +758,14 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) if (state == MDSMap::STATE_DNE) { dout(1) << __func__ << ": DNE from " << info << dendl; + + /* send a beacon reply so MDSDaemon::suicide() finishes the + Beacon::send_and_wait() call */ + auto beacon = make_message<MMDSBeacon>(mon.monmap->fsid, + m->get_global_id(), m->get_name(), get_fsmap().get_epoch(), + m->get_state(), m->get_seq(), CEPH_FEATURES_SUPPORTED_DEFAULT); + mon.send_reply(op, beacon.detach()); + goto evict; } diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 1a5d1ebd737..5564042eaf7 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -558,6 +558,11 @@ COMMAND("mon enable_stretch_mode " \ "as the tiebreaker and setting <dividing_bucket> locations " "as the units for stretching across", "mon", "rw") +COMMAND("mon disable_stretch_mode " \ + "name=crush_rule,type=CephString,req=false, " + "name=yes_i_really_mean_it,type=CephBool,req=false, ", + "disable stretch mode, reverting to normal peering rules", + "mon", "rw") COMMAND("mon set_new_tiebreaker " \ "name=name,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false", diff --git a/src/mon/MonMap.cc b/src/mon/MonMap.cc index 6eb37df171a..8d0540d71f2 100644 --- a/src/mon/MonMap.cc +++ b/src/mon/MonMap.cc @@ -196,7 +196,12 @@ void MonMap::encode(ceph::buffer::list& blist, uint64_t con_features) const if (!HAVE_FEATURE(con_features, MONENC) || !HAVE_FEATURE(con_features, SERVER_NAUTILUS)) { for (auto& [name, info] : mon_info) { - legacy_mon_addr[name] = info.public_addrs.legacy_addr(); + // see note in mon_info_t::encode() + auto addr = info.public_addrs.legacy_addr(); + if (addr == entity_addr_t()) { + addr = info.public_addrs.as_legacy_addr(); + } + legacy_mon_addr[name] = addr; } } @@ -431,10 +436,10 @@ void MonMap::dump(Formatter *f) const f->dump_unsigned("min_mon_release", to_integer<unsigned>(min_mon_release)); f->dump_string("min_mon_release_name", to_string(min_mon_release)); f->dump_int ("election_strategy", strategy); - f->dump_stream("disallowed_leaders: ") << disallowed_leaders; + f->dump_stream("disallowed_leaders") << disallowed_leaders; f->dump_bool("stretch_mode", stretch_mode_enabled); f->dump_string("tiebreaker_mon", tiebreaker_mon); - f->dump_stream("removed_ranks: ") << removed_ranks; + f->dump_stream("removed_ranks") << removed_ranks; f->open_object_section("features"); persistent_features.dump(f, "persistent"); optional_features.dump(f, "optional"); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 0d25c4b96ac..833bdddc71b 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -4024,7 +4024,7 @@ void Monitor::handle_command(MonOpRequestRef op) for (auto& p : mgrstatmon()->get_service_map().services) { auto &service = p.first; - if (ServiceMap::is_normal_ceph_entity(service)) { + if (ServiceMap::is_normal_ceph_entity(service) || service == "nvmeof") { continue; } f->open_object_section(service.c_str()); @@ -5675,10 +5675,13 @@ void Monitor::handle_scrub(MonOpRequestRef op) if (scrub_result.size() == quorum.size()) { scrub_check_results(); scrub_result.clear(); - if (scrub_state->finished) + if (scrub_state->finished) { + const utime_t lat = ceph_clock_now() - scrub_state->start; + dout(10) << __func__ << " mon scrub latency: " << lat << dendl; scrub_finish(); - else + } else { scrub(); + } } } break; @@ -6688,6 +6691,8 @@ void Monitor::notify_new_monmap(bool can_change_external_state, bool remove_rank if (monmap->stretch_mode_enabled) { try_engage_stretch_mode(); + } else { + try_disable_stretch_mode(); } if (is_stretch_mode()) { @@ -6746,6 +6751,32 @@ void Monitor::try_engage_stretch_mode() disconnect_disallowed_stretch_sessions(); } } +struct CMonDisableStretchMode : public Context { + Monitor *m; + CMonDisableStretchMode(Monitor *mon) : m(mon) {} + void finish(int r) { + m->try_disable_stretch_mode(); + } +}; +void Monitor::try_disable_stretch_mode() +{ + dout(20) << __func__ << dendl; + if (!stretch_mode_engaged) return; + if (!osdmon()->is_readable()) { + dout(20) << "osdmon is not readable" << dendl; + osdmon()->wait_for_readable_ctx(new CMonDisableStretchMode(this)); + return; + } + if (!osdmon()->osdmap.stretch_mode_enabled && + !monmap->stretch_mode_enabled) { + dout(10) << "Disabling stretch mode!" << dendl; + stretch_mode_engaged = false; + stretch_bucket_divider.clear(); + degraded_stretch_mode = false; + recovering_stretch_mode = false; + } + +} void Monitor::do_stretch_mode_election_work() { @@ -6802,6 +6833,7 @@ struct CMonGoRecovery : public Context { void Monitor::go_recovery_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; dout(20) << "is_leader(): " << is_leader() << dendl; if (!is_leader()) return; dout(20) << "is_degraded_stretch_mode(): " << is_degraded_stretch_mode() << dendl; @@ -6832,6 +6864,7 @@ void Monitor::go_recovery_stretch_mode() void Monitor::set_recovery_stretch_mode() { + if (!is_stretch_mode()) return; degraded_stretch_mode = true; recovering_stretch_mode = true; osdmon()->set_recovery_stretch_mode(); @@ -6840,6 +6873,7 @@ void Monitor::set_recovery_stretch_mode() void Monitor::maybe_go_degraded_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; if (is_degraded_stretch_mode()) return; if (!is_leader()) return; if (dead_mon_buckets.empty()) return; @@ -6878,6 +6912,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons, const set<int>& dead_buckets) { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; ceph_assert(osdmon()->is_writeable()); ceph_assert(monmon()->is_writeable()); @@ -6898,6 +6933,7 @@ void Monitor::trigger_degraded_stretch_mode(const set<string>& dead_mons, void Monitor::set_degraded_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; degraded_stretch_mode = true; recovering_stretch_mode = false; osdmon()->set_degraded_stretch_mode(); @@ -6915,6 +6951,7 @@ struct CMonGoHealthy : public Context { void Monitor::trigger_healthy_stretch_mode() { dout(20) << __func__ << dendl; + if (!is_stretch_mode()) return; if (!is_degraded_stretch_mode()) return; if (!is_leader()) return; if (!osdmon()->is_writeable()) { @@ -6935,6 +6972,7 @@ void Monitor::trigger_healthy_stretch_mode() void Monitor::set_healthy_stretch_mode() { + if (!is_stretch_mode()) return; degraded_stretch_mode = false; recovering_stretch_mode = false; osdmon()->set_healthy_stretch_mode(); diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 90fbc8f09c0..557edbf2eb4 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -293,6 +293,7 @@ public: * updates across the entire cluster. */ void try_engage_stretch_mode(); + void try_disable_stretch_mode(); void maybe_go_degraded_stretch_mode(); void trigger_degraded_stretch_mode(const std::set<std::string>& dead_mons, const std::set<int>& dead_buckets); @@ -341,8 +342,10 @@ private: struct ScrubState { std::pair<std::string,std::string> last_key; ///< last scrubbed key bool finished; + const utime_t start; - ScrubState() : finished(false) { } + ScrubState() : finished(false), + start(ceph_clock_now()) { } virtual ~ScrubState() { } }; std::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 1226c8a8241..732238f4358 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -1187,6 +1187,42 @@ bool MonmapMonitor::prepare_command(MonOpRequestRef op) ceph_assert(okay == true); } request_proposal(mon.osdmon()); + } else if (prefix == "mon disable_stretch_mode") { + if (!mon.osdmon()->is_writeable()) { + dout(10) << __func__ + << ": waiting for osdmon writeable for stretch mode" << dendl; + mon.osdmon()->wait_for_writeable(op, new Monitor::C_RetryMessage(&mon, op)); + return false; /* do not propose, yet */ + } + bool sure = false; + bool okay = false; + int errcode = 0; + if (!pending_map.stretch_mode_enabled) { + ss << "stretch mode is already disabled"; + err = -EINVAL; + goto reply_no_propose; + } + cmd_getval(cmdmap, "yes_i_really_mean_it", sure); + if (!sure) { + ss << " This command will disable stretch mode, " + "which means all your pools will be reverted back " + "to the default size, min_size and crush_rule. " + "Pass --yes-i-really-mean-it to proceed."; + err = -EPERM; + goto reply_no_propose; + } + string crush_rule = cmd_getval_or<string>(cmdmap, "crush_rule", string{}); + mon.osdmon()->try_disable_stretch_mode(ss, &okay, &errcode, crush_rule); + if (!okay) { + err = errcode; + goto reply_no_propose; + } + pending_map.stretch_mode_enabled = false; + pending_map.tiebreaker_mon = ""; + pending_map.disallowed_leaders.clear(); + pending_map.stretch_marked_down_mons.clear(); + pending_map.last_changed = ceph_clock_now(); + request_proposal(mon.osdmon()); } else { ss << "unknown command " << prefix; err = -EINVAL; diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 9fda03b4905..2d2735f1e7c 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -16,7 +16,9 @@ #include "NVMeofGwMon.h" #include "NVMeofGwMap.h" #include "OSDMonitor.h" +#include "mon/health_check.h" +using std::list; using std::map; using std::make_pair; using std::ostream; @@ -169,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw( << state.availability << " Resulting GW availability: " << state.availability << dendl; state.subsystems.clear();//ignore subsystems of this GW + utime_t now = ceph_clock_now(); + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; return 0; } } @@ -893,6 +897,86 @@ struct CMonRequestProposal : public Context { } }; +void NVMeofGwMap::get_health_checks(health_check_map_t *checks) +{ + list<string> singleGatewayDetail; + list<string> gatewayDownDetail; + list<string> gatewayInDeletingDetail; + int deleting_gateways = 0; + for (const auto& created_map_pair: created_gws) { + const auto& group_key = created_map_pair.first; + auto& group = group_key.second; + const NvmeGwMonStates& gw_created_map = created_map_pair.second; + if ( gw_created_map.size() == 1) { + ostringstream ss; + ss << "NVMeoF Gateway Group '" << group << "' has 1 gateway." ; + singleGatewayDetail.push_back(ss.str()); + } + for (const auto& gw_created_pair: gw_created_map) { + const auto& gw_id = gw_created_pair.first; + const auto& gw_created = gw_created_pair.second; + if (gw_created.availability == gw_availability_t::GW_UNAVAILABLE) { + ostringstream ss; + ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ; + gatewayDownDetail.push_back(ss.str()); + } else if (gw_created.availability == gw_availability_t::GW_DELETING) { + deleting_gateways++; + utime_t now = ceph_clock_now(); + bool found_deleting_time = false; + auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time; + auto group_it = gws_deleting_time.find(group_key); + if (group_it != gws_deleting_time.end()) { + auto& gw_map = group_it->second; + auto gw_it = gw_map.find(gw_id); + if (gw_it != gw_map.end()) { + found_deleting_time = true; + utime_t delete_time = gw_it->second; + if ((now - delete_time) > g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_delete_grace").count()) { + ostringstream ss; + ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state."; + gatewayInDeletingDetail.push_back(ss.str()); + } + } + } + if (!found_deleting_time) { + // DELETING gateway not found in gws_deleting_time, set timeout now + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; + } + } + } + } + if (deleting_gateways == 0) { + // no gateway in GW_DELETING state currently, flush old gws_deleting_time + mon->nvmegwmon()->gws_deleting_time.clear(); + } + + if (!singleGatewayDetail.empty()) { + ostringstream ss; + ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway" + << "; HA is not possible with single gateway."; + auto& d = checks->add("NVMEOF_SINGLE_GATEWAY", HEALTH_WARN, + ss.str(), singleGatewayDetail.size()); + d.detail.swap(singleGatewayDetail); + } + if (!gatewayDownDetail.empty()) { + ostringstream ss; + ss << gatewayDownDetail.size() << " gateway(s) are in unavailable state" + << "; gateway might be down, try to redeploy."; + auto& d = checks->add("NVMEOF_GATEWAY_DOWN", HEALTH_WARN, + ss.str(), gatewayDownDetail.size()); + d.detail.swap(gatewayDownDetail); + } + if (!gatewayInDeletingDetail.empty()) { + ostringstream ss; + ss << gatewayInDeletingDetail.size() << " gateway(s) are in deleting state" + << "; namespaces are automatically balanced across remaining gateways, " + << "this should take a few minutes."; + auto& d = checks->add("NVMEOF_GATEWAY_DELETING", HEALTH_WARN, + ss.str(), gatewayInDeletingDetail.size()); + d.detail.swap(gatewayInDeletingDetail); + } +} + int NVMeofGwMap::blocklist_gw( const NvmeGwId &gw_id, const NvmeGroupKey& group_key, NvmeAnaGrpId grpid, epoch_t &epoch, bool failover) diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 267d85b10f9..85fd62b3a07 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -27,6 +27,9 @@ #include "NVMeofGwTypes.h" using ceph::coarse_mono_clock; + +class health_check_map_t; + class Monitor; /*-------------------*/ class NVMeofGwMap @@ -140,6 +143,8 @@ public: decode(fsm_timers, bl); DECODE_FINISH(bl); } + + void get_health_checks(health_check_map_t *checks); }; #include "NVMeofGwSerialize.h" diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 4d2b5074b4d..c9a6b789b89 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -176,6 +176,11 @@ void NVMeofGwMon::encode_pending(MonitorDBStore::TransactionRef t) << HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA) << dendl; put_version(t, pending_map.epoch, bl); put_last_committed(t, pending_map.epoch); + + //health + health_check_map_t checks; + pending_map.get_health_checks(&checks); + encode_health(checks, t); } void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) @@ -188,6 +193,7 @@ void NVMeofGwMon::update_from_paxos(bool *need_bootstrap) bufferlist bl; int err = get_version(version, bl); ceph_assert(err == 0); + load_health(); auto p = bl.cbegin(); map.decode(p); @@ -317,6 +323,12 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) f->dump_string("group", group); if (HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA)) { f->dump_string("features", "LB"); + if (map.created_gws[group_key].size()) { + time_t seconds_since_1970 = time(NULL); + uint32_t index = ((seconds_since_1970/60) % + map.created_gws[group_key].size()) + 1; + f->dump_unsigned("rebalance_ana_group", index); + } } f->dump_unsigned("num gws", map.created_gws[group_key].size()); if (map.created_gws[group_key].size() == 0) { @@ -609,15 +621,15 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) avail = gw_availability_t::GW_CREATED; dout(20) << "No-subsystems condition detected for GW " << gw_id <<dendl; } else { - bool listener_found = true; + bool listener_found = false; for (auto &subs: sub) { - if (subs.listeners.size() == 0) { - listener_found = false; - dout(10) << "No-listeners condition detected for GW " << gw_id << " for nqn " << subs.nqn << dendl; + if (subs.listeners.size()) { + listener_found = true; break; } } if (!listener_found) { + dout(10) << "No-listeners condition detected for GW " << gw_id << dendl; avail = gw_availability_t::GW_CREATED; } }// for HA no-subsystems and no-listeners are same usecases diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 7fae8b766a5..d7f5fd89cde 100644 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -82,6 +82,8 @@ public: void check_subs(bool type); void check_sub(Subscription *sub); + std::map<NvmeGroupKey, std::map<NvmeGwId, utime_t>> gws_deleting_time; + private: void synchronize_last_beacon(); void process_gw_down(const NvmeGwId &gw_id, diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index ecde838a74c..69be79b3a8f 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -983,6 +983,8 @@ void OSDMonitor::update_from_paxos(bool *need_bootstrap) dout(20) << "Checking degraded stretch mode due to osd changes" << dendl; mon.maybe_go_degraded_stretch_mode(); } + } else { + mon.try_disable_stretch_mode(); } } @@ -15079,6 +15081,65 @@ void OSDMonitor::convert_pool_priorities(void) } } +void OSDMonitor::try_disable_stretch_mode(stringstream& ss, + bool *okay, + int *errcode, + const string& crush_rule) +{ + dout(20) << __func__ << dendl; + *okay = false; + if (!osdmap.stretch_mode_enabled) { + ss << "stretch mode is already disabled"; + *errcode = -EINVAL; + return; + } + if (osdmap.recovering_stretch_mode) { + ss << "stretch mode is currently recovering and cannot be disabled"; + *errcode = -EBUSY; + return; + } + for (const auto& pi : osdmap.get_pools()) { + pg_pool_t *pool = pending_inc.get_new_pool(pi.first, &pi.second); + pool->peering_crush_bucket_count = 0; + pool->peering_crush_bucket_target = 0; + pool->peering_crush_bucket_barrier = 0; + pool->peering_crush_mandatory_member = CRUSH_ITEM_NONE; + pool->size = g_conf().get_val<uint64_t>("osd_pool_default_size"); + pool->min_size = g_conf().get_osd_pool_default_min_size(pool->size); + // if crush rule is supplied, use it if it exists in crush map + if (!crush_rule.empty()) { + int crush_rule_id = osdmap.crush->get_rule_id(crush_rule); + if (crush_rule_id < 0) { + ss << "unrecognized crush rule " << crush_rule; + *errcode = -EINVAL; + return; + } + if (!osdmap.crush->rule_valid_for_pool_type(crush_rule_id, pool->get_type())) { + ss << "crush rule " << crush_rule << " type does not match pool type"; + *errcode = -EINVAL; + return; + } + if (crush_rule_id == pool->crush_rule) { + ss << "You can't disable stretch mode with the same crush rule you are using"; + *errcode = -EINVAL; + return; + } + pool->crush_rule = crush_rule_id; + } else { + // otherwise, use the default rule + pool->crush_rule = osdmap.crush->get_osd_pool_default_crush_replicated_rule(cct); + } + } + pending_inc.change_stretch_mode = true; + pending_inc.stretch_mode_enabled = false; + pending_inc.new_stretch_bucket_count = 0; + pending_inc.new_degraded_stretch_mode = 0; + pending_inc.new_stretch_mode_bucket = 0; + pending_inc.new_recovering_stretch_mode = 0; + *okay = true; + return; +} + void OSDMonitor::try_enable_stretch_mode_pools(stringstream& ss, bool *okay, int *errcode, set<pg_pool_t*>* pools, diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index ccd11be8a83..c82373c634d 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -845,6 +845,20 @@ public: const std::set<pg_pool_t*>& pools, const std::string& new_crush_rule); /** + * + * Set all stretch mode values of all pools back to pre-stretch mode values. + * Set all stretch mode values of OSDMap back to pre-stretch mode values. + * If crush_rule is not empty, set the crush rule to that value, else use + * the default replicated crush rule. + * @param ss: a stringstream to write errors into + * @param errcode: filled with -errno if there's a problem + * @param crush_rule: the crush rule that will used after disabling stretch mode + */ + void try_disable_stretch_mode(std::stringstream& ss, + bool *okay, + int *errcode, + const std::string& crush_rule); + /** * Check the input dead_buckets mapping (buckets->dead monitors) to see * if the OSDs are also down. If so, fill in really_down_buckets and * really_down_mons and return true; else return false. |