diff options
Diffstat (limited to 'src/mon')
-rw-r--r-- | src/mon/FSCommands.cc | 5 | ||||
-rw-r--r-- | src/mon/Monitor.cc | 7 | ||||
-rw-r--r-- | src/mon/Monitor.h | 4 | ||||
-rwxr-xr-x | src/mon/NVMeofGwMap.cc | 43 | ||||
-rwxr-xr-x | src/mon/NVMeofGwMap.h | 2 | ||||
-rw-r--r-- | src/mon/NVMeofGwMon.cc | 18 | ||||
-rw-r--r-- | src/mon/NVMeofGwMon.h | 5 |
7 files changed, 65 insertions, 19 deletions
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 6220a357ff0..cc53d2869f7 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -1211,6 +1211,11 @@ class RemoveFilesystemHandler : public FileSystemCommandHandler fsmap.erase_filesystem(fsp->get_fscid()); + ss << "If there are active snapshot schedules associated with this " + << "file-system, you might see EIO errors in the mgr logs or at the " + << "snap-schedule command-line due to the missing file-system. " + << "However, these errors are transient and will get auto-resolved."; + return 0; } }; diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 03826e3dd48..7332ec3edb1 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -5675,10 +5675,13 @@ void Monitor::handle_scrub(MonOpRequestRef op) if (scrub_result.size() == quorum.size()) { scrub_check_results(); scrub_result.clear(); - if (scrub_state->finished) + if (scrub_state->finished) { + const utime_t lat = ceph_clock_now() - scrub_state->start; + dout(10) << __func__ << " mon scrub latency: " << lat << dendl; scrub_finish(); - else + } else { scrub(); + } } } break; diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 8c152fe108f..557edbf2eb4 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -342,8 +342,10 @@ private: struct ScrubState { std::pair<std::string,std::string> last_key; ///< last scrubbed key bool finished; + const utime_t start; - ScrubState() : finished(false) { } + ScrubState() : finished(false), + start(ceph_clock_now()) { } virtual ~ScrubState() { } }; std::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc index 719403925ad..2d2735f1e7c 100755 --- a/src/mon/NVMeofGwMap.cc +++ b/src/mon/NVMeofGwMap.cc @@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw( << state.availability << " Resulting GW availability: " << state.availability << dendl; state.subsystems.clear();//ignore subsystems of this GW + utime_t now = ceph_clock_now(); + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; return 0; } } @@ -895,10 +897,12 @@ struct CMonRequestProposal : public Context { } }; -void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const +void NVMeofGwMap::get_health_checks(health_check_map_t *checks) { list<string> singleGatewayDetail; list<string> gatewayDownDetail; + list<string> gatewayInDeletingDetail; + int deleting_gateways = 0; for (const auto& created_map_pair: created_gws) { const auto& group_key = created_map_pair.first; auto& group = group_key.second; @@ -915,9 +919,37 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const ostringstream ss; ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ; gatewayDownDetail.push_back(ss.str()); + } else if (gw_created.availability == gw_availability_t::GW_DELETING) { + deleting_gateways++; + utime_t now = ceph_clock_now(); + bool found_deleting_time = false; + auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time; + auto group_it = gws_deleting_time.find(group_key); + if (group_it != gws_deleting_time.end()) { + auto& gw_map = group_it->second; + auto gw_it = gw_map.find(gw_id); + if (gw_it != gw_map.end()) { + found_deleting_time = true; + utime_t delete_time = gw_it->second; + if ((now - delete_time) > g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_delete_grace").count()) { + ostringstream ss; + ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state."; + gatewayInDeletingDetail.push_back(ss.str()); + } + } + } + if (!found_deleting_time) { + // DELETING gateway not found in gws_deleting_time, set timeout now + mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now; + } } } } + if (deleting_gateways == 0) { + // no gateway in GW_DELETING state currently, flush old gws_deleting_time + mon->nvmegwmon()->gws_deleting_time.clear(); + } + if (!singleGatewayDetail.empty()) { ostringstream ss; ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway" @@ -934,6 +966,15 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const ss.str(), gatewayDownDetail.size()); d.detail.swap(gatewayDownDetail); } + if (!gatewayInDeletingDetail.empty()) { + ostringstream ss; + ss << gatewayInDeletingDetail.size() << " gateway(s) are in deleting state" + << "; namespaces are automatically balanced across remaining gateways, " + << "this should take a few minutes."; + auto& d = checks->add("NVMEOF_GATEWAY_DELETING", HEALTH_WARN, + ss.str(), gatewayInDeletingDetail.size()); + d.detail.swap(gatewayInDeletingDetail); + } } int NVMeofGwMap::blocklist_gw( diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h index 5f657733012..85fd62b3a07 100755 --- a/src/mon/NVMeofGwMap.h +++ b/src/mon/NVMeofGwMap.h @@ -144,7 +144,7 @@ public: DECODE_FINISH(bl); } - void get_health_checks(health_check_map_t *checks) const; + void get_health_checks(health_check_map_t *checks); }; #include "NVMeofGwSerialize.h" diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc index 0fe5c3e655f..c9a6b789b89 100644 --- a/src/mon/NVMeofGwMon.cc +++ b/src/mon/NVMeofGwMon.cc @@ -66,11 +66,6 @@ void NVMeofGwMon::on_shutdown() void NVMeofGwMon::tick() { - if (++tick_ratio == 10) { - global_rebalance_index++; - dout(20) << "rebalance index " << global_rebalance_index << dendl; - tick_ratio = 0; - } if (!is_active() || !mon.is_leader()) { dout(10) << "NVMeofGwMon leader : " << mon.is_leader() << "active : " << is_active() << dendl; @@ -329,8 +324,9 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op) if (HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA)) { f->dump_string("features", "LB"); if (map.created_gws[group_key].size()) { - uint32_t index = (global_rebalance_index % - map.created_gws[group_key].size()) + 1; + time_t seconds_since_1970 = time(NULL); + uint32_t index = ((seconds_since_1970/60) % + map.created_gws[group_key].size()) + 1; f->dump_unsigned("rebalance_ana_group", index); } } @@ -625,15 +621,15 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op) avail = gw_availability_t::GW_CREATED; dout(20) << "No-subsystems condition detected for GW " << gw_id <<dendl; } else { - bool listener_found = true; + bool listener_found = false; for (auto &subs: sub) { - if (subs.listeners.size() == 0) { - listener_found = false; - dout(10) << "No-listeners condition detected for GW " << gw_id << " for nqn " << subs.nqn << dendl; + if (subs.listeners.size()) { + listener_found = true; break; } } if (!listener_found) { + dout(10) << "No-listeners condition detected for GW " << gw_id << dendl; avail = gw_availability_t::GW_CREATED; } }// for HA no-subsystems and no-listeners are same usecases diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h index 2d13e153bd2..d7f5fd89cde 100644 --- a/src/mon/NVMeofGwMon.h +++ b/src/mon/NVMeofGwMon.h @@ -82,10 +82,9 @@ public: void check_subs(bool type); void check_sub(Subscription *sub); + std::map<NvmeGroupKey, std::map<NvmeGwId, utime_t>> gws_deleting_time; + private: - // used for calculate pool & group GW responsible for rebalance - uint32_t global_rebalance_index = 1; - uint8_t tick_ratio = 0; void synchronize_last_beacon(); void process_gw_down(const NvmeGwId &gw_id, const NvmeGroupKey& group_key, bool &propose_pending, |