summary | refs | log | tree | commit | diff | stats
path: root/src/mon
diff options
context:
space:
mode:
Diffstat (limited to 'src/mon')
-rw-r--r-- src/mon/FSCommands.cc 5
-rw-r--r-- src/mon/Monitor.cc 7
-rw-r--r-- src/mon/Monitor.h 4
-rwxr-xr-x src/mon/NVMeofGwMap.cc 43
-rwxr-xr-x src/mon/NVMeofGwMap.h 2
-rw-r--r-- src/mon/NVMeofGwMon.cc 18
-rw-r--r-- src/mon/NVMeofGwMon.h 5
7 files changed, 65 insertions, 19 deletions
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc
index 6220a357ff0..cc53d2869f7 100644
--- a/src/mon/FSCommands.cc
+++ b/src/mon/FSCommands.cc
@@ -1211,6 +1211,11 @@ class RemoveFilesystemHandler : public FileSystemCommandHandler
fsmap.erase_filesystem(fsp->get_fscid());
+ ss << "If there are active snapshot schedules associated with this "
+ << "file-system, you might see EIO errors in the mgr logs or at the "
+ << "snap-schedule command-line due to the missing file-system. "
+ << "However, these errors are transient and will get auto-resolved.";
+
return 0;
}
};
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index 03826e3dd48..7332ec3edb1 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -5675,10 +5675,13 @@ void Monitor::handle_scrub(MonOpRequestRef op)
if (scrub_result.size() == quorum.size()) {
scrub_check_results();
scrub_result.clear();
- if (scrub_state->finished)
+ if (scrub_state->finished) {
+ const utime_t lat = ceph_clock_now() - scrub_state->start;
+ dout(10) << __func__ << " mon scrub latency: " << lat << dendl;
scrub_finish();
- else
+ } else {
scrub();
+ }
}
}
break;
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 8c152fe108f..557edbf2eb4 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -342,8 +342,10 @@ private:
struct ScrubState {
std::pair<std::string,std::string> last_key; ///< last scrubbed key
bool finished;
+ const utime_t start;
- ScrubState() : finished(false) { }
+ ScrubState() : finished(false),
+ start(ceph_clock_now()) { }
virtual ~ScrubState() { }
};
std::shared_ptr<ScrubState> scrub_state; ///< keeps track of current scrub
diff --git a/src/mon/NVMeofGwMap.cc b/src/mon/NVMeofGwMap.cc
index 719403925ad..2d2735f1e7c 100755
--- a/src/mon/NVMeofGwMap.cc
+++ b/src/mon/NVMeofGwMap.cc
@@ -171,6 +171,8 @@ int NVMeofGwMap::cfg_delete_gw(
<< state.availability << " Resulting GW availability: "
<< state.availability << dendl;
state.subsystems.clear();//ignore subsystems of this GW
+ utime_t now = ceph_clock_now();
+ mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
return 0;
}
}
@@ -895,10 +897,12 @@ struct CMonRequestProposal : public Context {
}
};
-void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
+void NVMeofGwMap::get_health_checks(health_check_map_t *checks)
{
list<string> singleGatewayDetail;
list<string> gatewayDownDetail;
+ list<string> gatewayInDeletingDetail;
+ int deleting_gateways = 0;
for (const auto& created_map_pair: created_gws) {
const auto& group_key = created_map_pair.first;
auto& group = group_key.second;
@@ -915,9 +919,37 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
ostringstream ss;
ss << "NVMeoF Gateway '" << gw_id << "' is unavailable." ;
gatewayDownDetail.push_back(ss.str());
+ } else if (gw_created.availability == gw_availability_t::GW_DELETING) {
+ deleting_gateways++;
+ utime_t now = ceph_clock_now();
+ bool found_deleting_time = false;
+ auto gws_deleting_time = mon->nvmegwmon()->gws_deleting_time;
+ auto group_it = gws_deleting_time.find(group_key);
+ if (group_it != gws_deleting_time.end()) {
+ auto& gw_map = group_it->second;
+ auto gw_it = gw_map.find(gw_id);
+ if (gw_it != gw_map.end()) {
+ found_deleting_time = true;
+ utime_t delete_time = gw_it->second;
+ if ((now - delete_time) > g_conf().get_val<std::chrono::seconds>("mon_nvmeofgw_delete_grace").count()) {
+ ostringstream ss;
+ ss << "NVMeoF Gateway '" << gw_id << "' is in deleting state.";
+ gatewayInDeletingDetail.push_back(ss.str());
+ }
+ }
+ }
+ if (!found_deleting_time) {
+ // DELETING gateway not found in gws_deleting_time, set timeout now
+ mon->nvmegwmon()->gws_deleting_time[group_key][gw_id] = now;
+ }
}
}
}
+ if (deleting_gateways == 0) {
+ // no gateway in GW_DELETING state currently, flush old gws_deleting_time
+ mon->nvmegwmon()->gws_deleting_time.clear();
+ }
+
if (!singleGatewayDetail.empty()) {
ostringstream ss;
ss << singleGatewayDetail.size() << " group(s) have only 1 nvmeof gateway"
@@ -934,6 +966,15 @@ void NVMeofGwMap::get_health_checks(health_check_map_t *checks) const
ss.str(), gatewayDownDetail.size());
d.detail.swap(gatewayDownDetail);
}
+ if (!gatewayInDeletingDetail.empty()) {
+ ostringstream ss;
+ ss << gatewayInDeletingDetail.size() << " gateway(s) are in deleting state"
+ << "; namespaces are automatically balanced across remaining gateways, "
+ << "this should take a few minutes.";
+ auto& d = checks->add("NVMEOF_GATEWAY_DELETING", HEALTH_WARN,
+ ss.str(), gatewayInDeletingDetail.size());
+ d.detail.swap(gatewayInDeletingDetail);
+ }
}
int NVMeofGwMap::blocklist_gw(
diff --git a/src/mon/NVMeofGwMap.h b/src/mon/NVMeofGwMap.h
index 5f657733012..85fd62b3a07 100755
--- a/src/mon/NVMeofGwMap.h
+++ b/src/mon/NVMeofGwMap.h
@@ -144,7 +144,7 @@ public:
DECODE_FINISH(bl);
}
- void get_health_checks(health_check_map_t *checks) const;
+ void get_health_checks(health_check_map_t *checks);
};
#include "NVMeofGwSerialize.h"
diff --git a/src/mon/NVMeofGwMon.cc b/src/mon/NVMeofGwMon.cc
index 0fe5c3e655f..c9a6b789b89 100644
--- a/src/mon/NVMeofGwMon.cc
+++ b/src/mon/NVMeofGwMon.cc
@@ -66,11 +66,6 @@ void NVMeofGwMon::on_shutdown()
void NVMeofGwMon::tick()
{
- if (++tick_ratio == 10) {
- global_rebalance_index++;
- dout(20) << "rebalance index " << global_rebalance_index << dendl;
- tick_ratio = 0;
- }
if (!is_active() || !mon.is_leader()) {
dout(10) << "NVMeofGwMon leader : " << mon.is_leader()
<< "active : " << is_active() << dendl;
@@ -329,8 +324,9 @@ bool NVMeofGwMon::preprocess_command(MonOpRequestRef op)
if (HAVE_FEATURE(mon.get_quorum_con_features(), NVMEOFHA)) {
f->dump_string("features", "LB");
if (map.created_gws[group_key].size()) {
- uint32_t index = (global_rebalance_index %
- map.created_gws[group_key].size()) + 1;
+ time_t seconds_since_1970 = time(NULL);
+ uint32_t index = ((seconds_since_1970/60) %
+ map.created_gws[group_key].size()) + 1;
f->dump_unsigned("rebalance_ana_group", index);
}
}
@@ -625,15 +621,15 @@ bool NVMeofGwMon::prepare_beacon(MonOpRequestRef op)
avail = gw_availability_t::GW_CREATED;
dout(20) << "No-subsystems condition detected for GW " << gw_id <<dendl;
} else {
- bool listener_found = true;
+ bool listener_found = false;
for (auto &subs: sub) {
- if (subs.listeners.size() == 0) {
- listener_found = false;
- dout(10) << "No-listeners condition detected for GW " << gw_id << " for nqn " << subs.nqn << dendl;
+ if (subs.listeners.size()) {
+ listener_found = true;
break;
}
}
if (!listener_found) {
+ dout(10) << "No-listeners condition detected for GW " << gw_id << dendl;
avail = gw_availability_t::GW_CREATED;
}
}// for HA no-subsystems and no-listeners are same usecases
diff --git a/src/mon/NVMeofGwMon.h b/src/mon/NVMeofGwMon.h
index 2d13e153bd2..d7f5fd89cde 100644
--- a/src/mon/NVMeofGwMon.h
+++ b/src/mon/NVMeofGwMon.h
@@ -82,10 +82,9 @@ public:
void check_subs(bool type);
void check_sub(Subscription *sub);
+ std::map<NvmeGroupKey, std::map<NvmeGwId, utime_t>> gws_deleting_time;
+
private:
- // used for calculate pool & group GW responsible for rebalance
- uint32_t global_rebalance_index = 1;
- uint8_t tick_ratio = 0;
void synchronize_last_beacon();
void process_gw_down(const NvmeGwId &gw_id,
const NvmeGroupKey& group_key, bool &propose_pending,