summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sage Weil <sage@redhat.com> 2018-09-10 14:45:58 +0200
committer: Sage Weil <sage@redhat.com> 2018-09-10 14:45:58 +0200
commit: 4d2a73c7f144fbd511fa3f72b2abd5237065cba8 (patch)
tree: f0f7374e55ea430c6d1cf9a43b66919bf9f3d7f0
parent: Merge PR #23949 into master (diff)
parent: osd/OSDMap: include age in up and in counts for ceph status (diff)
download: ceph-4d2a73c7f144fbd511fa3f72b2abd5237065cba8.tar.xz
download: ceph-4d2a73c7f144fbd511fa3f72b2abd5237065cba8.zip
Merge PR #23845 into master
* refs/pull/23845/head:
    osd/OSDMap: include age in up and in counts for ceph status
    mon/OSDMonitor: set new_last_{up,in}_change
    osd/OSDMap: store last_up_change and last_in_change
    mgr/MgrMap: include mgr age in map printer
    mon/MgrMap: track active_changed timestamp
    mon: include mon quorum age in status
    include/utime: add utimespan_str helper

Reviewed-by: John Spray <john.spray@redhat.com>
-rwxr-xr-x  qa/standalone/scrub/osd-scrub-repair.sh  3
-rwxr-xr-x  qa/workunits/cephtool/test.sh  2
-rw-r--r--  src/include/utime.h  5
-rw-r--r--  src/mon/MgrMap.h  26
-rw-r--r--  src/mon/MgrMonitor.cc  3
-rw-r--r--  src/mon/Monitor.cc  16
-rw-r--r--  src/mon/Monitor.h  1
-rw-r--r--  src/mon/OSDMonitor.cc  17
-rw-r--r--  src/osd/OSDMap.cc  46
-rw-r--r--  src/osd/OSDMap.h  4
10 files changed, 109 insertions, 14 deletions
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index a2a1333eb48..ee1833c6fdf 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -385,7 +385,8 @@ function unfound_erasure_coded() {
ceph -s | grep "1/1 objects unfound" && break
sleep 1
done
- ceph -s|grep "4 osds: 4 up, 4 in" || return 1
+ ceph -s|grep "4 up" || return 1
+ ceph -s|grep "4 in" || return 1
ceph -s|grep "1/1 objects unfound" || return 1
teardown $dir || return 1
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index a132fd42af2..006effffec2 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -1658,7 +1658,7 @@ function test_mon_osd()
ceph osd perf
ceph osd blocked-by
- ceph osd stat | grep up,
+ ceph osd stat | grep up
}
function test_mon_crush()
diff --git a/src/include/utime.h b/src/include/utime.h
index 61fc73f61d1..08db335e246 100644
--- a/src/include/utime.h
+++ b/src/include/utime.h
@@ -533,4 +533,9 @@ inline std::ostream& operator<<(std::ostream& out, const utime_t& t)
return t.localtime(out);
}
+inline std::string utimespan_str(const utime_t& age) {  // render an interval held in a utime_t as text by handing it to timespan_str()
+ auto age_ts = ceph::timespan(age.nsec()) + std::chrono::seconds(age.sec());  // nsec() part wrapped in ceph::timespan, plus sec() as whole seconds (presumably ceph::timespan counts nanoseconds - confirm)
+ return timespan_str(age_ts);  // the actual string formatting is delegated to timespan_str()
+}
+
#endif
diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h
index 592f7e58c7f..bdfb27d37fc 100644
--- a/src/mon/MgrMap.h
+++ b/src/mon/MgrMap.h
@@ -137,6 +137,8 @@ public:
bool available = false;
/// the name (foo in mgr.<foo>) of the active daemon
std::string active_name;
+ /// when the active mgr became active, or we lost the active mgr
+ utime_t active_change;
std::map<uint64_t, StandbyInfo> standbys;
@@ -155,6 +157,7 @@ public:
uint64_t get_active_gid() const { return active_gid; }
bool get_available() const { return available; }
const std::string &get_active_name() const { return active_name; }
+ const utime_t& get_active_change() const { return active_change; }
bool all_support_module(const std::string& module) {
if (!have_module(module)) {
@@ -258,7 +261,7 @@ public:
ENCODE_FINISH(bl);
return;
}
- ENCODE_START(6, 6, bl);
+ ENCODE_START(7, 6, bl);
encode(epoch, bl);
encode(active_addrs, bl, features);
encode(active_gid, bl);
@@ -268,13 +271,14 @@ public:
encode(modules, bl);
encode(services, bl);
encode(available_modules, bl);
+ encode(active_change, bl);
ENCODE_FINISH(bl);
return;
}
void decode(bufferlist::const_iterator& p)
{
- DECODE_START(6, p);
+ DECODE_START(7, p);
decode(epoch, p);
decode(active_addrs, p);
decode(active_gid, p);
@@ -305,6 +309,11 @@ public:
if (struct_v >= 4) {
decode(available_modules, p);
}
+ if (struct_v >= 7) {
+ decode(active_change, p);
+ } else {
+ active_change = {};
+ }
DECODE_FINISH(p);
}
@@ -313,6 +322,7 @@ public:
f->dump_int("active_gid", get_active_gid());
f->dump_string("active_name", get_active_name());
f->dump_object("active_addrs", active_addrs);
+ f->dump_stream("active_change") << active_change;
f->dump_bool("available", available);
f->open_array_section("standbys");
for (const auto &i : standbys) {
@@ -356,16 +366,24 @@ public:
if (f) {
dump(f);
} else {
+ utime_t now = ceph_clock_now();
if (get_active_gid() != 0) {
*ss << get_active_name();
if (!available) {
// If the daemon hasn't gone active yet, indicate that.
- *ss << "(active, starting)";
+ *ss << "(active, starting";
} else {
- *ss << "(active)";
+ *ss << "(active";
}
+ if (active_change) {
+ *ss << ", since " << utimespan_str(now - active_change);
+ }
+ *ss << ")";
} else {
*ss << "no daemons active";
+ if (active_change) {
+ *ss << " (since " << utimespan_str(now - active_change) << ")";
+ }
}
if (standbys.size()) {
*ss << ", standbys: ";
diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc
index 3b4d0c27941..c02088d4cc2 100644
--- a/src/mon/MgrMonitor.cc
+++ b/src/mon/MgrMonitor.cc
@@ -409,6 +409,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op)
<< pending_map.active_name << ")" << dendl;
pending_map.active_gid = m->get_gid();
pending_map.active_name = m->get_name();
+ pending_map.active_change = ceph_clock_now();
pending_map.available_modules = m->get_available_modules();
encode(m->get_metadata(), pending_metadata[m->get_name()]);
pending_metadata_rm.erase(m->get_name());
@@ -666,6 +667,7 @@ bool MgrMonitor::promote_standby()
pending_map.active_name = pending_map.standbys.at(replacement_gid).name;
pending_map.available = false;
pending_map.active_addrs = entity_addrvec_t();
+ pending_map.active_change = ceph_clock_now();
drop_standby(replacement_gid, false);
@@ -685,6 +687,7 @@ void MgrMonitor::drop_active()
pending_metadata.erase(pending_map.active_name);
pending_map.active_name = "";
pending_map.active_gid = 0;
+ pending_map.active_change = ceph_clock_now();
pending_map.available = false;
pending_map.active_addrs = entity_addrvec_t();
pending_map.services.clear();
diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc
index b575b1b0b1c..98c80ed4b31 100644
--- a/src/mon/Monitor.cc
+++ b/src/mon/Monitor.cc
@@ -1107,6 +1107,7 @@ void Monitor::_reset()
scrub_event_cancel();
leader_since = utime_t();
+ quorum_since = {};
if (!quorum.empty()) {
exited_quorum = ceph_clock_now();
}
@@ -1972,6 +1973,7 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features,
ceph_assert(is_electing());
state = STATE_LEADER;
leader_since = ceph_clock_now();
+ quorum_since = mono_clock::now();
leader = rank;
quorum = active;
quorum_con_features = features;
@@ -2050,6 +2052,7 @@ void Monitor::lose_election(epoch_t epoch, set<int> &q, int l,
{
state = STATE_PEON;
leader_since = utime_t();
+ quorum_since = mono_clock::now();
leader = l;
quorum = q;
outside_quorum.clear();
@@ -2293,6 +2296,10 @@ void Monitor::_quorum_status(Formatter *f, ostream& ss)
f->dump_string("quorum_leader_name", quorum.empty() ? string() : monmap->get_name(*quorum.begin()));
+ if (!quorum.empty()) {
+ f->dump_stream("quorum_age") << (mono_clock::now() - quorum_since);
+ }
+
f->open_object_section("monmap");
monmap->dump(f);
f->close_section(); // monmap
@@ -2323,9 +2330,12 @@ void Monitor::get_mon_status(Formatter *f, ostream& ss)
for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p) {
f->dump_int("mon", *p);
}
-
f->close_section(); // quorum
+ if (!quorum.empty()) {
+ f->dump_stream("quorum_age") << (mono_clock::now() - quorum_since);
+ }
+
f->open_object_section("features");
f->dump_stream("required_con") << required_features;
mon_feature_t req_mon_features = get_required_mon_features();
@@ -2729,6 +2739,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
if (f)
f->open_object_section("status");
+ mono_clock::time_point now = mono_clock::now();
if (f) {
f->dump_stream("fsid") << monmap->get_fsid();
get_health_status(false, f, nullptr);
@@ -2742,6 +2753,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p)
f->dump_string("id", monmap->get_name(*p));
f->close_section();
+ f->dump_stream("quorum_age") << (now - quorum_since);
}
f->open_object_section("monmap");
monmap->dump(f);
@@ -2781,7 +2793,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f)
const auto quorum_names = get_quorum_names();
const auto mon_count = monmap->mon_info.size();
ss << " mon: " << spacing << mon_count << " daemons, quorum "
- << quorum_names;
+ << quorum_names << " (age " << timespan_str(now - quorum_since) << ")";
if (quorum_names.size() != mon_count) {
std::list<std::string> out_of_q;
for (size_t i = 0; i < monmap->ranks.size(); ++i) {
diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h
index 6f8e3ca451b..2dffb7d002b 100644
--- a/src/mon/Monitor.h
+++ b/src/mon/Monitor.h
@@ -230,6 +230,7 @@ private:
int leader; // current leader (to best of knowledge)
set<int> quorum; // current active set of monitors (if !starting)
+ mono_clock::time_point quorum_since; // when quorum formed
utime_t leader_since; // when this monitor became the leader, if it is the leader
utime_t exited_quorum; // time detected as not in quorum; 0 if in
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index dc7aefd493e..30ba63ae35f 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1056,9 +1056,26 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t)
dout(10) << "new_state for osd." << p->first << " is 0, removing" << dendl;
p = pending_inc.new_state.erase(p);
} else {
+ if (p->second & CEPH_OSD_UP) {
+ pending_inc.new_last_up_change = pending_inc.modified;
+ }
++p;
}
}
+ if (!pending_inc.new_up_client.empty()) {
+ pending_inc.new_last_up_change = pending_inc.modified;
+ }
+ for (auto& i : pending_inc.new_weight) {  // walk the pending weight changes; any zero <-> non-zero transition stamps new_last_in_change
+ if (i.first > osdmap.max_osd) {  // NOTE(review): an id equal to max_osd fails this test and reaches osd_weight[i.first] in the else-branch; if max_osd is the number of osd slots that index is one past the end - confirm whether `>=` is intended
+ if (i.second) {
+ // new osd is already marked in
+ pending_inc.new_last_in_change = pending_inc.modified;
+ }
+ } else if (!!i.second != !!osdmap.osd_weight[i.first]) {  // compares only zero vs non-zero: a weight change that stays non-zero is not counted
+ // existing osd marked in or out
+ pending_inc.new_last_in_change = pending_inc.modified;
+ }
+ }
{
OSDMap tmp;
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 53faeafece9..a8ad62c8ab0 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -514,7 +514,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
ENCODE_START(8, 7, bl);
{
- uint8_t v = 7;
+ uint8_t v = 8;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3;
} else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
@@ -566,6 +566,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const
encode(new_removed_snaps, bl);
encode(new_purged_snaps, bl);
}
+ if (v >= 8) {
+ encode(new_last_up_change, bl);
+ encode(new_last_in_change, bl);
+ }
ENCODE_FINISH(bl); // client-usable data
}
@@ -763,7 +767,7 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl)
return;
}
{
- DECODE_START(7, bl); // client-usable data
+ DECODE_START(8, bl); // client-usable data
decode(fsid, bl);
decode(epoch, bl);
decode(modified, bl);
@@ -810,6 +814,10 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl)
decode(new_removed_snaps, bl);
decode(new_purged_snaps, bl);
}
+ if (struct_v >= 8) {
+ decode(new_last_up_change, bl);
+ decode(new_last_in_change, bl);
+ }
DECODE_FINISH(bl); // client-usable data
}
@@ -904,6 +912,8 @@ void OSDMap::Incremental::dump(Formatter *f) const
f->dump_int("epoch", epoch);
f->dump_stream("fsid") << fsid;
f->dump_stream("modified") << modified;
+ f->dump_stream("new_last_up_change") << new_last_up_change;
+ f->dump_stream("new_last_in_change") << new_last_in_change;
f->dump_int("new_pool_max", new_pool_max);
f->dump_int("new_flags", new_flags);
f->dump_float("new_full_ratio", new_full_ratio);
@@ -1917,6 +1927,13 @@ int OSDMap::apply_incremental(const Incremental &inc)
}
}
+ if (inc.new_last_up_change != utime_t()) {
+ last_up_change = inc.new_last_up_change;
+ }
+ if (inc.new_last_in_change != utime_t()) {
+ last_in_change = inc.new_last_in_change;
+ }
+
for (const auto &pname : inc.new_pool_names) {
auto pool_name_entry = pool_name.find(pname.first);
if (pool_name_entry != pool_name.end()) {
@@ -2647,7 +2664,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
{
// NOTE: any new encoding dependencies must be reflected by
// SIGNIFICANT_FEATURES
- uint8_t v = 8;
+ uint8_t v = 9;
if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) {
v = 3;
} else if (!HAVE_FEATURE(features, SERVER_MIMIC)) {
@@ -2725,6 +2742,10 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const
encode(new_removed_snaps, bl);
encode(new_purged_snaps, bl);
}
+ if (v >= 9) {
+ encode(last_up_change, bl);
+ encode(last_in_change, bl);
+ }
ENCODE_FINISH(bl); // client-usable data
}
@@ -2958,7 +2979,7 @@ void OSDMap::decode(bufferlist::const_iterator& bl)
* Since we made it past that hurdle, we can use our normal paths.
*/
{
- DECODE_START(8, bl); // client-usable data
+ DECODE_START(9, bl); // client-usable data
// base
decode(fsid, bl);
decode(epoch, bl);
@@ -3020,6 +3041,10 @@ void OSDMap::decode(bufferlist::const_iterator& bl)
decode(new_removed_snaps, bl);
decode(new_purged_snaps, bl);
}
+ if (struct_v >= 9) {
+ decode(last_up_change, bl);
+ decode(last_in_change, bl);
+ }
DECODE_FINISH(bl); // client-usable data
}
@@ -3143,6 +3168,8 @@ void OSDMap::dump(Formatter *f) const
f->dump_stream("fsid") << get_fsid();
f->dump_stream("created") << get_created();
f->dump_stream("modified") << get_modified();
+ f->dump_stream("last_up_change") << last_up_change;
+ f->dump_stream("last_in_change") << last_in_change;
f->dump_string("flags", get_flag_string());
f->dump_unsigned("flags_num", flags);
f->open_array_section("flags_set");
@@ -3669,9 +3696,16 @@ void OSDMap::print_summary(Formatter *f, ostream& out,
f->dump_unsigned("num_remapped_pgs", get_num_pg_temp());
f->close_section();
} else {
+ utime_t now = ceph_clock_now();
out << get_num_osds() << " osds: "
- << get_num_up_osds() << " up, "
- << get_num_in_osds() << " in";
+ << get_num_up_osds() << " up";
+ if (last_up_change != utime_t()) {
+ out << " (since " << utimespan_str(now - last_up_change) << ")";
+ }
+ out << ", " << get_num_in_osds() << " in";
+ if (last_in_change != utime_t()) {
+ out << " (since " << utimespan_str(now - last_in_change) << ")";
+ }
if (extra)
out << "; epoch: e" << get_epoch();
if (get_num_pg_temp())
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index f1ef8f3189c..efae118fd7e 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -407,6 +407,8 @@ public:
int8_t new_require_min_compat_client = -1;
+ utime_t new_last_up_change, new_last_in_change;
+
mutable bool have_crc; ///< crc values are defined
uint32_t full_crc; ///< crc of the resulting OSDMap
mutable uint32_t inc_crc; ///< crc of this incremental
@@ -506,6 +508,8 @@ private:
int32_t max_osd;
vector<uint32_t> osd_state;
+ utime_t last_up_change, last_in_change;
+
// These features affect OSDMap[::Incremental] encoding, or the
// encoding of some type embedded therein (CrushWrapper, something
// from osd_types, etc.).