diff options
author | Sage Weil <sage@redhat.com> | 2018-09-10 14:45:58 +0200 |
---|---|---|
committer | Sage Weil <sage@redhat.com> | 2018-09-10 14:45:58 +0200 |
commit | 4d2a73c7f144fbd511fa3f72b2abd5237065cba8 (patch) | |
tree | f0f7374e55ea430c6d1cf9a43b66919bf9f3d7f0 | |
parent | Merge PR #23949 into master (diff) | |
parent | osd/OSDMap: include age in up and in counts for ceph status (diff) | |
download | ceph-4d2a73c7f144fbd511fa3f72b2abd5237065cba8.tar.xz ceph-4d2a73c7f144fbd511fa3f72b2abd5237065cba8.zip |
Merge PR #23845 into master
* refs/pull/23845/head:
osd/OSDMap: include age in up and in counts for ceph status
mon/OSDMonitor: set new_last_{up,in}_change
osd/OSDMap: store last_up_change and last_in_change
mgr/MgrMap: include mgr age in map printer
mon/MgrMap: track active_changed timestamp
mon: include mon quorum age in status
include/utime: add utimespan_str helper
Reviewed-by: John Spray <john.spray@redhat.com>
-rwxr-xr-x | qa/standalone/scrub/osd-scrub-repair.sh | 3 | ||||
-rwxr-xr-x | qa/workunits/cephtool/test.sh | 2 | ||||
-rw-r--r-- | src/include/utime.h | 5 | ||||
-rw-r--r-- | src/mon/MgrMap.h | 26 | ||||
-rw-r--r-- | src/mon/MgrMonitor.cc | 3 | ||||
-rw-r--r-- | src/mon/Monitor.cc | 16 | ||||
-rw-r--r-- | src/mon/Monitor.h | 1 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 17 | ||||
-rw-r--r-- | src/osd/OSDMap.cc | 46 | ||||
-rw-r--r-- | src/osd/OSDMap.h | 4 |
10 files changed, 109 insertions, 14 deletions
diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index a2a1333eb48..ee1833c6fdf 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -385,7 +385,8 @@ function unfound_erasure_coded() { ceph -s | grep "1/1 objects unfound" && break sleep 1 done - ceph -s|grep "4 osds: 4 up, 4 in" || return 1 + ceph -s|grep "4 up" || return 1 + ceph -s|grep "4 in" || return 1 ceph -s|grep "1/1 objects unfound" || return 1 teardown $dir || return 1 diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index a132fd42af2..006effffec2 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1658,7 +1658,7 @@ function test_mon_osd() ceph osd perf ceph osd blocked-by - ceph osd stat | grep up, + ceph osd stat | grep up } function test_mon_crush() diff --git a/src/include/utime.h b/src/include/utime.h index 61fc73f61d1..08db335e246 100644 --- a/src/include/utime.h +++ b/src/include/utime.h @@ -533,4 +533,9 @@ inline std::ostream& operator<<(std::ostream& out, const utime_t& t) return t.localtime(out); } +inline std::string utimespan_str(const utime_t& age) { + auto age_ts = ceph::timespan(age.nsec()) + std::chrono::seconds(age.sec()); + return timespan_str(age_ts); +} + #endif diff --git a/src/mon/MgrMap.h b/src/mon/MgrMap.h index 592f7e58c7f..bdfb27d37fc 100644 --- a/src/mon/MgrMap.h +++ b/src/mon/MgrMap.h @@ -137,6 +137,8 @@ public: bool available = false; /// the name (foo in mgr.<foo>) of the active daemon std::string active_name; + /// when the active mgr became active, or we lost the active mgr + utime_t active_change; std::map<uint64_t, StandbyInfo> standbys; @@ -155,6 +157,7 @@ public: uint64_t get_active_gid() const { return active_gid; } bool get_available() const { return available; } const std::string &get_active_name() const { return active_name; } + const utime_t& get_active_change() const { return active_change; } bool all_support_module(const std::string& module) { if (!have_module(module)) { @@ -258,7 +261,7 @@ public: ENCODE_FINISH(bl); return; } - ENCODE_START(6, 6, bl); + ENCODE_START(7, 6, bl); encode(epoch, bl); encode(active_addrs, bl, features); encode(active_gid, bl); @@ -268,13 +271,14 @@ public: encode(modules, bl); encode(services, bl); encode(available_modules, bl); + encode(active_change, bl); ENCODE_FINISH(bl); return; } void decode(bufferlist::const_iterator& p) { - DECODE_START(6, p); + DECODE_START(7, p); decode(epoch, p); decode(active_addrs, p); decode(active_gid, p); @@ -305,6 +309,11 @@ public: if (struct_v >= 4) { decode(available_modules, p); } + if (struct_v >= 7) { + decode(active_change, p); + } else { + active_change = {}; + } DECODE_FINISH(p); } @@ -313,6 +322,7 @@ public: f->dump_int("active_gid", get_active_gid()); f->dump_string("active_name", get_active_name()); f->dump_object("active_addrs", active_addrs); + f->dump_stream("active_change") << active_change; f->dump_bool("available", available); f->open_array_section("standbys"); for (const auto &i : standbys) { @@ -356,16 +366,24 @@ public: if (f) { dump(f); } else { + utime_t now = ceph_clock_now(); if (get_active_gid() != 0) { *ss << get_active_name(); if (!available) { // If the daemon hasn't gone active yet, indicate that. - *ss << "(active, starting)"; + *ss << "(active, starting"; } else { - *ss << "(active)"; + *ss << "(active"; } + if (active_change) { + *ss << ", since " << utimespan_str(now - active_change); + } + *ss << ")"; } else { *ss << "no daemons active"; + if (active_change) { + *ss << " (since " << utimespan_str(now - active_change) << ")"; + } } if (standbys.size()) { *ss << ", standbys: "; diff --git a/src/mon/MgrMonitor.cc b/src/mon/MgrMonitor.cc index 3b4d0c27941..c02088d4cc2 100644 --- a/src/mon/MgrMonitor.cc +++ b/src/mon/MgrMonitor.cc @@ -409,6 +409,7 @@ bool MgrMonitor::prepare_beacon(MonOpRequestRef op) << pending_map.active_name << ")" << dendl; pending_map.active_gid = m->get_gid(); pending_map.active_name = m->get_name(); + pending_map.active_change = ceph_clock_now(); pending_map.available_modules = m->get_available_modules(); encode(m->get_metadata(), pending_metadata[m->get_name()]); pending_metadata_rm.erase(m->get_name()); @@ -666,6 +667,7 @@ bool MgrMonitor::promote_standby() pending_map.active_name = pending_map.standbys.at(replacement_gid).name; pending_map.available = false; pending_map.active_addrs = entity_addrvec_t(); + pending_map.active_change = ceph_clock_now(); drop_standby(replacement_gid, false); @@ -685,6 +687,7 @@ void MgrMonitor::drop_active() pending_metadata.erase(pending_map.active_name); pending_map.active_name = ""; pending_map.active_gid = 0; + pending_map.active_change = ceph_clock_now(); pending_map.available = false; pending_map.active_addrs = entity_addrvec_t(); pending_map.services.clear(); diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index b575b1b0b1c..98c80ed4b31 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -1107,6 +1107,7 @@ void Monitor::_reset() scrub_event_cancel(); leader_since = utime_t(); + quorum_since = {}; if (!quorum.empty()) { exited_quorum = ceph_clock_now(); } @@ -1972,6 +1973,7 @@ void Monitor::win_election(epoch_t epoch, set<int>& active, uint64_t features, ceph_assert(is_electing()); state = STATE_LEADER; leader_since = ceph_clock_now(); + quorum_since = mono_clock::now(); leader = rank; quorum = active; quorum_con_features = features; @@ -2050,6 +2052,7 @@ void Monitor::lose_election(epoch_t epoch, set<int> &q, int l, { state = STATE_PEON; leader_since = utime_t(); + quorum_since = mono_clock::now(); leader = l; quorum = q; outside_quorum.clear(); @@ -2293,6 +2296,10 @@ void Monitor::_quorum_status(Formatter *f, ostream& ss) f->dump_string("quorum_leader_name", quorum.empty() ? string() : monmap->get_name(*quorum.begin())); + if (!quorum.empty()) { + f->dump_stream("quorum_age") << (mono_clock::now() - quorum_since); + } + f->open_object_section("monmap"); monmap->dump(f); f->close_section(); // monmap @@ -2323,9 +2330,12 @@ void Monitor::get_mon_status(Formatter *f, ostream& ss) for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p) { f->dump_int("mon", *p); } - f->close_section(); // quorum + if (!quorum.empty()) { + f->dump_stream("quorum_age") << (mono_clock::now() - quorum_since); + } + f->open_object_section("features"); f->dump_stream("required_con") << required_features; mon_feature_t req_mon_features = get_required_mon_features(); @@ -2729,6 +2739,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) if (f) f->open_object_section("status"); + mono_clock::time_point now = mono_clock::now(); if (f) { f->dump_stream("fsid") << monmap->get_fsid(); get_health_status(false, f, nullptr); @@ -2742,6 +2753,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p) f->dump_string("id", monmap->get_name(*p)); f->close_section(); + f->dump_stream("quorum_age") << (now - quorum_since); } f->open_object_section("monmap"); monmap->dump(f); @@ -2781,7 +2793,7 @@ void Monitor::get_cluster_status(stringstream &ss, Formatter *f) const auto quorum_names = get_quorum_names(); const auto mon_count = monmap->mon_info.size(); ss << " mon: " << spacing << mon_count << " daemons, quorum " - << quorum_names; + << quorum_names << " (age " << timespan_str(now - quorum_since) << ")"; if (quorum_names.size() != mon_count) { std::list<std::string> out_of_q; for (size_t i = 0; i < monmap->ranks.size(); ++i) { diff --git a/src/mon/Monitor.h b/src/mon/Monitor.h index 6f8e3ca451b..2dffb7d002b 100644 --- a/src/mon/Monitor.h +++ b/src/mon/Monitor.h @@ -230,6 +230,7 @@ private: int leader; // current leader (to best of knowledge) set<int> quorum; // current active set of monitors (if !starting) + mono_clock::time_point quorum_since; // when quorum formed utime_t leader_since; // when this monitor became the leader, if it is the leader utime_t exited_quorum; // time detected as not in quorum; 0 if in diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index dc7aefd493e..30ba63ae35f 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1056,9 +1056,26 @@ void OSDMonitor::encode_pending(MonitorDBStore::TransactionRef t) dout(10) << "new_state for osd." << p->first << " is 0, removing" << dendl; p = pending_inc.new_state.erase(p); } else { + if (p->second & CEPH_OSD_UP) { + pending_inc.new_last_up_change = pending_inc.modified; + } ++p; } } + if (!pending_inc.new_up_client.empty()) { + pending_inc.new_last_up_change = pending_inc.modified; + } + for (auto& i : pending_inc.new_weight) { + if (i.first > osdmap.max_osd) { + if (i.second) { + // new osd is already marked in + pending_inc.new_last_in_change = pending_inc.modified; + } + } else if (!!i.second != !!osdmap.osd_weight[i.first]) { + // existing osd marked in or out + pending_inc.new_last_in_change = pending_inc.modified; + } + } { OSDMap tmp; diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 53faeafece9..a8ad62c8ab0 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -514,7 +514,7 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const ENCODE_START(8, 7, bl); { - uint8_t v = 7; + uint8_t v = 8; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { @@ -566,6 +566,10 @@ void OSDMap::Incremental::encode(bufferlist& bl, uint64_t features) const encode(new_removed_snaps, bl); encode(new_purged_snaps, bl); } + if (v >= 8) { + encode(new_last_up_change, bl); + encode(new_last_in_change, bl); + } ENCODE_FINISH(bl); // client-usable data } @@ -763,7 +767,7 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl) return; } { - DECODE_START(7, bl); // client-usable data + DECODE_START(8, bl); // client-usable data decode(fsid, bl); decode(epoch, bl); decode(modified, bl); @@ -810,6 +814,10 @@ void OSDMap::Incremental::decode(bufferlist::const_iterator& bl) decode(new_removed_snaps, bl); decode(new_purged_snaps, bl); } + if (struct_v >= 8) { + decode(new_last_up_change, bl); + decode(new_last_in_change, bl); + } DECODE_FINISH(bl); // client-usable data } @@ -904,6 +912,8 @@ void OSDMap::Incremental::dump(Formatter *f) const f->dump_int("epoch", epoch); f->dump_stream("fsid") << fsid; f->dump_stream("modified") << modified; + f->dump_stream("new_last_up_change") << new_last_up_change; + f->dump_stream("new_last_in_change") << new_last_in_change; f->dump_int("new_pool_max", new_pool_max); f->dump_int("new_flags", new_flags); f->dump_float("new_full_ratio", new_full_ratio); @@ -1917,6 +1927,13 @@ int OSDMap::apply_incremental(const Incremental &inc) } } + if (inc.new_last_up_change != utime_t()) { + last_up_change = inc.new_last_up_change; + } + if (inc.new_last_in_change != utime_t()) { + last_in_change = inc.new_last_in_change; + } + for (const auto &pname : inc.new_pool_names) { auto pool_name_entry = pool_name.find(pname.first); if (pool_name_entry != pool_name.end()) { @@ -2647,7 +2664,7 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const { // NOTE: any new encoding dependencies must be reflected by // SIGNIFICANT_FEATURES - uint8_t v = 8; + uint8_t v = 9; if (!HAVE_FEATURE(features, SERVER_LUMINOUS)) { v = 3; } else if (!HAVE_FEATURE(features, SERVER_MIMIC)) { @@ -2725,6 +2742,10 @@ void OSDMap::encode(bufferlist& bl, uint64_t features) const encode(new_removed_snaps, bl); encode(new_purged_snaps, bl); } + if (v >= 9) { + encode(last_up_change, bl); + encode(last_in_change, bl); + } ENCODE_FINISH(bl); // client-usable data } @@ -2958,7 +2979,7 @@ void OSDMap::decode(bufferlist::const_iterator& bl) * Since we made it past that hurdle, we can use our normal paths. */ { - DECODE_START(8, bl); // client-usable data + DECODE_START(9, bl); // client-usable data // base decode(fsid, bl); decode(epoch, bl); @@ -3020,6 +3041,10 @@ void OSDMap::decode(bufferlist::const_iterator& bl) decode(new_removed_snaps, bl); decode(new_purged_snaps, bl); } + if (struct_v >= 9) { + decode(last_up_change, bl); + decode(last_in_change, bl); + } DECODE_FINISH(bl); // client-usable data } @@ -3143,6 +3168,8 @@ void OSDMap::dump(Formatter *f) const f->dump_stream("fsid") << get_fsid(); f->dump_stream("created") << get_created(); f->dump_stream("modified") << get_modified(); + f->dump_stream("last_up_change") << last_up_change; + f->dump_stream("last_in_change") << last_in_change; f->dump_string("flags", get_flag_string()); f->dump_unsigned("flags_num", flags); f->open_array_section("flags_set"); @@ -3669,9 +3696,16 @@ void OSDMap::print_summary(Formatter *f, ostream& out, f->dump_unsigned("num_remapped_pgs", get_num_pg_temp()); f->close_section(); } else { + utime_t now = ceph_clock_now(); out << get_num_osds() << " osds: " - << get_num_up_osds() << " up, " - << get_num_in_osds() << " in"; + << get_num_up_osds() << " up"; + if (last_up_change != utime_t()) { + out << " (since " << utimespan_str(now - last_up_change) << ")"; + } + out << ", " << get_num_in_osds() << " in"; + if (last_in_change != utime_t()) { + out << " (since " << utimespan_str(now - last_in_change) << ")"; + } if (extra) out << "; epoch: e" << get_epoch(); if (get_num_pg_temp()) diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index f1ef8f3189c..efae118fd7e 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -407,6 +407,8 @@ public: int8_t new_require_min_compat_client = -1; + utime_t new_last_up_change, new_last_in_change; + mutable bool have_crc; ///< crc values are defined uint32_t full_crc; ///< crc of the resulting OSDMap mutable uint32_t inc_crc; ///< crc of this incremental @@ -506,6 +508,8 @@ private: int32_t max_osd; vector<uint32_t> osd_state; + utime_t last_up_change, last_in_change; + // These features affect OSDMap[::Incremental] encoding, or the // encoding of some type embedded therein (CrushWrapper, something // from osd_types, etc.). |