diff options
Diffstat (limited to 'src/mon/MDSMonitor.cc')
-rw-r--r-- | src/mon/MDSMonitor.cc | 143 |
1 files changed, 84 insertions, 59 deletions
diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index 9ea4288c195..637ac8d41b3 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -211,16 +211,16 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) // legal state change? if ((info.state == MDSMap::STATE_STANDBY || info.state == MDSMap::STATE_STANDBY_REPLAY) && state > 0) { - dout(10) << "mds_beacon mds can't activate itself (" << MDSMap::get_state_name(info.state) - << " -> " << MDSMap::get_state_name(state) << ")" << dendl; + dout(10) << "mds_beacon mds can't activate itself (" << ceph_mds_state_name(info.state) + << " -> " << ceph_mds_state_name(state) << ")" << dendl; goto ignore; } if (info.state == MDSMap::STATE_STANDBY && state == MDSMap::STATE_STANDBY_REPLAY && (pending_mdsmap.is_degraded() || - pending_mdsmap.get_state(info.mds) < MDSMap::STATE_ACTIVE)) { - dout(10) << "mds_beacon can't standby-replay mds" << info.mds << " at this time (cluster degraded, or mds not active)" << dendl; + pending_mdsmap.get_state(info.rank) < MDSMap::STATE_ACTIVE)) { + dout(10) << "mds_beacon can't standby-replay mds" << info.rank << " at this time (cluster degraded, or mds not active)" << dendl; goto ignore; } @@ -232,8 +232,8 @@ bool MDSMonitor::preprocess_beacon(MMDSBeacon *m) // note time and reply dout(15) << "mds_beacon " << *m << " noting time and replying" << dendl; last_beacon[addr] = g_clock.now(); - mon->messenger->send_message(new MMDSBeacon(mon->monmap->fsid, - mdsmap.get_epoch(), state, seq, 0), + mon->messenger->send_message(new MMDSBeacon(mon->monmap->fsid, m->get_name(), + mdsmap.get_epoch(), state, seq), m->get_orig_source_inst()); // done @@ -275,22 +275,14 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) // boot? if (state == MDSMap::STATE_BOOT) { - int from = m->get_orig_source_inst().name.num(); - - // standby for a given rank? - int standby_for = m->get_want_rank(); - if (standby_for >= (int)pending_mdsmap.max_mds) { - dout(10) << "prepare_beacon boot: wanted standby for mds" << from - << " >= max_mds " << pending_mdsmap.max_mds - << ", will be shared standby" << dendl; - standby_for = -1; - } - // add MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; - info.mds = standby_for; + info.name = m->get_name(); + info.rank = -1; info.addr = addr; info.state = MDSMap::STATE_STANDBY; + info.standby_for_rank = m->get_standby_for_rank(); + info.standby_for_name = m->get_standby_for_name(); // initialize the beacon timer last_beacon[addr] = g_clock.now(); @@ -304,15 +296,15 @@ bool MDSMonitor::prepare_beacon(MMDSBeacon *m) info.clear_laggy(); } - dout(10) << "prepare_beacon mds" << info.mds - << " " << MDSMap::get_state_name(info.state) - << " -> " << MDSMap::get_state_name(state) + dout(10) << "prepare_beacon mds" << info.rank + << " " << ceph_mds_state_name(info.state) + << " -> " << ceph_mds_state_name(state) << dendl; if (state == MDSMap::STATE_STOPPED) { - pending_mdsmap.up.erase(info.mds); + pending_mdsmap.up.erase(info.rank); pending_mdsmap.mds_info.erase(addr); - pending_mdsmap.stopped.insert(info.mds); - pending_mdsmap.in.erase(info.mds); + pending_mdsmap.stopped.insert(info.rank); + pending_mdsmap.in.erase(info.rank); } else { info.state = state; info.state_seq = seq; @@ -370,15 +362,46 @@ bool MDSMonitor::preprocess_command(MMonCommand *m) r = 0; } else if (m->cmd[1] == "dump") { - stringstream ds; - mdsmap.print(ds); - rdata.append(ds); - ss << "dumped mdsmap epoch " << mdsmap.get_epoch(); - r = 0; + MDSMap *p = &mdsmap; + if (m->cmd.size() > 2) { + epoch_t e = atoi(m->cmd[2].c_str()); + bufferlist b; + mon->store->get_bl_sn(b,"mdsmap",e); + if (!b.length()) { + p = 0; + r = -ENOENT; + } else { + p = new MDSMap; + p->decode(b); + } + } + if (p) { + stringstream ds; + p->print(ds); + rdata.append(ds); + ss << "dumped mdsmap epoch " << p->get_epoch(); + if (p != &mdsmap) + delete p; + r = 0; + } } else if (m->cmd[1] == "getmap") { - mdsmap.encode(rdata); - ss << "got mdsmap epoch " << mdsmap.get_epoch(); + if (m->cmd.size() > 2) { + epoch_t e = atoi(m->cmd[2].c_str()); + bufferlist b; + mon->store->get_bl_sn(b,"mdsmap",e); + if (!b.length()) { + r = -ENOENT; + } else { + MDSMap m; + m.decode(b); + m.encode(rdata); + ss << "got mdsmap epoch " << m.get_epoch(); + } + } else { + mdsmap.encode(rdata); + ss << "got mdsmap epoch " << mdsmap.get_epoch(); + } r = 0; } else if (m->cmd[1] == "injectargs" && m->cmd.size() == 4) { @@ -427,7 +450,7 @@ bool MDSMonitor::prepare_command(MMonCommand *m) } else { r = -EEXIST; ss << "mds" << who << " not active (" - << mdsmap.get_state_name(mdsmap.get_state(who)) << ")"; + << ceph_mds_state_name(mdsmap.get_state(who)) << ")"; } } else if (m->cmd[1] == "set_max_mds" && m->cmd.size() > 2) { @@ -505,16 +528,17 @@ void MDSMonitor::tick() while (pending_mdsmap.get_num_mds() < pending_mdsmap.get_max_mds() && !pending_mdsmap.is_degraded()) { int mds = 0; + string name; while (pending_mdsmap.is_in(mds)) mds++; entity_addr_t addr; - if (!pending_mdsmap.find_standby_for(mds, addr)) + if (!pending_mdsmap.find_standby_for(mds, name, addr)) break; dout(1) << "adding standby " << addr << " as mds" << mds << dendl; MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; - info.mds = mds; + info.rank = mds; if (pending_mdsmap.stopped.count(mds)) info.state = MDSMap::STATE_STARTING; else @@ -552,20 +576,20 @@ void MDSMonitor::tick() MDSMap::mds_info_t& info = pending_mdsmap.mds_info[addr]; - dout(10) << "no beacon from " << addr << " mds" << info.mds << "." << info.inc - << " " << MDSMap::get_state_name(info.state) + dout(10) << "no beacon from " << addr << " mds" << info.rank << "." << info.inc + << " " << ceph_mds_state_name(info.state) << " since " << since << dendl; // are we in? // and is there a non-laggy standby that can take over for us? entity_addr_t sa; - if (info.mds >= 0 && + if (info.rank >= 0 && info.state > 0 && //|| info.state == MDSMap::STATE_STANDBY_REPLAY) && - pending_mdsmap.find_standby_for(info.mds, sa)) { - dout(10) << " replacing " << addr << " mds" << info.mds << "." << info.inc - << " " << MDSMap::get_state_name(info.state) - << " with " << sa << dendl; + pending_mdsmap.find_standby_for(info.rank, info.name, sa)) { MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; + dout(10) << " replacing " << addr << " mds" << info.rank << "." << info.inc + << " " << ceph_mds_state_name(info.state) + << " with " << si.name << " " << sa << dendl; switch (info.state) { case MDSMap::STATE_CREATING: case MDSMap::STATE_STARTING: @@ -583,10 +607,10 @@ void MDSMonitor::tick() default: assert(0); } - si.mds = info.mds; + si.rank = info.rank; if (si.state > 0) { - si.inc = ++pending_mdsmap.inc[info.mds]; - pending_mdsmap.up[info.mds] = sa; + si.inc = ++pending_mdsmap.inc[info.rank]; + pending_mdsmap.up[info.rank] = sa; pending_mdsmap.last_failure = pending_mdsmap.epoch; } pending_mdsmap.mds_info.erase(addr); @@ -601,15 +625,15 @@ void MDSMonitor::tick() do_propose = true; } else if (info.state == MDSMap::STATE_STANDBY_REPLAY) { - dout(10) << " failing " << addr << " mds" << info.mds << "." << info.inc - << " " << MDSMap::get_state_name(info.state) + dout(10) << " failing " << addr << " mds" << info.rank << "." << info.inc + << " " << ceph_mds_state_name(info.state) << dendl; pending_mdsmap.mds_info.erase(addr); do_propose = true; } else if (!info.laggy()) { // just mark laggy - dout(10) << " marking " << addr << " mds" << info.mds << "." << info.inc - << " " << MDSMap::get_state_name(info.state) + dout(10) << " marking " << addr << " mds" << info.rank << "." << info.inc + << " " << ceph_mds_state_name(info.state) << " laggy" << dendl; info.laggy_since = now; do_propose = true; @@ -632,11 +656,12 @@ void MDSMonitor::tick() while (p != failed.end()) { int f = *p++; entity_addr_t sa; - if (pending_mdsmap.find_standby_for(f, sa)) { + string name; // FIXME + if (pending_mdsmap.find_standby_for(f, name, sa)) { dout(0) << " taking over failed mds" << f << " with " << sa << dendl; MDSMap::mds_info_t& si = pending_mdsmap.mds_info[sa]; si.state = MDSMap::STATE_REPLAY; - si.mds = f; + si.rank = f; si.inc = ++pending_mdsmap.inc[f]; pending_mdsmap.in.insert(f); pending_mdsmap.up[f] = sa; @@ -656,10 +681,10 @@ void MDSMonitor::tick() p != pending_mdsmap.mds_info.end(); p++) { if (p->second.state == MDSMap::STATE_STANDBY_REPLAY) - shadowed.insert(p->second.mds); + shadowed.insert(p->second.rank); if (p->second.state == MDSMap::STATE_STANDBY && !p->second.laggy()) - avail[p->second.mds].insert(p->first); + avail[p->second.rank].insert(p->first); } // find an mds that needs a standby @@ -682,7 +707,7 @@ void MDSMonitor::tick() dout(10) << "mds" << *p << " will be shadowed by " << s << dendl; MDSMap::mds_info_t& info = pending_mdsmap.mds_info[s]; - info.mds = *p; + info.rank = *p; info.state = MDSMap::STATE_STANDBY_REPLAY; do_propose = true; } @@ -715,21 +740,21 @@ void MDSMonitor::do_stop() info.state = MDSMap::STATE_STOPPING; break; case MDSMap::STATE_STARTING: - pending_mdsmap.stopped.insert(info.mds); + pending_mdsmap.stopped.insert(info.rank); case MDSMap::STATE_CREATING: - pending_mdsmap.up.erase(info.mds); + pending_mdsmap.up.erase(info.rank); pending_mdsmap.mds_info.erase(info.addr); - pending_mdsmap.in.erase(info.mds); + pending_mdsmap.in.erase(info.rank); break; case MDSMap::STATE_REPLAY: case MDSMap::STATE_RESOLVE: case MDSMap::STATE_RECONNECT: case MDSMap::STATE_REJOIN: // BUG: hrm, if this is the case, the STOPPING guys won't be able to stop, will they? - pending_mdsmap.failed.insert(info.mds); - pending_mdsmap.up.erase(info.mds); + pending_mdsmap.failed.insert(info.rank); + pending_mdsmap.up.erase(info.rank); pending_mdsmap.mds_info.erase(info.addr); - pending_mdsmap.in.erase(info.mds); + pending_mdsmap.in.erase(info.rank); break; } } |