diff options
-rwxr-xr-x | qa/workunits/cephtool/test.sh | 23 | ||||
-rw-r--r-- | src/common/ceph_strings.cc | 4 | ||||
-rw-r--r-- | src/crush/CrushWrapper.cc | 58 | ||||
-rw-r--r-- | src/crush/CrushWrapper.h | 9 | ||||
-rw-r--r-- | src/include/rados.h | 52 | ||||
-rw-r--r-- | src/mon/MonCommands.h | 43 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 414 | ||||
-rw-r--r-- | src/osd/OSDMap.cc | 13 | ||||
-rw-r--r-- | src/osd/OSDMap.h | 58 |
9 files changed, 585 insertions, 89 deletions
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index 422a4995b6f..02051906e80 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -1419,6 +1419,29 @@ function test_mon_osd() ceph osd dump | grep 'osd.0.*in' ceph osd find 0 + ceph osd add-noout 0 + ceph health detail | grep 'noout osd(s).*0' + ceph osd rm-noout 0 + ! ceph health detail | grep 'noout osd(s).*0' + + ceph osd add-nodown 0 1 + ceph health detail | grep 'nodown osd(s).*0.*1' + ceph osd rm-nodown all + ! ceph health detail | grep 'nodown osd(s).*0.*1' + + ids=`ceph osd ls-tree default` + for osd in $ids + do + ceph osd add-noout $osd + ceph osd add-nodown $osd + done + ceph -s | grep 'noout osd(s)' + ceph -s | grep 'nodown osd(s)' + ceph osd rm-noout any + ceph osd rm-nodown any + ! ceph -s | grep 'noout osd(s)' + ! ceph -s | grep 'nodown osd(s)' + # make sure mark out preserves weight ceph osd reweight osd.0 .5 ceph osd dump | grep ^osd.0 | grep 'weight 0.5' diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc index 2b7e716e177..f4dca10af3c 100644 --- a/src/common/ceph_strings.cc +++ b/src/common/ceph_strings.cc @@ -47,6 +47,10 @@ const char *ceph_osd_state_name(int s) return "backfillfull"; case CEPH_OSD_DESTROYED: return "destroyed"; + case CEPH_OSD_NODOWN: + return "nodown"; + case CEPH_OSD_NOOUT: + return "noout"; default: return "???"; } diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index 1e84904ba4f..0def97e1b37 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -677,6 +677,64 @@ int CrushWrapper::get_children(int id, list<int> *children) return b->size; } +int CrushWrapper::_get_leaves(int id, list<int> *leaves) +{ + assert(leaves); + + // Already leaf? 
+ if (id >= 0) { + leaves->push_back(id); + return 0; + } + + crush_bucket *b = get_bucket(id); + if (IS_ERR(b)) { + return -ENOENT; + } + + for (unsigned n = 0; n < b->size; n++) { + if (b->items[n] >= 0) { + leaves->push_back(b->items[n]); + } else { + // is a bucket, do recursive call + int r = _get_leaves(b->items[n], leaves); + if (r < 0) { + return r; + } + } + } + + return 0; // all is well +} + +int CrushWrapper::get_leaves(const string &name, set<int> *leaves) +{ + assert(leaves); + leaves->clear(); + + if (!name_exists(name)) { + return -ENOENT; + } + + int id = get_item_id(name); + if (id >= 0) { + // already leaf + leaves->insert(id); + return 0; + } + + list<int> unordered; + int r = _get_leaves(id, &unordered); + if (r < 0) { + return r; + } + + for (auto &p : unordered) { + leaves->insert(p); + } + + return 0; +} int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name, const map<string,string>& loc) // typename -> bucketname diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 0e48b8c8f66..b4a3bc19ca7 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -637,6 +637,15 @@ public: int get_children(int id, list<int> *children); /** + * enumerate leaves(devices) of given node + * + * @param name parent bucket name + * @return 0 on success or a negative errno on error. + */ + int get_leaves(const string &name, set<int> *leaves); + int _get_leaves(int id, list<int> *leaves); // worker + + /** * insert an item into the map at a specific position * * Add an item as a specific location of the hierarchy. 
diff --git a/src/include/rados.h b/src/include/rados.h index 2f32eb0387f..dbb28e507a9 100644 --- a/src/include/rados.h +++ b/src/include/rados.h @@ -110,14 +110,16 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS (1<<0) -#define CEPH_OSD_UP (1<<1) -#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ -#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ -#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */ -#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */ +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ +#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */ +#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */ #define CEPH_OSD_BACKFILLFULL (1<<6) /* osd is at or above backfillfull threshold */ -#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */ +#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */ +#define CEPH_OSD_NODOWN (1<<8) /* osd can not be marked down */ +#define CEPH_OSD_NOOUT (1<<9) /* osd can not be marked out */ extern const char *ceph_osd_state_name(int s); @@ -132,24 +134,24 @@ extern const char *ceph_osd_state_name(int s); /* * osd map flag bits */ -#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ -#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ -#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ -#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ -#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ -#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ -#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ -#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ -#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ -#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ 
-#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ -#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */ -#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */ -#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ -#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ -#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ -#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */ -#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */ +#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ +#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ +#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ +#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ +#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ +#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */ +#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */ +#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */ +#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */ +#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */ +#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */ +#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */ #define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */ /* these are hidden in 'ceph status' view */ diff --git a/src/mon/MonCommands.h 
b/src/mon/MonCommands.h index 61763d76112..efce0ad180b 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -454,6 +454,11 @@ COMMAND("osd getcrushmap " \ "name=epoch,type=CephInt,range=0,req=false", \ "get CRUSH map", "osd", "r", "cli,rest") COMMAND("osd getmaxosd", "show largest OSD id", "osd", "r", "cli,rest") +COMMAND("osd ls-tree " \ + "name=epoch,type=CephInt,range=0,req=false " + "name=name,type=CephString,req=true", \ + "show OSD ids under bucket <name> in the CRUSH map", \ + "osd", "r", "cli,rest") COMMAND("osd find " \ "name=id,type=CephOsdName", \ "find osd <id> in the CRUSH map and show its location", \ @@ -664,16 +669,46 @@ COMMAND("osd cluster_snap", "take cluster snapshot (disabled)", \ "osd", "r", "") COMMAND("osd down " \ "type=CephString,name=ids,n=N", \ - "set osd(s) <id> [<id>...] down", "osd", "rw", "cli,rest") + "set osd(s) <id> [<id>...] down, " \ + "or use <any|all|*> to set all osds down", \ + "osd", "rw", "cli,rest") COMMAND("osd out " \ "name=ids,type=CephString,n=N", \ - "set osd(s) <id> [<id>...] out", "osd", "rw", "cli,rest") + "set osd(s) <id> [<id>...] out, " \ + "or use <any|all|*> to set all osds out", \ + "osd", "rw", "cli,rest") COMMAND("osd in " \ "name=ids,type=CephString,n=N", \ - "set osd(s) <id> [<id>...] in", "osd", "rw", "cli,rest") + "set osd(s) <id> [<id>...] in, " + "can use <any|all|*> to automatically set all previously out osds in", \ + "osd", "rw", "cli,rest") COMMAND("osd rm " \ "name=ids,type=CephString,n=N", \ - "remove osd(s) <id> [<id>...] in", "osd", "rw", "cli,rest") + "remove osd(s) <id> [<id>...], " + "or use <any|all|*> to remove all osds", \ + "osd", "rw", "cli,rest") +COMMAND("osd add-noout " \ + "name=ids,type=CephString,n=N", \ + "mark osd(s) <id> [<id>...] as noout, " \ + "or use <all|any|*> to mark all osds as noout", \ + "osd", "rw", "cli,rest") +COMMAND("osd add-nodown " \ + "name=ids,type=CephString,n=N", \ + "mark osd(s) <id> [<id>...] 
as nodown, " \ + "or use <all|any|*> to mark all osds as nodown", \ + "osd", "rw", "cli,rest") +COMMAND("osd rm-noout " \ + "name=ids,type=CephString,n=N", \ + "allow osd(s) <id> [<id>...] to be marked out " \ + "(if they are currently marked as noout), " \ + "can use <all|any|*> to automatically filter out all noout osds", \ + "osd", "rw", "cli,rest") +COMMAND("osd rm-nodown " \ + "name=ids,type=CephString,n=N", \ + "allow osd(s) <id> [<id>...] to be marked down " \ + "(if they are currently marked as nodown), " \ + "can use <all|any|*> to automatically filter out all nodown osds", \ + "osd", "rw", "cli,rest") COMMAND("osd reweight " \ "name=id,type=CephOsdName " \ "type=CephFloat,name=weight,range=0.0|1.0", \ diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index a39e767806e..1d2483ba7fc 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1910,18 +1910,26 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op) bool OSDMonitor::can_mark_down(int i) { if (osdmap.test_flag(CEPH_OSDMAP_NODOWN)) { - dout(5) << "can_mark_down NODOWN flag set, will not mark osd." << i << " down" << dendl; + dout(5) << __func__ << " NODOWN flag set, will not mark osd." << i + << " down" << dendl; return false; } + + if (osdmap.is_nodown(i)) { + dout(5) << __func__ << " osd." << i << " is marked as nodown, " + << "will not mark it down" << dendl; + return false; + } + int num_osds = osdmap.get_num_osds(); if (num_osds == 0) { - dout(5) << "can_mark_down no osds" << dendl; + dout(5) << __func__ << " no osds" << dendl; return false; } int up = osdmap.get_num_up_osds() - pending_inc.get_net_marked_down(&osdmap); float up_ratio = (float)up / (float)num_osds; if (up_ratio < g_conf->mon_osd_min_up_ratio) { - dout(2) << "can_mark_down current up_ratio " << up_ratio << " < min " + dout(2) << __func__ << " current up_ratio " << up_ratio << " < min " << g_conf->mon_osd_min_up_ratio << ", will not mark osd." 
<< i << " down" << dendl; return false; @@ -1948,6 +1956,13 @@ bool OSDMonitor::can_mark_out(int i) dout(5) << __func__ << " NOOUT flag set, will not mark osds out" << dendl; return false; } + + if (osdmap.is_noout(i)) { + dout(5) << __func__ << " osd." << i << " is marked as noout, " + << "will not mark it out" << dendl; + return false; + } + int num_osds = osdmap.get_num_osds(); if (num_osds == 0) { dout(5) << __func__ << " no osds" << dendl; @@ -3809,6 +3824,32 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary, detail->push_back(make_pair(HEALTH_WARN, ss.str())); } } + + // warn if there are any nodown osds. + vector<int> nodown_osds; + osdmap.get_nodown_osds(&nodown_osds); + if (nodown_osds.size()) { + ostringstream ss; + ss << nodown_osds.size() << " nodown osd(s)"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) { + ss << ": " << nodown_osds; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } + + // warn if there are any noout osds. 
+ vector<int> noout_osds; + osdmap.get_noout_osds(&noout_osds); + if (noout_osds.size()) { + ostringstream ss; + ss << noout_osds.size() << " noout osd(s)"; + summary.push_back(make_pair(HEALTH_WARN, ss.str())); + if (detail) { + ss << ": " << noout_osds; + detail->push_back(make_pair(HEALTH_WARN, ss.str())); + } + } } // note: we leave it to ceph-mgr to generate details health warnings // with actual osd utilizations @@ -4057,7 +4098,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) prefix == "osd tree" || prefix == "osd ls" || prefix == "osd getmap" || - prefix == "osd getcrushmap") { + prefix == "osd getcrushmap" || + prefix == "osd ls-tree") { string val; epoch_t epoch = 0; @@ -4160,6 +4202,41 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op) } else if (prefix == "osd getcrushmap") { p->crush->encode(rdata, mon->get_quorum_con_features()); ss << "got crush map from osdmap epoch " << p->get_epoch(); + } else if (prefix == "osd ls-tree") { + string bucket_name; + cmd_getval(g_ceph_context, cmdmap, "name", bucket_name); + set<int> osds; + r = p->get_osds_by_bucket_name(bucket_name, &osds); + if (r == -ENOENT) { + ss << "\"" << bucket_name << "\" does not exist"; + goto reply; + } else if (r < 0) { + ss << "can not parse bucket name:\"" << bucket_name << "\""; + goto reply; + } + + if (f) { + f->open_array_section("osds"); + for (auto &i : osds) { + if (osdmap.exists(i)) { + f->dump_int("osd", i); + } + } + f->close_section(); + f->flush(ds); + } else { + bool first = true; + for (auto &i : osds) { + if (osdmap.exists(i)) { + if (!first) + ds << "\n"; + first = false; + ds << i; + } + } + } + + rdata.append(ds); } if (p != &osdmap) delete p; @@ -8417,79 +8494,296 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, prefix == "osd rm") { bool any = false; + bool stop = false; + bool verbose = true; vector<string> idvec; cmd_getval(g_ceph_context, cmdmap, "ids", idvec); - for (unsigned j = 0; j < idvec.size(); j++) { - long osd = 
parse_osd_id(idvec[j].c_str(), &ss); - if (osd < 0) { - ss << "invalid osd id" << osd; - err = -EINVAL; - continue; - } else if (!osdmap.exists(osd)) { - ss << "osd." << osd << " does not exist. "; - continue; + for (unsigned j = 0; j < idvec.size() && !stop; j++) { + set<int> osds; + + // wildcard? + if (j == 0 && + (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) { + if (prefix == "osd in") { + // touch out osds only + osdmap.get_out_osds(osds); + } else { + osdmap.get_all_osds(osds); + } + stop = true; + verbose = false; // so the output is less noisy. + } else { + long osd = parse_osd_id(idvec[j].c_str(), &ss); + if (osd < 0) { + ss << "invalid osd id" << osd; + err = -EINVAL; + continue; + } else if (!osdmap.exists(osd)) { + ss << "osd." << osd << " does not exist. "; + continue; + } + + osds.insert(osd); } - if (prefix == "osd down") { - if (osdmap.is_down(osd)) { - ss << "osd." << osd << " is already down. "; - } else { - pending_inc.new_state[osd] = CEPH_OSD_UP; - ss << "marked down osd." << osd << ". "; - any = true; - } - } else if (prefix == "osd out") { - if (osdmap.is_out(osd)) { - ss << "osd." << osd << " is already out. "; - } else { - pending_inc.new_weight[osd] = CEPH_OSD_OUT; - if (osdmap.osd_weight[osd]) { - if (pending_inc.new_xinfo.count(osd) == 0) { - pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd]; + + for (auto &osd : osds) { + if (prefix == "osd down") { + if (osdmap.is_down(osd)) { + if (verbose) + ss << "osd." << osd << " is already down. "; + } else { + pending_inc.pending_osd_state_set(osd, CEPH_OSD_UP); + ss << "marked down osd." << osd << ". "; + any = true; + } + } else if (prefix == "osd out") { + if (osdmap.is_out(osd)) { + if (verbose) + ss << "osd." << osd << " is already out. 
"; + } else { + pending_inc.new_weight[osd] = CEPH_OSD_OUT; + if (osdmap.osd_weight[osd]) { + if (pending_inc.new_xinfo.count(osd) == 0) { + pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd]; + } + pending_inc.new_xinfo[osd].old_weight = osdmap.osd_weight[osd]; } - pending_inc.new_xinfo[osd].old_weight = osdmap.osd_weight[osd]; + ss << "marked out osd." << osd << ". "; + any = true; } - ss << "marked out osd." << osd << ". "; - any = true; - } - } else if (prefix == "osd in") { - if (osdmap.is_in(osd)) { - ss << "osd." << osd << " is already in. "; - } else { - if (osdmap.osd_xinfo[osd].old_weight > 0) { - pending_inc.new_weight[osd] = osdmap.osd_xinfo[osd].old_weight; - if (pending_inc.new_xinfo.count(osd) == 0) { - pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd]; + } else if (prefix == "osd in") { + if (osdmap.is_in(osd)) { + if (verbose) + ss << "osd." << osd << " is already in. "; + } else { + if (osdmap.osd_xinfo[osd].old_weight > 0) { + pending_inc.new_weight[osd] = osdmap.osd_xinfo[osd].old_weight; + if (pending_inc.new_xinfo.count(osd) == 0) { + pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd]; + } + pending_inc.new_xinfo[osd].old_weight = 0; + } else { + pending_inc.new_weight[osd] = CEPH_OSD_IN; } - pending_inc.new_xinfo[osd].old_weight = 0; + ss << "marked in osd." << osd << ". "; + any = true; + } + } else if (prefix == "osd rm") { + err = prepare_command_osd_remove(osd); + + if (err == -EBUSY) { + if (any) + ss << ", "; + ss << "osd." << osd << " is still up; must be down before removal. "; } else { - pending_inc.new_weight[osd] = CEPH_OSD_IN; + assert(err == 0); + if (any) { + ss << ", osd." << osd; + } else { + ss << "removed osd." << osd; + } + any = true; } - ss << "marked in osd." << osd << ". 
"; - any = true; - } - } else if (prefix == "osd rm") { - err = prepare_command_osd_remove(osd); + } + } + } + if (any) { + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs, + get_last_committed() + 1)); + return true; + } + } else if (prefix == "osd add-noout" || + prefix == "osd add-nodown") { - if (err == -EBUSY) { - if (any) - ss << ", "; - ss << "osd." << osd << " is still up; must be down before removal. "; - } else { - assert(err == 0); - if (any) { - ss << ", osd." << osd; + bool noout = prefix == "osd add-noout"; + bool any = false; + bool stop = false; + + vector<string> idvec; + cmd_getval(g_ceph_context, cmdmap, "ids", idvec); + for (unsigned j = 0; j < idvec.size() && !stop; j++) { + + set<int> osds; + + // wildcard? + if (j == 0 && + (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) { + osdmap.get_all_osds(osds); + stop = true; + } else { + // try traditional single osd way + + long osd = parse_osd_id(idvec[j].c_str(), &ss); + if (osd < 0) { + // ss has reason for failure + ss << ", unable to parse osd id:\"" << idvec[j] << "\". "; + err = -EINVAL; + continue; + } + + osds.insert(osd); + } + + for (auto &osd : osds) { + + if (!osdmap.exists(osd)) { + ss << "osd." << osd << " does not exist. "; + continue; + } + + if (noout) { + if (osdmap.is_out(osd)) { + ss << "osd." << osd << " is already out. "; + continue; + } + + if (osdmap.is_noout(osd)) { // already noout? + // continue to check if there is any pending "rm-noout" request + if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) { + // cancel it + pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT); + any = true; + } + + continue; + } + + pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT); + any = true; + } else { + // nodown + + if (osdmap.is_down(osd)) { + ss << "osd." << osd << " is already down. "; + continue; + } + + if (osdmap.is_nodown(osd)) { // already nodown? 
+ // continue to check if there is any pending "rm-nodown" request + if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) { + // cancel it + pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN); + any = true; + } + + continue; + } + + pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN); + any = true; + } + } + } + + if (any) { + getline(ss, rs); + wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs, + get_last_committed() + 1)); + return true; + } + } else if (prefix == "osd rm-noout" || + prefix == "osd rm-nodown") { + + bool any = false; + bool stop = false; + bool noout = prefix == "osd rm-noout"; + + vector<string> idvec; + cmd_getval(g_ceph_context, cmdmap, "ids", idvec); + + for (unsigned j = 0; j < idvec.size() && !stop; j++) { + vector<int> osds; + + // wildcard? + if (j == 0 && + (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) { + + // touch previous noout/nodown osds only + if (noout) { + osdmap.get_noout_osds(&osds); + } else { + osdmap.get_nodown_osds(&osds); + } + + // cancel pending noout/nodown requests too, + // if there is any + vector<int> pending_state_osds; + (void) pending_inc.get_pending_state_osds(&pending_state_osds); + for (auto &p : pending_state_osds) { + if (noout) { + if (!osdmap.is_noout(p) && + pending_inc.pending_osd_has_state(p, CEPH_OSD_NOOUT)) { + pending_inc.pending_osd_state_clear(p, CEPH_OSD_NOOUT); + any = true; + } } else { - ss << "removed osd." << osd; + if (!osdmap.is_nodown(p) && + pending_inc.pending_osd_has_state(p, CEPH_OSD_NODOWN)) { + pending_inc.pending_osd_state_clear(p, CEPH_OSD_NODOWN); + any = true; + } } - any = true; - } + } + + stop = true; + } else { + // try traditional single osd way + + long osd = parse_osd_id(idvec[j].c_str(), &ss); + if (osd < 0) { + // ss has reason for failure + ss << ", unable to parse osd id:\"" << idvec[j] << "\". 
"; + err = -EINVAL; + continue; + } + + osds.push_back(osd); + } + + for (auto &osd : osds) { + + if (!osdmap.exists(osd)) { + ss << "osd." << osd << " does not exist. "; + continue; + } + + if (noout) { + if (osdmap.is_noout(osd)) { + pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT); + any = true; + } else { + // noout flag is not set or has already been successfully cancelled + // continue to check pending_inc + if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) { + // cancel pending noout flag + pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT); + any = true; + } + } + } else { + // nodown + if (osdmap.is_nodown(osd)) { + pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN); + any = true; + } else { + // nodown flag is not set or has already been successfully cancelled + // continue to check pending_inc + if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) { + // cancel pending nodown flag + pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN); + any = true; + } + } + } } } + if (any) { getline(ss, rs); wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs, - get_last_committed() + 1)); + get_last_committed() + 1)); return true; } } else if (prefix == "osd pg-temp") { diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc index 7a712901a4c..c63bc6d04ea 100644 --- a/src/osd/OSDMap.cc +++ b/src/osd/OSDMap.cc @@ -1219,6 +1219,14 @@ void OSDMap::get_up_osds(set<int32_t>& ls) const } } +void OSDMap::get_out_osds(set<int32_t>& ls) const +{ + for (int i = 0; i < max_osd; i++) { + if (is_out(i)) + ls.insert(i); + } +} + void OSDMap::calc_state_set(int state, set<string>& st) { unsigned t = state; @@ -3895,3 +3903,8 @@ int OSDMap::calc_pg_upmaps( ldout(cct, 10) << " end deviation " << end_deviation << dendl; return num_changed; } + +int OSDMap::get_osds_by_bucket_name(const string &name, set<int> *osds) const +{ + return crush->get_leaves(name, osds); +} diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h index 
2b5d15790d3..5178132abbb 100644 --- a/src/osd/OSDMap.h +++ b/src/osd/OSDMap.h @@ -457,6 +457,31 @@ public: /// propage update pools' snap metadata to any of their tiers int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base); + + /// collect the osds that have any pending state change + size_t get_pending_state_osds(vector<int> *osds) { + assert(osds); + osds->clear(); + + for (auto &p : new_state) { + osds->push_back(p.first); + } + + return osds->size(); + } + + bool pending_osd_has_state(int osd, unsigned state) { + return new_state.count(osd) && (new_state[osd] & state) != 0; + } + + void pending_osd_state_set(int osd, unsigned state) { + new_state[osd] |= state; + } + + void pending_osd_state_clear(int osd, unsigned state) { + new_state[osd] &= ~state; + } + }; private: @@ -634,6 +659,7 @@ public: void get_all_osds(set<int32_t>& ls) const; void get_up_osds(set<int32_t>& ls) const; + void get_out_osds(set<int32_t>& ls) const; unsigned get_num_pg_temp() const { return pg_temp->size(); } @@ -745,6 +771,36 @@ public: return !is_out(osd); } + bool is_nodown(int osd) const { + return exists(osd) && (osd_state[osd] & CEPH_OSD_NODOWN); + } + + bool is_noout(int osd) const { + return exists(osd) && (osd_state[osd] & CEPH_OSD_NOOUT); + } + + void get_nodown_osds(vector<int> *osds) const { + assert(osds); + osds->clear(); + + for (int i = 0; i < max_osd; i++) { + if (is_nodown(i)) { + osds->push_back(i); + } + } + } + + void get_noout_osds(vector<int> *osds) const { + assert(osds); + osds->clear(); + + for (int i = 0; i < max_osd; i++) { + if (is_noout(i)) { + osds->push_back(i); + } + } + } + /** * check if an entire crush subtree is down */ @@ -1166,6 +1222,8 @@ public: Incremental *pending_inc ); + int get_osds_by_bucket_name(const string &name, set<int> *osds) const; + /* * handy helpers to build simple maps... */ |