summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/common/ceph_strings.cc 4
-rw-r--r-- src/crush/CrushWrapper.cc 58
-rw-r--r-- src/crush/CrushWrapper.h 9
-rw-r--r-- src/include/rados.h 52
-rw-r--r-- src/mon/MonCommands.h 43
-rw-r--r-- src/mon/OSDMonitor.cc 414
-rw-r--r-- src/osd/OSDMap.cc 13
-rw-r--r-- src/osd/OSDMap.h 58
8 files changed, 562 insertions, 89 deletions
diff --git a/src/common/ceph_strings.cc b/src/common/ceph_strings.cc
index 2b7e716e177..f4dca10af3c 100644
--- a/src/common/ceph_strings.cc
+++ b/src/common/ceph_strings.cc
@@ -47,6 +47,10 @@ const char *ceph_osd_state_name(int s)
return "backfillfull";
case CEPH_OSD_DESTROYED:
return "destroyed";
+ case CEPH_OSD_NODOWN:
+ return "nodown";
+ case CEPH_OSD_NOOUT:
+ return "noout";
default:
return "???";
}
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 1e84904ba4f..0def97e1b37 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -677,6 +677,64 @@ int CrushWrapper::get_children(int id, list<int> *children)
return b->size;
}
+/**
+ * Worker for get_leaves(): append every leaf (device) reachable from
+ * @p id to @p leaves.
+ *
+ * A non-negative id is treated as a leaf and appended as-is; a negative id
+ * is looked up as a bucket and its items are walked in order, recursing
+ * into nested buckets.
+ *
+ * @param id      item to expand
+ * @param leaves  output list; appended to, never cleared (must not be null).
+ *                May hold a partial result when an error is returned.
+ * @return 0 on success, -ENOENT if a bucket cannot be looked up, or the
+ *         negative value returned by a nested walk
+ */
+int CrushWrapper::_get_leaves(int id, list<int> *leaves)
+{
+  assert(leaves);
+
+  if (id >= 0) {
+    // a device: nothing to descend into
+    leaves->push_back(id);
+    return 0;
+  }
+
+  crush_bucket *bucket = get_bucket(id);
+  if (IS_ERR(bucket))
+    return -ENOENT;
+
+  for (unsigned idx = 0; idx < bucket->size; ++idx) {
+    const int child = bucket->items[idx];
+    if (child >= 0) {
+      leaves->push_back(child);
+      continue;
+    }
+
+    // nested bucket: recurse and give up on the first failure
+    const int rc = _get_leaves(child, leaves);
+    if (rc < 0)
+      return rc;
+  }
+
+  return 0;
+}
+
+/**
+ * Collect the ids of all leaves (devices) found under the named item.
+ *
+ * @param name    name of the item to expand; if it resolves to a
+ *                non-negative id it is reported as its own single leaf
+ * @param leaves  output set; cleared first and filled only on success
+ *                (must not be null)
+ * @return 0 on success, -ENOENT if @p name does not exist, or the negative
+ *         value reported by _get_leaves()
+ */
+int CrushWrapper::get_leaves(const string &name, set<int> *leaves)
+{
+  assert(leaves);
+  leaves->clear();
+
+  if (!name_exists(name))
+    return -ENOENT;
+
+  const int root = get_item_id(name);
+  if (root >= 0) {
+    // the name refers to a leaf already
+    leaves->insert(root);
+    return 0;
+  }
+
+  // walk into a scratch list so that a failed walk leaves *leaves empty
+  list<int> found;
+  const int rc = _get_leaves(root, &found);
+  if (rc < 0)
+    return rc;
+
+  leaves->insert(found.begin(), found.end());
+  return 0;
+}
int CrushWrapper::insert_item(CephContext *cct, int item, float weight, string name,
const map<string,string>& loc) // typename -> bucketname
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 0e48b8c8f66..b4a3bc19ca7 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -637,6 +637,15 @@ public:
int get_children(int id, list<int> *children);
/**
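+   * enumerate the leaves (devices) found under a given node
+   *
+   * @param name name of the parent bucket (a device name yields just that device)
+   * @param leaves output set of leaf ids; cleared first and left empty on error
+   * @return 0 on success or a negative errno on error.
+   */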
+ int get_leaves(const string &name, set<int> *leaves);
+ int _get_leaves(int id, list<int> *leaves); // worker
+
+ /**
* insert an item into the map at a specific position
*
* Add an item as a specific location of the hierarchy.
diff --git a/src/include/rados.h b/src/include/rados.h
index 2f32eb0387f..dbb28e507a9 100644
--- a/src/include/rados.h
+++ b/src/include/rados.h
@@ -110,14 +110,16 @@ struct ceph_eversion {
*/
/* status bits */
-#define CEPH_OSD_EXISTS (1<<0)
-#define CEPH_OSD_UP (1<<1)
-#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
-#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
-#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */
-#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */
+#define CEPH_OSD_EXISTS (1<<0)
+#define CEPH_OSD_UP (1<<1)
+#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */
+#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */
+#define CEPH_OSD_FULL (1<<4) /* osd is at or above full threshold */
+#define CEPH_OSD_NEARFULL (1<<5) /* osd is at or above nearfull threshold */
#define CEPH_OSD_BACKFILLFULL (1<<6) /* osd is at or above backfillfull threshold */
-#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */
+#define CEPH_OSD_DESTROYED (1<<7) /* osd has been destroyed */
+#define CEPH_OSD_NODOWN (1<<8) /* osd can not be marked down */
+#define CEPH_OSD_NOOUT (1<<9) /* osd can not be marked out */
extern const char *ceph_osd_state_name(int s);
@@ -132,24 +134,24 @@ extern const char *ceph_osd_state_name(int s);
/*
* osd map flag bits
*/
-#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
-#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
-#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
-#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
-#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
-#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
-#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
-#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
-#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
-#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
-#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
-#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */
-#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */
-#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
-#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
-#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
-#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
-#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
+#define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */
+#define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */
+#define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */
+#define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */
+#define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */
+#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */
+#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */
+#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */
+#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
+#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
+#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
+#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */
+#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */
+#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
+#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
+#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
+#define CEPH_OSDMAP_REQUIRE_JEWEL (1<<16) /* require jewel for booting osds */
+#define CEPH_OSDMAP_REQUIRE_KRAKEN (1<<17) /* require kraken for booting osds */
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
/* these are hidden in 'ceph status' view */
diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h
index 61763d76112..efce0ad180b 100644
--- a/src/mon/MonCommands.h
+++ b/src/mon/MonCommands.h
@@ -454,6 +454,11 @@ COMMAND("osd getcrushmap " \
"name=epoch,type=CephInt,range=0,req=false", \
"get CRUSH map", "osd", "r", "cli,rest")
COMMAND("osd getmaxosd", "show largest OSD id", "osd", "r", "cli,rest")
+COMMAND("osd ls-tree " \
+ "name=epoch,type=CephInt,range=0,req=false "
+ "name=name,type=CephString,req=true", \
+ "show OSD ids under bucket <name> in the CRUSH map", \
+ "osd", "r", "cli,rest")
COMMAND("osd find " \
"name=id,type=CephOsdName", \
"find osd <id> in the CRUSH map and show its location", \
@@ -664,16 +669,46 @@ COMMAND("osd cluster_snap", "take cluster snapshot (disabled)", \
"osd", "r", "")
COMMAND("osd down " \
"type=CephString,name=ids,n=N", \
- "set osd(s) <id> [<id>...] down", "osd", "rw", "cli,rest")
+ "set osd(s) <id> [<id>...] down, " \
+ "or use <any|all|*> to set all osds down", \
+ "osd", "rw", "cli,rest")
COMMAND("osd out " \
"name=ids,type=CephString,n=N", \
- "set osd(s) <id> [<id>...] out", "osd", "rw", "cli,rest")
+ "set osd(s) <id> [<id>...] out, " \
+ "or use <any|all|*> to set all osds out", \
+ "osd", "rw", "cli,rest")
COMMAND("osd in " \
"name=ids,type=CephString,n=N", \
- "set osd(s) <id> [<id>...] in", "osd", "rw", "cli,rest")
+ "set osd(s) <id> [<id>...] in, "
+ "can use <any|all|*> to automatically set all previously out osds in", \
+ "osd", "rw", "cli,rest")
COMMAND("osd rm " \
"name=ids,type=CephString,n=N", \
- "remove osd(s) <id> [<id>...] in", "osd", "rw", "cli,rest")
+ "remove osd(s) <id> [<id>...], "
+ "or use <any|all|*> to remove all osds", \
+ "osd", "rw", "cli,rest")
+COMMAND("osd add-noout " \
+ "name=ids,type=CephString,n=N", \
+ "mark osd(s) <id> [<id>...] as noout, " \
+ "or use <all|any|*> to mark all osds as noout", \
+ "osd", "rw", "cli,rest")
+COMMAND("osd add-nodown " \
+ "name=ids,type=CephString,n=N", \
+ "mark osd(s) <id> [<id>...] as nodown, " \
+ "or use <all|any|*> to mark all osds as nodown", \
+ "osd", "rw", "cli,rest")
+COMMAND("osd rm-noout " \
+ "name=ids,type=CephString,n=N", \
+ "allow osd(s) <id> [<id>...] to be marked out " \
+ "(if they are currently marked as noout), " \
+ "can use <all|any|*> to automatically filter out all noout osds", \
+ "osd", "rw", "cli,rest")
+COMMAND("osd rm-nodown " \
+ "name=ids,type=CephString,n=N", \
+ "allow osd(s) <id> [<id>...] to be marked down " \
+ "(if they are currently marked as nodown), " \
+ "can use <all|any|*> to automatically filter out all nodown osds", \
+ "osd", "rw", "cli,rest")
COMMAND("osd reweight " \
"name=id,type=CephOsdName " \
"type=CephFloat,name=weight,range=0.0|1.0", \
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index a39e767806e..1d2483ba7fc 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -1910,18 +1910,26 @@ bool OSDMonitor::prepare_mark_me_down(MonOpRequestRef op)
bool OSDMonitor::can_mark_down(int i)
{
if (osdmap.test_flag(CEPH_OSDMAP_NODOWN)) {
- dout(5) << "can_mark_down NODOWN flag set, will not mark osd." << i << " down" << dendl;
+ dout(5) << __func__ << " NODOWN flag set, will not mark osd." << i
+ << " down" << dendl;
return false;
}
+
+ if (osdmap.is_nodown(i)) {
+ dout(5) << __func__ << " osd." << i << " is marked as nodown, "
+ << "will not mark it down" << dendl;
+ return false;
+ }
+
int num_osds = osdmap.get_num_osds();
if (num_osds == 0) {
- dout(5) << "can_mark_down no osds" << dendl;
+ dout(5) << __func__ << " no osds" << dendl;
return false;
}
int up = osdmap.get_num_up_osds() - pending_inc.get_net_marked_down(&osdmap);
float up_ratio = (float)up / (float)num_osds;
if (up_ratio < g_conf->mon_osd_min_up_ratio) {
- dout(2) << "can_mark_down current up_ratio " << up_ratio << " < min "
+ dout(2) << __func__ << " current up_ratio " << up_ratio << " < min "
<< g_conf->mon_osd_min_up_ratio
<< ", will not mark osd." << i << " down" << dendl;
return false;
@@ -1948,6 +1956,13 @@ bool OSDMonitor::can_mark_out(int i)
dout(5) << __func__ << " NOOUT flag set, will not mark osds out" << dendl;
return false;
}
+
+ if (osdmap.is_noout(i)) {
+ dout(5) << __func__ << " osd." << i << " is marked as noout, "
+ << "will not mark it out" << dendl;
+ return false;
+ }
+
int num_osds = osdmap.get_num_osds();
if (num_osds == 0) {
dout(5) << __func__ << " no osds" << dendl;
@@ -3809,6 +3824,32 @@ void OSDMonitor::get_health(list<pair<health_status_t,string> >& summary,
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
}
+
+ // warn if there is any nodown osds.
+ vector<int> nodown_osds;
+ osdmap.get_nodown_osds(&nodown_osds);
+ if (nodown_osds.size()) {
+ ostringstream ss;
+ ss << nodown_osds.size() << " nodown osd(s)";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail) {
+ ss << ": " << nodown_osds;
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ }
+
+ // warn if there is any noout osds.
+ vector<int> noout_osds;
+ osdmap.get_noout_osds(&noout_osds);
+ if (noout_osds.size()) {
+ ostringstream ss;
+ ss << noout_osds.size() << " noout osd(s)";
+ summary.push_back(make_pair(HEALTH_WARN, ss.str()));
+ if (detail) {
+ ss << ": " << noout_osds;
+ detail->push_back(make_pair(HEALTH_WARN, ss.str()));
+ }
+ }
}
// note: we leave it to ceph-mgr to generate details health warnings
// with actual osd utilizations
@@ -4057,7 +4098,8 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
prefix == "osd tree" ||
prefix == "osd ls" ||
prefix == "osd getmap" ||
- prefix == "osd getcrushmap") {
+ prefix == "osd getcrushmap" ||
+ prefix == "osd ls-tree") {
string val;
epoch_t epoch = 0;
@@ -4160,6 +4202,41 @@ bool OSDMonitor::preprocess_command(MonOpRequestRef op)
} else if (prefix == "osd getcrushmap") {
p->crush->encode(rdata, mon->get_quorum_con_features());
ss << "got crush map from osdmap epoch " << p->get_epoch();
+ } else if (prefix == "osd ls-tree") {
+ string bucket_name;
+ cmd_getval(g_ceph_context, cmdmap, "name", bucket_name);
+ set<int> osds;
+ r = p->get_osds_by_bucket_name(bucket_name, &osds);
+ if (r == -ENOENT) {
+ ss << "\"" << bucket_name << "\" does not exist";
+ goto reply;
+ } else if (r < 0) {
+ ss << "can not parse bucket name:\"" << bucket_name << "\"";
+ goto reply;
+ }
+
+ if (f) {
+ f->open_array_section("osds");
+ for (auto &i : osds) {
+ if (osdmap.exists(i)) {
+ f->dump_int("osd", i);
+ }
+ }
+ f->close_section();
+ f->flush(ds);
+ } else {
+ bool first = true;
+ for (auto &i : osds) {
+ if (osdmap.exists(i)) {
+ if (!first)
+ ds << "\n";
+ first = false;
+ ds << i;
+ }
+ }
+ }
+
+ rdata.append(ds);
}
if (p != &osdmap)
delete p;
@@ -8417,79 +8494,296 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op,
prefix == "osd rm") {
bool any = false;
+ bool stop = false;
+ bool verbose = true;
vector<string> idvec;
cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
- for (unsigned j = 0; j < idvec.size(); j++) {
- long osd = parse_osd_id(idvec[j].c_str(), &ss);
- if (osd < 0) {
- ss << "invalid osd id" << osd;
- err = -EINVAL;
- continue;
- } else if (!osdmap.exists(osd)) {
- ss << "osd." << osd << " does not exist. ";
- continue;
+ for (unsigned j = 0; j < idvec.size() && !stop; j++) {
+ set<int> osds;
+
+ // wildcard?
+ if (j == 0 &&
+ (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
+ if (prefix == "osd in") {
+ // touch out osds only
+ osdmap.get_out_osds(osds);
+ } else {
+ osdmap.get_all_osds(osds);
+ }
+ stop = true;
+ verbose = false; // so the output is less noisy.
+ } else {
+ long osd = parse_osd_id(idvec[j].c_str(), &ss);
+ if (osd < 0) {
+ ss << "invalid osd id" << osd;
+ err = -EINVAL;
+ continue;
+ } else if (!osdmap.exists(osd)) {
+ ss << "osd." << osd << " does not exist. ";
+ continue;
+ }
+
+ osds.insert(osd);
}
- if (prefix == "osd down") {
- if (osdmap.is_down(osd)) {
- ss << "osd." << osd << " is already down. ";
- } else {
- pending_inc.new_state[osd] = CEPH_OSD_UP;
- ss << "marked down osd." << osd << ". ";
- any = true;
- }
- } else if (prefix == "osd out") {
- if (osdmap.is_out(osd)) {
- ss << "osd." << osd << " is already out. ";
- } else {
- pending_inc.new_weight[osd] = CEPH_OSD_OUT;
- if (osdmap.osd_weight[osd]) {
- if (pending_inc.new_xinfo.count(osd) == 0) {
- pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd];
+
+ for (auto &osd : osds) {
+ if (prefix == "osd down") {
+ if (osdmap.is_down(osd)) {
+ if (verbose)
+ ss << "osd." << osd << " is already down. ";
+ } else {
+ pending_inc.pending_osd_state_set(osd, CEPH_OSD_UP);
+ ss << "marked down osd." << osd << ". ";
+ any = true;
+ }
+ } else if (prefix == "osd out") {
+ if (osdmap.is_out(osd)) {
+ if (verbose)
+ ss << "osd." << osd << " is already out. ";
+ } else {
+ pending_inc.new_weight[osd] = CEPH_OSD_OUT;
+ if (osdmap.osd_weight[osd]) {
+ if (pending_inc.new_xinfo.count(osd) == 0) {
+ pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd];
+ }
+ pending_inc.new_xinfo[osd].old_weight = osdmap.osd_weight[osd];
}
- pending_inc.new_xinfo[osd].old_weight = osdmap.osd_weight[osd];
+ ss << "marked out osd." << osd << ". ";
+ any = true;
}
- ss << "marked out osd." << osd << ". ";
- any = true;
- }
- } else if (prefix == "osd in") {
- if (osdmap.is_in(osd)) {
- ss << "osd." << osd << " is already in. ";
- } else {
- if (osdmap.osd_xinfo[osd].old_weight > 0) {
- pending_inc.new_weight[osd] = osdmap.osd_xinfo[osd].old_weight;
- if (pending_inc.new_xinfo.count(osd) == 0) {
- pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd];
+ } else if (prefix == "osd in") {
+ if (osdmap.is_in(osd)) {
+ if (verbose)
+ ss << "osd." << osd << " is already in. ";
+ } else {
+ if (osdmap.osd_xinfo[osd].old_weight > 0) {
+ pending_inc.new_weight[osd] = osdmap.osd_xinfo[osd].old_weight;
+ if (pending_inc.new_xinfo.count(osd) == 0) {
+ pending_inc.new_xinfo[osd] = osdmap.osd_xinfo[osd];
+ }
+ pending_inc.new_xinfo[osd].old_weight = 0;
+ } else {
+ pending_inc.new_weight[osd] = CEPH_OSD_IN;
}
- pending_inc.new_xinfo[osd].old_weight = 0;
+ ss << "marked in osd." << osd << ". ";
+ any = true;
+ }
+ } else if (prefix == "osd rm") {
+ err = prepare_command_osd_remove(osd);
+
+ if (err == -EBUSY) {
+ if (any)
+ ss << ", ";
+ ss << "osd." << osd << " is still up; must be down before removal. ";
} else {
- pending_inc.new_weight[osd] = CEPH_OSD_IN;
+ assert(err == 0);
+ if (any) {
+ ss << ", osd." << osd;
+ } else {
+ ss << "removed osd." << osd;
+ }
+ any = true;
}
- ss << "marked in osd." << osd << ". ";
- any = true;
- }
- } else if (prefix == "osd rm") {
- err = prepare_command_osd_remove(osd);
+ }
+ }
+ }
+ if (any) {
+ getline(ss, rs);
+ wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs,
+ get_last_committed() + 1));
+ return true;
+ }
+ } else if (prefix == "osd add-noout" ||
+ prefix == "osd add-nodown") {
- if (err == -EBUSY) {
- if (any)
- ss << ", ";
- ss << "osd." << osd << " is still up; must be down before removal. ";
- } else {
- assert(err == 0);
- if (any) {
- ss << ", osd." << osd;
+ bool noout = prefix == "osd add-noout";
+ bool any = false;
+ bool stop = false;
+
+ vector<string> idvec;
+ cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
+ for (unsigned j = 0; j < idvec.size() && !stop; j++) {
+
+ set<int> osds;
+
+ // wildcard?
+ if (j == 0 &&
+ (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
+ osdmap.get_all_osds(osds);
+ stop = true;
+ } else {
+ // try traditional single osd way
+
+ long osd = parse_osd_id(idvec[j].c_str(), &ss);
+ if (osd < 0) {
+ // ss has reason for failure
+ ss << ", unable to parse osd id:\"" << idvec[j] << "\". ";
+ err = -EINVAL;
+ continue;
+ }
+
+ osds.insert(osd);
+ }
+
+ for (auto &osd : osds) {
+
+ if (!osdmap.exists(osd)) {
+ ss << "osd." << osd << " does not exist. ";
+ continue;
+ }
+
+ if (noout) {
+ if (osdmap.is_out(osd)) {
+ ss << "osd." << osd << " is already out. ";
+ continue;
+ }
+
+ if (osdmap.is_noout(osd)) { // already noout?
+ // continue to check if there is any pending "rm-noout" request
+ if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) {
+ // cancel it
+ pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT);
+ any = true;
+ }
+
+ continue;
+ }
+
+ pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT);
+ any = true;
+ } else {
+ // nodown
+
+ if (osdmap.is_down(osd)) {
+ ss << "osd." << osd << " is already down. ";
+ continue;
+ }
+
+ if (osdmap.is_nodown(osd)) { // already nodown?
+ // continue to check if there is any pending "rm-nodown" request
+ if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) {
+ // cancel it
+ pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN);
+ any = true;
+ }
+
+ continue;
+ }
+
+ pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN);
+ any = true;
+ }
+ }
+ }
+
+ if (any) {
+ getline(ss, rs);
+ wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs,
+ get_last_committed() + 1));
+ return true;
+ }
+ } else if (prefix == "osd rm-noout" ||
+ prefix == "osd rm-nodown") {
+
+ bool any = false;
+ bool stop = false;
+ bool noout = prefix == "osd rm-noout";
+
+ vector<string> idvec;
+ cmd_getval(g_ceph_context, cmdmap, "ids", idvec);
+
+ for (unsigned j = 0; j < idvec.size() && !stop; j++) {
+ vector<int> osds;
+
+ // wildcard?
+ if (j == 0 &&
+ (idvec[0] == "any" || idvec[0] == "all" || idvec[0] == "*")) {
+
+ // touch previous noout/nodown osds only
+ if (noout) {
+ osdmap.get_noout_osds(&osds);
+ } else {
+ osdmap.get_nodown_osds(&osds);
+ }
+
+ // cancel pending noout/nodown requests too,
+ // if there is any
+ vector<int> pending_state_osds;
+ (void) pending_inc.get_pending_state_osds(&pending_state_osds);
+ for (auto &p : pending_state_osds) {
+ if (noout) {
+ if (!osdmap.is_noout(p) &&
+ pending_inc.pending_osd_has_state(p, CEPH_OSD_NOOUT)) {
+ pending_inc.pending_osd_state_clear(p, CEPH_OSD_NOOUT);
+ any = true;
+ }
} else {
- ss << "removed osd." << osd;
+ if (!osdmap.is_nodown(p) &&
+ pending_inc.pending_osd_has_state(p, CEPH_OSD_NODOWN)) {
+ pending_inc.pending_osd_state_clear(p, CEPH_OSD_NODOWN);
+ any = true;
+ }
}
- any = true;
- }
+ }
+
+ stop = true;
+ } else {
+ // try traditional single osd way
+
+ long osd = parse_osd_id(idvec[j].c_str(), &ss);
+ if (osd < 0) {
+ // ss has reason for failure
+ ss << ", unable to parse osd id:\"" << idvec[j] << "\". ";
+ err = -EINVAL;
+ continue;
+ }
+
+ osds.push_back(osd);
+ }
+
+ for (auto &osd : osds) {
+
+ if (!osdmap.exists(osd)) {
+ ss << "osd." << osd << " does not exist. ";
+ continue;
+ }
+
+ if (noout) {
+ if (osdmap.is_noout(osd)) {
+ pending_inc.pending_osd_state_set(osd, CEPH_OSD_NOOUT);
+ any = true;
+ } else {
+ // noout flag is not set or has already been successfully cancelled
+ // continue to check pending_inc
+ if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NOOUT)) {
+ // cancel pending noout flag
+ pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NOOUT);
+ any = true;
+ }
+ }
+ } else {
+ // nodown
+ if (osdmap.is_nodown(osd)) {
+ pending_inc.pending_osd_state_set(osd, CEPH_OSD_NODOWN);
+ any = true;
+ } else {
+ // nodown flag is not set or has already been successfully cancelled
+ // continue to check pending_inc
+ if (pending_inc.pending_osd_has_state(osd, CEPH_OSD_NODOWN)) {
+ // cancel pending nodown flag
+ pending_inc.pending_osd_state_clear(osd, CEPH_OSD_NODOWN);
+ any = true;
+ }
+ }
+ }
}
}
+
if (any) {
getline(ss, rs);
wait_for_finished_proposal(op, new Monitor::C_Command(mon, op, err, rs,
- get_last_committed() + 1));
+ get_last_committed() + 1));
return true;
}
} else if (prefix == "osd pg-temp") {
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 7a712901a4c..c63bc6d04ea 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -1219,6 +1219,14 @@ void OSDMap::get_up_osds(set<int32_t>& ls) const
}
}
+/**
+ * Collect the ids of existing OSDs that are currently out.
+ *
+ * Ids below max_osd that do not exist are skipped, so a caller that acts on
+ * the whole result (such as the "osd in" wildcard) cannot end up touching
+ * an OSD that does not exist.
+ * NOTE(review): is_out() is defined outside this hunk; the exists() guard
+ * is merely redundant if is_out() already rejects such ids -- confirm.
+ *
+ * @param ls  output set; matching ids are inserted, prior content is kept
+ */
+void OSDMap::get_out_osds(set<int32_t>& ls) const
+{
+  for (int i = 0; i < max_osd; i++) {
+    if (exists(i) && is_out(i))
+      ls.insert(i);
+  }
+}
+
void OSDMap::calc_state_set(int state, set<string>& st)
{
unsigned t = state;
@@ -3895,3 +3903,8 @@ int OSDMap::calc_pg_upmaps(
ldout(cct, 10) << " end deviation " << end_deviation << dendl;
return num_changed;
}
+
+/**
+ * Look up the OSD ids located under a CRUSH item, by name.
+ *
+ * Forwards to the CRUSH map's get_leaves() without further processing.
+ *
+ * @param name  name handed to get_leaves()
+ * @param osds  output set handed to get_leaves()
+ * @return the value returned by get_leaves()
+ */
+int OSDMap::get_osds_by_bucket_name(const string &name, set<int> *osds) const
+{
+  const int rc = crush->get_leaves(name, osds);
+  return rc;
+}
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 2b5d15790d3..5178132abbb 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -457,6 +457,31 @@ public:
/// propage update pools' snap metadata to any of their tiers
int propagate_snaps_to_tiers(CephContext *cct, const OSDMap &base);
+
+    /// list the ids of all osds that have an entry in new_state
+    /// @param osds output vector; cleared, then filled in new_state
+    ///             iteration order (must not be null)
+    /// @return number of ids written to @p osds
+    size_t get_pending_state_osds(vector<int> *osds) {
+      assert(osds);
+      osds->clear();
+
+      for (auto it = new_state.begin(); it != new_state.end(); ++it)
+        osds->push_back(it->first);
+
+      return osds->size();
+    }
+
+    /// true if new_state holds an entry for @p osd that has at least one
+    /// bit of @p state set
+    bool pending_osd_has_state(int osd, unsigned state) {
+      auto it = new_state.find(osd);
+      if (it == new_state.end())
+        return false;
+      return (it->second & state) != 0;
+    }
+
+    /// OR @p state into the new_state entry for @p osd, creating the entry
+    /// when there is none yet
+    void pending_osd_state_set(int osd, unsigned state) {
+      auto &pending = new_state[osd];
+      pending |= state;
+    }
+
+    /// Drop @p state from the new_state entry recorded for @p osd.
+    ///
+    /// Does nothing when the entry is missing or does not carry any bit of
+    /// @p state, so asking to clear a flag never creates an empty entry.
+    /// An entry whose last bit gets cleared is removed for the same reason.
+    /// NOTE(review): an all-zero new_state entry is presumably not a no-op
+    /// for the code that applies the incremental -- confirm against
+    /// OSDMap::apply_incremental.
+    void pending_osd_state_clear(int osd, unsigned state) {
+      auto it = new_state.find(osd);
+      if (it == new_state.end() || (it->second & state) == 0)
+        return;
+
+      it->second &= ~state;
+      if (it->second == 0)
+        new_state.erase(it);
+    }
+
};
private:
@@ -634,6 +659,7 @@ public:
void get_all_osds(set<int32_t>& ls) const;
void get_up_osds(set<int32_t>& ls) const;
+ void get_out_osds(set<int32_t>& ls) const;
unsigned get_num_pg_temp() const {
return pg_temp->size();
}
@@ -745,6 +771,36 @@ public:
return !is_out(osd);
}
+  /// true if @p osd exists and has CEPH_OSD_NODOWN set in its state
+  bool is_nodown(int osd) const {
+    // check existence first: osd_state is only indexed for existing osds
+    if (!exists(osd))
+      return false;
+    return (osd_state[osd] & CEPH_OSD_NODOWN) != 0;
+  }
+
+  /// true if @p osd exists and has CEPH_OSD_NOOUT set in its state
+  bool is_noout(int osd) const {
+    // check existence first: osd_state is only indexed for existing osds
+    if (!exists(osd))
+      return false;
+    return (osd_state[osd] & CEPH_OSD_NOOUT) != 0;
+  }
+
+  /// collect, in ascending id order, every osd for which is_nodown() holds
+  /// @param osds output vector; cleared before being filled (must not be null)
+  void get_nodown_osds(vector<int> *osds) const {
+    assert(osds);
+    osds->clear();
+
+    for (int id = 0; id < max_osd; ++id) {
+      if (!is_nodown(id))
+        continue;
+      osds->push_back(id);
+    }
+  }
+
+  /// collect, in ascending id order, every osd for which is_noout() holds
+  /// @param osds output vector; cleared before being filled (must not be null)
+  void get_noout_osds(vector<int> *osds) const {
+    assert(osds);
+    osds->clear();
+
+    for (int id = 0; id < max_osd; ++id) {
+      if (!is_noout(id))
+        continue;
+      osds->push_back(id);
+    }
+  }
+
/**
* check if an entire crush subtree is down
*/
@@ -1166,6 +1222,8 @@ public:
Incremental *pending_inc
);
+ int get_osds_by_bucket_name(const string &name, set<int> *osds) const;
+
/*
* handy helpers to build simple maps...
*/