summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorxie xingguo <xie.xingguo@zte.com.cn>2019-03-05 07:28:59 +0100
committerxie xingguo <xie.xingguo@zte.com.cn>2019-03-06 04:10:56 +0100
commitaf02d1031d58826464b5defc40026be2d7419aa8 (patch)
tree7059c7f4d9226bd3e13bed5f663a8f8f70dc3cc3
parentmon/OSDMonitor: add 'osd crush get-device-class' command (diff)
downloadceph-af02d1031d58826464b5defc40026be2d7419aa8.tar.xz
ceph-af02d1031d58826464b5defc40026be2d7419aa8.zip
mgr: 'osd df' by specified class or (crush) name
For large clusters, we use device classes to isolate storage pools. The existing 'osd df' output turns out to be too noisy, say, if you care about only a single storage pool with osds possibly spanning over all hosts. With this change you are now able to run 'osd df' by class (or by pool, if you simply use classes to separate different pools), or by a specified crush bucket name you are currently interested in, which is much more convenient. Some examples: ``` $ bin/ceph osd df tree ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -1 0.05878 - 60 GiB 6.4 GiB 23 MiB 0 B 6 GiB 54 GiB 10.60 1.00 - root default -3 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 4 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 58 up osd.4 5 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 60 up osd.5 -5 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph12 0 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 50 up osd.0 1 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 61 up osd.1 2 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 51 up osd.2 TOTAL 60 GiB 6.4 GiB 23 MiB 0 B 6 GiB 54 GiB 10.60 MIN/MAX VAR: 1.00/1.00 STDDEV: 0 $ bin/ceph osd df tree class aaa ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -1 0.05878 - 20 GiB 2.1 GiB 7.8 MiB 0 B 2 GiB 18 GiB 10.60 1.00 - root default -3 0.02939 - 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 -5 0.02939 - 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 - host ceph12 0 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 50 up osd.0 TOTAL 20 GiB 2.1 GiB 7.8 MiB 0 B 2 GiB 18 GiB 10.60 
MIN/MAX VAR: 1.00/1.00 STDDEV: 0 $ bin/ceph osd df tree name ceph11 ID CLASS WEIGHT REWEIGHT SIZE RAW USE DATA OMAP META AVAIL %USE VAR PGS STATUS TYPE NAME -3 0.02939 - 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 1.00 - host ceph11 3 aaa 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 56 up osd.3 4 bbb 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 58 up osd.4 5 ccc 0.00980 1.00000 10 GiB 1.1 GiB 3.9 MiB 0 B 1 GiB 9.0 GiB 10.60 1.00 60 up osd.5 TOTAL 30 GiB 3.2 GiB 12 MiB 0 B 3 GiB 27 GiB 10.60 MIN/MAX VAR: 1.00/1.00 STDDEV: 0 ``` Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
-rwxr-xr-xqa/workunits/cephtool/test.sh9
-rw-r--r--src/crush/CrushWrapper.cc24
-rw-r--r--src/crush/CrushWrapper.h7
-rw-r--r--src/mgr/DaemonServer.cc31
-rw-r--r--src/mgr/MgrCommands.h4
-rw-r--r--src/mon/PGMap.h12
-rw-r--r--src/osd/OSDMap.cc94
-rw-r--r--src/osd/OSDMap.h10
8 files changed, 161 insertions, 30 deletions
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index e20d61d34e9..b304a13adaa 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -718,6 +718,15 @@ function test_mon_misc()
ceph --concise osd dump | grep '^epoch'
ceph osd df | grep 'MIN/MAX VAR'
+ osd_class=$(ceph osd crush get-device-class 0)
+ ceph osd df tree class $osd_class | grep 'osd.0'
+ ceph osd crush rm-device-class 0
+ # create class first in case old device class may
+ # have already been automatically destroyed
+ ceph osd crush class create $osd_class
+ ceph osd df tree class $osd_class | expect_false grep 'osd.0'
+ ceph osd crush set-device-class $osd_class 0
+ ceph osd df tree name osd.0 | grep 'osd.0'
# df
ceph df > $TMPFILE
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index 9ec0557dc19..966ae59d62f 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -858,6 +858,30 @@ int CrushWrapper::get_children(int id, list<int> *children) const
return b->size;
}
+int CrushWrapper::get_all_children(int id, set<int> *children) const
+{
+ // leaf?
+ if (id >= 0) {
+ return 0;
+ }
+
+ auto *b = get_bucket(id);
+ if (IS_ERR(b)) {
+ return -ENOENT;
+ }
+
+ int c = 0;
+ for (unsigned n = 0; n < b->size; n++) {
+ children->insert(b->items[n]);
+ c++;
+ auto r = get_all_children(b->items[n], children);
+ if (r < 0)
+ return r;
+ c += r;
+ }
+ return c;
+}
+
void CrushWrapper::get_children_of_type(int id,
int type,
vector<int> *children,
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index b5973d71634..9e070419b97 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -735,6 +735,13 @@ public:
* @return number of items, or error
*/
int get_children(int id, list<int> *children) const;
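+ /**
+ * recursively enumerate all descendants (buckets and devices) of the given node
+ *
+ * @param id parent bucket id; a device id (>= 0) is a leaf and yields 0
+ * @return number of descendants visited (each is inserted into *children), or -ENOENT if a bucket lookup fails
+ */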
+ int get_all_children(int id, set<int> *children) const;
void get_children_of_type(int id,
int type,
vector<int> *children,
diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc
index 534de149ade..7f5205c8d83 100644
--- a/src/mgr/DaemonServer.cc
+++ b/src/mgr/DaemonServer.cc
@@ -1250,14 +1250,41 @@ bool DaemonServer::_handle_command(
} else if (prefix == "osd df") {
string method;
cmd_getval(g_ceph_context, cmdctx->cmdmap, "output_method", method);
+ string filter_by;
+ string filter;
+ cmd_getval(g_ceph_context, cmdctx->cmdmap, "filter_by", filter_by);
+ cmd_getval(g_ceph_context, cmdctx->cmdmap, "filter", filter);
+ if (filter_by.empty() != filter.empty()) {
+ cmdctx->reply(-EINVAL, "you must specify both 'filter_by' and 'filter'");
+ return true;
+ }
+ stringstream rs;
r = cluster_state.with_osdmap_and_pgmap([&](const OSDMap& osdmap, const PGMap& pgmap) {
+ string class_name;
+ string item_name;
+ // sanity check filter(s)
+ if (filter_by == "class") {
+ if (!osdmap.crush->class_exists(filter)) {
+ rs << "specified class '" << filter << "' does not exist";
+ return -EINVAL;
+ }
+ class_name = filter;
+ }
+ if (filter_by == "name") {
+ if (!osdmap.crush->name_exists(filter)) {
+ rs << "specified name '" << filter << "' does not exist";
+ return -EINVAL;
+ }
+ item_name = filter;
+ }
print_osd_utilization(osdmap, pgmap, ss,
- f.get(), method == "tree");
+ f.get(), method == "tree",
+ class_name, item_name);
cmdctx->odata.append(ss);
return 0;
});
- cmdctx->reply(r, "");
+ cmdctx->reply(r, rs);
return true;
} else if (prefix == "osd pool stats") {
string pool_name;
diff --git a/src/mgr/MgrCommands.h b/src/mgr/MgrCommands.h
index b5dcab56555..4116318b995 100644
--- a/src/mgr/MgrCommands.h
+++ b/src/mgr/MgrCommands.h
@@ -68,7 +68,9 @@ COMMAND("osd perf", \
"osd", \
"r")
COMMAND("osd df " \
- "name=output_method,type=CephChoices,strings=plain|tree,req=false", \
+ "name=output_method,type=CephChoices,strings=plain|tree,req=false " \
+ "name=filter_by,type=CephChoices,strings=class|name,req=false " \
+ "name=filter,type=CephString,req=false", \
"show OSD utilization", "osd", "r")
COMMAND("osd blocked-by", \
"print histogram of which OSDs are blocking their peers", \
diff --git a/src/mon/PGMap.h b/src/mon/PGMap.h
index d767a35328e..fdc7cb87f65 100644
--- a/src/mon/PGMap.h
+++ b/src/mon/PGMap.h
@@ -387,8 +387,16 @@ public:
return pool_stat_t();
}
- const osd_stat_t& get_osd_sum() const {
- return osd_sum;
+ osd_stat_t get_osd_sum(const set<int>& osds) const {
+ if (osds.empty()) // all
+ return osd_sum;
+ osd_stat_t sum;
+ for (auto i : osds) {
+ auto os = get_osd_stat(i);
+ if (os)
+ sum.add(*os);
+ }
+ return sum;
}
const osd_stat_t *get_osd_stat(int osd) const {
diff --git a/src/osd/OSDMap.cc b/src/osd/OSDMap.cc
index 1a148b47a77..99d8b63e377 100644
--- a/src/osd/OSDMap.cc
+++ b/src/osd/OSDMap.cc
@@ -4815,19 +4815,51 @@ public:
typedef CrushTreeDumper::Dumper<F> Parent;
OSDUtilizationDumper(const CrushWrapper *crush, const OSDMap *osdmap_,
- const PGMap& pgmap_, bool tree_) :
+ const PGMap& pgmap_, bool tree_,
+ const string& class_name_,
+ const string& item_name_) :
Parent(crush, osdmap_->get_pool_names()),
osdmap(osdmap_),
pgmap(pgmap_),
tree(tree_),
- average_util(average_utilization()),
+ class_name(class_name_),
+ item_name(item_name_),
min_var(-1),
max_var(-1),
stddev(0),
sum(0) {
+ if (osdmap->crush->name_exists(item_name)) {
+ // collect the named item and all of its descendants; only these may be dumped
+ auto item_id = osdmap->crush->get_item_id(item_name);
+ allowed.insert(item_id);
+ osdmap->crush->get_all_children(item_id, &allowed);
+ }
+ average_util = average_utilization();
}
protected:
+
+ bool should_dump(int id) const {
+ if (!allowed.empty() && !allowed.count(id)) // filter by name
+ return false;
+ if (id >= 0 && !class_name.empty()) {
+ const char* item_class_name = osdmap->crush->get_item_class(id);
+ if (!item_class_name || // not bound to a class yet
+ item_class_name != class_name) // or already bound to
+ // a different class
+ return false;
+ }
+ return true;
+ }
+
+ set<int> get_dumped_osds() {
+ if (class_name.empty() && item_name.empty()) {
+ // no filter given: an empty set makes get_osd_sum() return the sum over all osds
+ return {};
+ }
+ return dumped_osds;
+ }
+
void dump_stray(F *f) {
for (int i = 0; i < osdmap->get_max_osd(); i++) {
if (osdmap->exists(i) && !this->is_touched(i))
@@ -4838,7 +4870,11 @@ protected:
void dump_item(const CrushTreeDumper::Item &qi, F *f) override {
if (!tree && qi.is_bucket())
return;
+ if (!should_dump(qi.id))
+ return;
+ if (!qi.is_bucket())
+ dumped_osds.insert(qi.id);
float reweight = qi.is_bucket() ? -1 : osdmap->get_weightf(qi.id);
int64_t kb = 0, kb_used = 0, kb_used_data = 0, kb_used_omap = 0,
kb_used_meta = 0, kb_avail = 0;
@@ -4891,7 +4927,9 @@ protected:
double average_utilization() {
int64_t kb = 0, kb_used = 0;
for (int i = 0; i < osdmap->get_max_osd(); i++) {
- if (!osdmap->exists(i) || osdmap->get_weight(i) == 0)
+ if (!osdmap->exists(i) ||
+ osdmap->get_weight(i) == 0 ||
+ !should_dump(i))
continue;
int64_t kb_i, kb_used_i, kb_used_data_i, kb_used_omap_i, kb_used_meta_i,
kb_avail_i;
@@ -4927,7 +4965,7 @@ protected:
int64_t* kb_used_meta,
int64_t* kb_avail) const {
if (id >= 0) {
- if (osdmap->is_out(id)) {
+ if (osdmap->is_out(id) || !should_dump(id)) {
*kb = 0;
*kb_used = 0;
*kb_used_data = 0;
@@ -4969,11 +5007,15 @@ protected:
const OSDMap *osdmap;
const PGMap& pgmap;
bool tree;
+ const string class_name;
+ const string item_name;
double average_util;
double min_var;
double max_var;
double stddev;
double sum;
+ set<int> allowed;
+ set<int> dumped_osds;
};
@@ -4982,8 +5024,10 @@ public:
typedef OSDUtilizationDumper<TextTable> Parent;
OSDUtilizationPlainDumper(const CrushWrapper *crush, const OSDMap *osdmap,
- const PGMap& pgmap, bool tree) :
- Parent(crush, osdmap, pgmap, tree) {}
+ const PGMap& pgmap, bool tree,
+ const string& class_name,
+ const string& item_name) :
+ Parent(crush, osdmap, pgmap, tree, class_name, item_name) {}
void dump(TextTable *tbl) {
tbl->define_column("ID", TextTable::LEFT, TextTable::RIGHT);
@@ -5007,15 +5051,16 @@ public:
dump_stray(tbl);
+ auto sum = pgmap.get_osd_sum(get_dumped_osds());
*tbl << ""
<< ""
<< "" << "TOTAL"
- << byte_u_t(pgmap.get_osd_sum().statfs.total)
- << byte_u_t(pgmap.get_osd_sum().statfs.get_used_raw())
- << byte_u_t(pgmap.get_osd_sum().statfs.allocated)
- << byte_u_t(pgmap.get_osd_sum().statfs.omap_allocated)
- << byte_u_t(pgmap.get_osd_sum().statfs.internal_metadata)
- << byte_u_t(pgmap.get_osd_sum().statfs.available)
+ << byte_u_t(sum.statfs.total)
+ << byte_u_t(sum.statfs.get_used_raw())
+ << byte_u_t(sum.statfs.allocated)
+ << byte_u_t(sum.statfs.omap_allocated)
+ << byte_u_t(sum.statfs.internal_metadata)
+ << byte_u_t(sum.statfs.available)
<< lowprecision_t(average_util)
<< ""
<< TextTable::endrow;
@@ -5116,8 +5161,10 @@ public:
typedef OSDUtilizationDumper<Formatter> Parent;
OSDUtilizationFormatDumper(const CrushWrapper *crush, const OSDMap *osdmap,
- const PGMap& pgmap, bool tree) :
- Parent(crush, osdmap, pgmap, tree) {}
+ const PGMap& pgmap, bool tree,
+ const string& class_name,
+ const string& item_name) :
+ Parent(crush, osdmap, pgmap, tree, class_name, item_name) {}
void dump(Formatter *f) {
f->open_array_section("nodes");
@@ -5171,7 +5218,8 @@ protected:
public:
void summary(Formatter *f) {
f->open_object_section("summary");
- auto& s = pgmap.get_osd_sum().statfs;
+ auto sum = pgmap.get_osd_sum(get_dumped_osds());
+ auto& s = sum.statfs;
f->dump_int("total_kb", s.kb());
f->dump_int("total_kb_used", s.kb_used_raw());
@@ -5188,21 +5236,25 @@ public:
};
void print_osd_utilization(const OSDMap& osdmap,
- const PGMap& pgmap,
- ostream& out,
- Formatter *f,
- bool tree)
+ const PGMap& pgmap,
+ ostream& out,
+ Formatter *f,
+ bool tree,
+ const string& class_name,
+ const string& item_name)
{
const CrushWrapper *crush = osdmap.crush.get();
if (f) {
f->open_object_section("df");
- OSDUtilizationFormatDumper d(crush, &osdmap, pgmap, tree);
+ OSDUtilizationFormatDumper d(crush, &osdmap, pgmap, tree,
+ class_name, item_name);
d.dump(f);
d.summary(f);
f->close_section();
f->flush(out);
} else {
- OSDUtilizationPlainDumper d(crush, &osdmap, pgmap, tree);
+ OSDUtilizationPlainDumper d(crush, &osdmap, pgmap, tree,
+ class_name, item_name);
TextTable tbl;
d.dump(&tbl);
out << tbl << d.summary() << "\n";
diff --git a/src/osd/OSDMap.h b/src/osd/OSDMap.h
index 32118249839..a477d5827e4 100644
--- a/src/osd/OSDMap.h
+++ b/src/osd/OSDMap.h
@@ -1493,9 +1493,11 @@ inline ostream& operator<<(ostream& out, const OSDMap& m) {
class PGMap;
void print_osd_utilization(const OSDMap& osdmap,
- const PGMap& pgmap,
- ostream& out,
- Formatter *f,
- bool tree);
+ const PGMap& pgmap,
+ ostream& out,
+ Formatter *f,
+ bool tree,
+ const string& class_name,
+ const string& item_name);
#endif