author    Matan Breizman <mbreizma@redhat.com>  2022-12-19 10:58:06 +0100
committer Matan Breizman <mbreizma@redhat.com>  2022-12-20 14:57:22 +0100
commit    c6e4c174cb9d906537d78ff3980930439ace86da (patch)
tree      cd8506498dd4fcf21540c2042cceb78dd6c59321 /src
parent    Merge pull request #49416 from ljflores/wip-telemetry-label (diff)
mon/OSDMonitor: Simplify check_pg_num()
* See: https://tracker.ceph.com/issues/47062.
Originally, check_pg_num did not take into account the root
osds selected by the crush rule.
This resulted in an inaccurate pg num per osd count.
* Avoid summing all of the projected pg nums and only later
subtracting the pg num if the pool already exists.
* With this change, we only count the projected pg nums that
belong to pools affected by the crush rule.
The same applies to the osd count: instead of dividing the
projected pg num by all of the osdmap osds, divide only by
the osds used by the crush rule.
* Avoid differentiating based on whether the mapping epoch
is later than the osdmap epoch. Always check the pg num
according to the crush rule (see the sketch below).
Signed-off-by: Matan Breizman <mbreizma@redhat.com>
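
For illustration, a minimal, self-contained sketch of the new per-crush-rule
accounting. The Pool struct, helper names, and sample values below are invented
for this example; they are simplified stand-ins, not Ceph's actual
OSDMonitor/CrushWrapper API:

// Hypothetical, simplified model -- not the actual Ceph types.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <map>
#include <set>

struct Pool {
  uint32_t pg_num_target; // target pg count for the pool
  uint32_t size;          // replica count
  int crush_rule;         // crush rule the pool maps with
};

// Count 'in' osds under the rule's roots: all leaves minus the 'out' set,
// mirroring what get_osd_num_by_crush() does in the patch.
uint32_t osd_num_by_crush(const std::set<int>& crush_all_osds,
                          const std::set<int>& out_osds) {
  std::set<int> crush_in_osds;
  std::set_difference(crush_all_osds.begin(), crush_all_osds.end(),
                      out_osds.begin(), out_osds.end(),
                      std::inserter(crush_in_osds, crush_in_osds.end()));
  return static_cast<uint32_t>(crush_in_osds.size());
}

// Project PG replicas per osd over the pools that share crush_rule only,
// instead of dividing a cluster-wide total by every osd in the osdmap.
bool pg_num_ok(const std::map<int64_t, Pool>& pools, int crush_rule,
               uint32_t in_osds, uint64_t max_pg_per_osd) {
  uint64_t projected = 0;
  for (const auto& [id, p] : pools) {
    if (p.crush_rule == crush_rule) // only pools affected by the rule
      projected += uint64_t{p.pg_num_target} * p.size;
  }
  in_osds = std::max(in_osds, 3u);  // assume min cluster size 3
  return projected / in_osds <= max_pg_per_osd;
}

int main() {
  // pool 1 uses rule 0, pool 2 uses rule 1
  std::map<int64_t, Pool> pools = {{1, {128, 3, 0}}, {2, {64, 2, 1}}};
  uint32_t in_osds = osd_num_by_crush({0, 1, 2, 3}, {3}); // osd.3 is out
  // 128 * 3 replicas / 3 'in' osds = 128 PGs per osd, under the 250 cap
  std::cout << std::boolalpha
            << pg_num_ok(pools, 0, in_osds, 250) << "\n"; // prints true
}

The point of the change is visible in pg_num_ok(): only pools that share the
rule contribute to projected, and the divisor is the rule's 'in' osd count
rather than the whole cluster's.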
Diffstat (limited to 'src')
-rw-r--r--  src/mon/OSDMonitor.cc  97
-rw-r--r--  src/mon/OSDMonitor.h    1
2 files changed, 54 insertions, 44 deletions
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc
index bb8a5488de8..22aa5d4b192 100644
--- a/src/mon/OSDMonitor.cc
+++ b/src/mon/OSDMonitor.cc
@@ -7880,65 +7880,74 @@ int OSDMonitor::get_crush_rule(const string &rule_name,
   return 0;
 }
 
-int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, int crush_rule, ostream *ss)
+/*
+* Get the number of 'in' osds according to the crush_rule,
+*/
+uint32_t OSDMonitor::get_osd_num_by_crush(int crush_rule)
+{
+  set<int> out_osds;
+  set<int> crush_in_osds;
+  set<int> roots;
+  CrushWrapper newcrush = _get_pending_crush();
+  newcrush.find_takes_by_rule(crush_rule, &roots);
+  for (auto root : roots) {
+    const char *rootname = newcrush.get_item_name(root);
+    set<int> crush_all_osds;
+    newcrush.get_leaves(rootname, &crush_all_osds);
+    std::set_difference(crush_all_osds.begin(), crush_all_osds.end(),
+                        out_osds.begin(), out_osds.end(),
+                        std::inserter(crush_in_osds, crush_in_osds.end()));
+  }
+  return crush_in_osds.size();
+}
+
+int OSDMonitor::check_pg_num(int64_t pool,
+                             int pg_num,
+                             int size,
+                             int crush_rule,
+                             ostream *ss)
 {
   auto max_pgs_per_osd = g_conf().get_val<uint64_t>("mon_max_pg_per_osd");
   uint64_t projected = 0;
-  unsigned osd_num = 0;
-  // assume min cluster size 3
-  auto num_osds = std::max(osdmap.get_num_in_osds(), 3u);
+  uint32_t osd_num_by_crush = 0;
+  set<int64_t> crush_pool_ids;
   if (pool < 0) {
     // a new pool
     projected += pg_num * size;
   }
-  if (mapping.get_epoch() >= osdmap.get_epoch()) {
-    set<int> roots;
-    CrushWrapper newcrush = _get_pending_crush();
-    newcrush.find_takes_by_rule(crush_rule, &roots);
-    int max_osd = osdmap.get_max_osd();
-    for (auto root : roots) {
-      const char *rootname = newcrush.get_item_name(root);
-      set<int> osd_ids;
-      newcrush.get_leaves(rootname, &osd_ids);
-      unsigned out_osd = 0;
-      for (auto id : osd_ids) {
-        if (id > max_osd) {
-          out_osd++;
-          continue;
-        }
-        projected += mapping.get_osd_acting_pgs(id).size();
-      }
-      osd_num += osd_ids.size() - out_osd;
-    }
-    if (pool >= 0) {
-      // update an existing pool's pg num.
-      // already counted the pgs of this `pool` by iterating crush map, so
-      // remove them using adding the specified pg num
-      projected += pg_num * size;
-      projected -= mapping.get_num_acting_pgs(pool);
-    }
-    num_osds = std::max(osd_num, 3u); // assume min cluster size 3
-  } else {
-    // use pg_num target for evaluating the projected pg num
-    for (const auto& [pool_id, pool_info] : osdmap.get_pools()) {
+
+  osd_num_by_crush = get_osd_num_by_crush(crush_rule);
+  osdmap.get_pool_ids_by_rule(crush_rule, &crush_pool_ids);
+
+  for (const auto& [pool_id, pool_info] : osdmap.get_pools()) {
+    // Check only for pools affected by crush rule
+    if (crush_pool_ids.contains(pool_id)) {
       if (pool_id == pool) {
-        projected += pg_num * size;
+        // Specified pool, use given pg_num and size values.
+        projected += pg_num * size;
       } else {
-        projected += pool_info.get_pg_num_target() * pool_info.get_size();
+        // Use pg_num_target for evaluating the projected pg num
+        projected += pool_info.get_pg_num_target() * pool_info.get_size();
      }
     }
   }
-  auto max_pgs = max_pgs_per_osd * num_osds;
-  auto projected_pgs_per_osd = projected / num_osds;
-  if (projected > max_pgs) {
+  // assume min cluster size 3
+  osd_num_by_crush = std::max(osd_num_by_crush, 3u);
+  auto projected_pgs_per_osd = projected / osd_num_by_crush;
+
+  if (projected_pgs_per_osd > max_pgs_per_osd) {
     if (pool >= 0) {
       *ss << "pool id " << pool;
     }
-    *ss << " pg_num " << pg_num << " size " << size
-        << " for this new pool would result in " << projected_pgs_per_osd
-        << " cumulative PGs per OSD (" << projected
-        << " total PG replicas on " << num_osds
-        << " 'in' OSDs) which exceeds the mon_max_pg_per_osd value of " << max_pgs_per_osd;
+    *ss << " pg_num " << pg_num
+        << " size " << size
+        << " for this pool would result in "
+        << projected_pgs_per_osd
+        << " cumulative PGs per OSD (" << projected
+        << " total PG replicas on " << osd_num_by_crush
+        << " 'in' root OSDs by crush rule) "
+        << "which exceeds the mon_max_pg_per_osd "
+        << "value of " << max_pgs_per_osd;
     return -ERANGE;
   }
   return 0;
diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h
index 3ced8be401b..ec5c73c317c 100644
--- a/src/mon/OSDMonitor.h
+++ b/src/mon/OSDMonitor.h
@@ -516,6 +516,7 @@ private:
                            const std::string &erasure_code_profile,
                            unsigned *stripe_width,
                            std::ostream *ss);
+  uint32_t get_osd_num_by_crush(int crush_rule);
   int check_pg_num(int64_t pool, int pg_num, int size, int crush_rule,
                    std::ostream* ss);
   int prepare_new_pool(std::string& name, int crush_rule,
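
A side note on the std::set_difference call in get_osd_num_by_crush(): it
requires sorted input ranges, which std::set provides by iterating in key
order. A standalone illustration with toy values (not Ceph code):

#include <algorithm>
#include <iostream>
#include <iterator>
#include <set>

int main() {
  std::set<int> crush_all_osds = {0, 1, 2, 4, 7}; // leaves under the root
  std::set<int> out_osds = {2, 7};                // marked 'out'
  std::set<int> crush_in_osds;
  // std::set iterates in sorted order, satisfying set_difference's
  // sorted-range precondition; the result is all_osds minus out_osds.
  std::set_difference(crush_all_osds.begin(), crush_all_osds.end(),
                      out_osds.begin(), out_osds.end(),
                      std::inserter(crush_in_osds, crush_in_osds.end()));
  std::cout << crush_in_osds.size() << "\n"; // 3 -> osds 0, 1, 4 are 'in'
}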