diff options
author | Yuri Weinstein <yweinste@redhat.com> | 2023-02-27 20:56:00 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-27 20:56:00 +0100 |
commit | 6c9011017b79e2807aea1b73cb5eed116a57071f (patch) | |
tree | 30f866eb35587c40e666eee4dbb3f9f1b127de1a /src/mon | |
parent | Merge pull request #48192 from amathuria/amathuria-edit-pg-deletion-cost (diff) | |
parent | osd/OSDMapping: Remove get_num_acting_pgs (diff) | |
download | ceph-6c9011017b79e2807aea1b73cb5eed116a57071f.tar.xz ceph-6c9011017b79e2807aea1b73cb5eed116a57071f.zip |
Merge pull request #49507 from Matan-B/wip-matanb-check_pg_num-edit
mon/OSDMontior: Simplify check_pg_num()
Reviewed-by: Nitzan Mordechai <nmordech@redhat.com>
Diffstat (limited to 'src/mon')
-rw-r--r-- | src/mon/OSDMonitor.cc | 106 | ||||
-rw-r--r-- | src/mon/OSDMonitor.h | 1 |
2 files changed, 60 insertions, 47 deletions
diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 08172badc69..e2462b75328 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -7905,65 +7905,74 @@ int OSDMonitor::get_crush_rule(const string &rule_name, return 0; } -int OSDMonitor::check_pg_num(int64_t pool, int pg_num, int size, int crush_rule, ostream *ss) +/* +* Get the number of 'in' osds according to the crush_rule, +*/ +uint32_t OSDMonitor::get_osd_num_by_crush(int crush_rule) +{ + set<int> out_osds; + set<int> crush_in_osds; + set<int> roots; + CrushWrapper newcrush = _get_pending_crush(); + newcrush.find_takes_by_rule(crush_rule, &roots); + for (auto root : roots) { + const char *rootname = newcrush.get_item_name(root); + set<int> crush_all_osds; + newcrush.get_leaves(rootname, &crush_all_osds); + std::set_difference(crush_all_osds.begin(), crush_all_osds.end(), + out_osds.begin(), out_osds.end(), + std::inserter(crush_in_osds, crush_in_osds.end())); + } + return crush_in_osds.size(); +} + +int OSDMonitor::check_pg_num(int64_t pool, + int pg_num, + int size, + int crush_rule, + ostream *ss) { auto max_pgs_per_osd = g_conf().get_val<uint64_t>("mon_max_pg_per_osd"); uint64_t projected = 0; - unsigned osd_num = 0; - // assume min cluster size 3 - auto num_osds = std::max(osdmap.get_num_in_osds(), 3u); + uint32_t osd_num_by_crush = 0; + set<int64_t> crush_pool_ids; if (pool < 0) { // a new pool projected += pg_num * size; } - if (mapping.get_epoch() >= osdmap.get_epoch()) { - set<int> roots; - CrushWrapper newcrush = _get_pending_crush(); - newcrush.find_takes_by_rule(crush_rule, &roots); - int max_osd = osdmap.get_max_osd(); - for (auto root : roots) { - const char *rootname = newcrush.get_item_name(root); - set<int> osd_ids; - newcrush.get_leaves(rootname, &osd_ids); - unsigned out_osd = 0; - for (auto id : osd_ids) { - if (id > max_osd) { - out_osd++; - continue; - } - projected += mapping.get_osd_acting_pgs(id).size(); - } - osd_num += osd_ids.size() - out_osd; - } - if (pool >= 0) { - // update an existing pool's pg num. - // already counted the pgs of this `pool` by iterating crush map, so - // remove them using adding the specified pg num - projected += pg_num * size; - projected -= mapping.get_num_acting_pgs(pool); - } - num_osds = std::max(osd_num, 3u); // assume min cluster size 3 - } else { - // use pg_num target for evaluating the projected pg num - for (const auto& [pool_id, pool_info] : osdmap.get_pools()) { + + osd_num_by_crush = get_osd_num_by_crush(crush_rule); + osdmap.get_pool_ids_by_rule(crush_rule, &crush_pool_ids); + + for (const auto& [pool_id, pool_info] : osdmap.get_pools()) { + // Check only for pools affected by crush rule + if (crush_pool_ids.contains(pool_id)) { if (pool_id == pool) { - projected += pg_num * size; + // Specified pool, use given pg_num and size values. + projected += pg_num * size; } else { - projected += pool_info.get_pg_num_target() * pool_info.get_size(); + // Use pg_num_target for evaluating the projected pg num + projected += pool_info.get_pg_num_target() * pool_info.get_size(); } } } - auto max_pgs = max_pgs_per_osd * num_osds; - auto projected_pgs_per_osd = projected / num_osds; - if (projected > max_pgs) { + // assume min cluster size 3 + osd_num_by_crush = std::max(osd_num_by_crush, 3u); + auto projected_pgs_per_osd = projected / osd_num_by_crush; + + if (projected_pgs_per_osd > max_pgs_per_osd) { if (pool >= 0) { *ss << "pool id " << pool; } - *ss << " pg_num " << pg_num << " size " << size - << " for this new pool would result in " << projected_pgs_per_osd - << " cumulative PGs per OSD (" << projected - << " total PG replicas on " << num_osds - << " 'in' OSDs) which exceeds the mon_max_pg_per_osd value of " << max_pgs_per_osd; + *ss << " pg_num " << pg_num + << " size " << size + << " for this pool would result in " + << projected_pgs_per_osd + << " cumulative PGs per OSD (" << projected + << " total PG replicas on " << osd_num_by_crush + << " 'in' root OSDs by crush rule) " + << "which exceeds the mon_max_pg_per_osd " + << "value of " << max_pgs_per_osd; return -ERANGE; } return 0; @@ -8383,9 +8392,12 @@ int OSDMonitor::prepare_command_pool_set(const cmdmap_t& cmdmap, ss << "crush rule " << p.get_crush_rule() << " type does not match pool"; return -EINVAL; } - int r = check_pg_num(pool, p.get_pg_num(), n, p.get_crush_rule(), &ss); - if (r < 0) { - return r; + if (n > p.size) { + // only when increasing pool size + int r = check_pg_num(pool, p.get_pg_num(), n, p.get_crush_rule(), &ss); + if (r < 0) { + return r; + } } p.size = n; p.min_size = g_conf().get_osd_pool_default_min_size(p.size); diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index eac85707576..16880f07749 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -516,6 +516,7 @@ private: const std::string &erasure_code_profile, unsigned *stripe_width, std::ostream *ss); + uint32_t get_osd_num_by_crush(int crush_rule); int check_pg_num(int64_t pool, int pg_num, int size, int crush_rule, std::ostream* ss); int prepare_new_pool(std::string& name, int crush_rule, |