diff options
author | Sage Weil <sage@redhat.com> | 2018-09-19 23:37:59 +0200 |
---|---|---|
committer | Sage Weil <sage@redhat.com> | 2018-10-16 14:22:48 +0200 |
commit | 1617473f10f1b523aaa881b9b2b192c7d419fd2b (patch) | |
tree | fd4fc72603eef912782cea6b1f6f31696abd8185 | |
parent | mgr/DaemonServer: block pg_num decrease(merge) until pgp_num is reduced (diff) | |
download | ceph-1617473f10f1b523aaa881b9b2b192c7d419fd2b.tar.xz ceph-1617473f10f1b523aaa881b9b2b192c7d419fd2b.zip |
mgr/DaemonServer: throttle pgp_num changes based on misplaced %
- block pgp_num increase if pg_num hasn't increased yet
- make no changes if there are inactive or unknown pgs
- make no changes if there are degraded pgs either. this might be a bit
conservative...
- calculate the magnitude of our adjustment based on the max_misplaced
target. this assumes a uniform distribution of objects across pgs,
so not perfectly accurate, but hopefully close enough.
Signed-off-by: Sage Weil <sage@redhat.com>
-rw-r--r-- | src/mgr/DaemonServer.cc | 73 |
1 files changed, 68 insertions, 5 deletions
diff --git a/src/mgr/DaemonServer.cc b/src/mgr/DaemonServer.cc index 0d9eefd982f..b43077bca31 100644 --- a/src/mgr/DaemonServer.cc +++ b/src/mgr/DaemonServer.cc @@ -2224,6 +2224,7 @@ void DaemonServer::adjust_pgs() { dout(20) << dendl; unsigned max = std::max<int64_t>(1, g_conf()->mon_osd_max_creating_pgs); + double max_misplaced = g_conf().get_val<double>("target_max_misplaced_ratio"); map<string,unsigned> pg_num_to_set; map<string,unsigned> pgp_num_to_set; @@ -2244,6 +2245,21 @@ void DaemonServer::adjust_pgs() << " max_creating " << max << " left " << left << dendl; + + // FIXME: These checks are fundamentally racy given that adjust_pgs() + // can run more frequently than we get updated pg stats from OSDs. We + // may make multiple adjustments with stale informaiton. + double misplaced_ratio, degraded_ratio; + double inactive_pgs_ratio, unknown_pgs_ratio; + pg_map.get_recovery_stats(&misplaced_ratio, &degraded_ratio, + &inactive_pgs_ratio, &unknown_pgs_ratio); + dout(20) << "misplaced_ratio " << misplaced_ratio + << " degraded_ratio " << degraded_ratio + << " inactive_pgs_ratio " << inactive_pgs_ratio + << " unknown_pgs_ratio " << unknown_pgs_ratio + << "; target_max_misplaced_ratio " << max_misplaced + << dendl; + cluster_state.with_osdmap([&](const OSDMap& osdmap) { if (pg_map.last_osdmap_epoch != osdmap.get_epoch()) { // do nothing if maps aren't in sync @@ -2377,13 +2393,60 @@ void DaemonServer::adjust_pgs() unsigned target = std::min(p.get_pg_num_pending(), p.get_pgp_num_target()); if (target != p.get_pgp_num()) { - // FIXME: we should throttle this to limit mispalced objects, like - // we do in the balancer module. 
- dout(10) << "pool " << i.first - << " pgp target " << p.get_pgp_num_target() + dout(20) << "pool " << i.first + << " pgp_num_target " << p.get_pgp_num_target() << " pgp_num " << p.get_pgp_num() << " -> " << target << dendl; - pgp_num_to_set[osdmap.get_pool_name(i.first)] = target; + if (target > p.get_pgp_num() && + p.get_pgp_num() == p.get_pg_num()) { + dout(10) << "pool " << i.first + << " pgp_num_target " << p.get_pgp_num_target() + << " pgp_num " << p.get_pgp_num() + << " - increase blocked by pg_num " << p.get_pg_num() + << dendl; + } else if (inactive_pgs_ratio > 0 || + degraded_ratio > 0 || + unknown_pgs_ratio > 0) { + dout(10) << "pool " << i.first + << " pgp_num_target " << p.get_pgp_num_target() + << " pgp_num " << p.get_pgp_num() + << " - inactive|degraded|unknown pgs, deferring pgp_num" + << " update" << dendl; + } else if (misplaced_ratio > max_misplaced) { + dout(10) << "pool " << i.first + << " pgp_num_target " << p.get_pgp_num_target() + << " pgp_num " << p.get_pgp_num() + << " - misplaced_ratio " << misplaced_ratio + << " > max " << max_misplaced + << ", deferring pgp_num update" << dendl; + } else { + // NOTE: this calculation assumes objects are + // basically uniformly distributed across all PGs + // (regardless of pool), which is probably not + // perfectly correct, but it's a start. make no + // single adjustment that's more than half of the + // max_misplaced, to somewhat limit the magnitude of + // our potential error here. 
+ double room = + std::min<double>(max_misplaced - misplaced_ratio, + misplaced_ratio / 2.0); + unsigned estmax = std::max<unsigned>( + (double)p.get_pg_num() * room, 1u); + int delta = target - p.get_pgp_num(); + int next = p.get_pgp_num(); + if (delta < 0) { + next += std::max<int>(-estmax, delta); + } else { + next += std::min<int>(estmax, delta); + } + dout(20) << " room " << room << " estmax " << estmax + << " delta " << delta << " next " << next << dendl; + dout(10) << "pool " << i.first + << " pgp_num_target " << p.get_pgp_num_target() + << " pgp_num " << p.get_pgp_num() + << " -> " << next << dendl; + pgp_num_to_set[osdmap.get_pool_name(i.first)] = next; + } } if (left == 0) { return; |