diff options
author | Josh Durgin <jdurgin@redhat.com> | 2017-04-20 02:56:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-04-20 02:56:28 +0200 |
commit | 535b2cff57a715c1d1b2ced79183812706c3a34d (patch) | |
tree | 0fc559b9c7552ae1955689b527563a6db9123098 /src/mon | |
parent | Merge pull request #14256 from dillaman/wip-19297 (diff) | |
parent | qa: use bluestore for (k)rbd ec overwrites tests (diff) | |
download | ceph-535b2cff57a715c1d1b2ced79183812706c3a34d.tar.xz ceph-535b2cff57a715c1d1b2ced79183812706c3a34d.zip |
Merge pull request #14496 from jdurgin/wip-ec-overwrites-cleanup
osd: make ec overwrites ready to use
Reviewed-by: Sage Weil <sage@redhat.com>
Diffstat (limited to 'src/mon')
-rw-r--r-- | src/mon/FSCommands.cc | 2 | ||||
-rw-r--r-- | src/mon/MonCommands.h | 2 | ||||
-rw-r--r-- | src/mon/OSDMonitor.cc | 124 | ||||
-rw-r--r-- | src/mon/OSDMonitor.h | 4 |
4 files changed, 109 insertions, 23 deletions
diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index cc2dfe04c0b..630736c3989 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -849,7 +849,7 @@ int FileSystemCommandHandler::_check_pool( << " is an erasure-coded pool. Use of erasure-coded pools" << " for CephFS metadata is not permitted"; return -EINVAL; - } else if (pool->is_erasure() && !pool->is_hacky_ecoverwrites()) { + } else if (pool->is_erasure() && !pool->allows_ecoverwrites()) { // non-overwriteable EC pools are only acceptable with a cache tier overlay if (!pool->has_tiers() || !pool->has_read_tier() || !pool->has_write_tier()) { *ss << "pool '" << pool_name << "' (id '" << pool_id << "')" diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index 3945e4b3480..9506814f7be 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -737,7 +737,7 @@ COMMAND("osd pool get " \ "get pool parameter <var>", "osd", "r", "cli,rest") COMMAND("osd pool set " \ "name=pool,type=CephPoolname " \ - "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|debug_white_box_testing_ec_overwrites " \ + "name=var,type=CephChoices,strings=size|min_size|crash_replay_interval|pg_num|pgp_num|crush_rule|crush_ruleset|hashpspool|nodelete|nopgchange|nosizechange|write_fadvise_dontneed|noscrub|nodeep-scrub|hit_set_type|hit_set_period|hit_set_count|hit_set_fpp|use_gmt_hitset|debug_fake_ec_pool|target_max_bytes|target_max_objects|cache_target_dirty_ratio|cache_target_dirty_high_ratio|cache_target_full_ratio|cache_min_flush_age|cache_min_evict_age|auid|min_read_recency_for_promote|min_write_recency_for_promote|fast_read|hit_set_grade_decay_rate|hit_set_search_last_n|scrub_min_interval|scrub_max_interval|deep_scrub_interval|recovery_priority|recovery_op_priority|scrub_priority|compression_mode|compression_algorithm|compression_required_ratio|compression_max_blob_size|compression_min_blob_size|csum_type|csum_min_block|csum_max_block|allow_ec_overwrites " \ "name=val,type=CephString " \ "name=force,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \ "set pool parameter <var> to <val>", "osd", "rw", "cli,rest") diff --git a/src/mon/OSDMonitor.cc b/src/mon/OSDMonitor.cc index 7a5eddb1832..1b48c8edccb 100644 --- a/src/mon/OSDMonitor.cc +++ b/src/mon/OSDMonitor.cc @@ -1232,6 +1232,48 @@ int OSDMonitor::load_metadata(int osd, map<string, string>& m, ostream *err) return 0; } +int OSDMonitor::get_osd_objectstore_type(int osd, string *type) +{ + map<string, string> metadata; + int r = load_metadata(osd, metadata, nullptr); + if (r < 0) + return r; + + auto it = metadata.find("osd_objectstore"); + if (it == metadata.end()) + return -ENOENT; + *type = it->second; + return 0; +} + +bool OSDMonitor::is_pool_currently_all_bluestore(int64_t pool_id, + const pg_pool_t &pool, + ostream *err) +{ + // just check a few pgs for efficiency - this can't give a guarantee anyway, + // since filestore osds could always join the pool later + set<int> checked_osds; + for (unsigned ps = 0; ps < MIN(8, pool.get_pg_num()); ++ps) { + vector<int> up, acting; + pg_t pgid(ps, pool_id, -1); + osdmap.pg_to_up_acting_osds(pgid, up, acting); + for (int osd : up) { + if (checked_osds.find(osd) != checked_osds.end()) + continue; + string objectstore_type; + int r = get_osd_objectstore_type(osd, &objectstore_type); + // allow with missing metadata, e.g. due to an osd never booting yet + if (r < 0 || objectstore_type == "bluestore") { + checked_osds.insert(osd); + continue; + } + *err << "osd." << osd << " uses " << objectstore_type; + return false; + } + } + return true; +} + int OSDMonitor::dump_osd_metadata(int osd, Formatter *f, ostream *err) { map<string,string> m; @@ -4893,8 +4935,9 @@ void OSDMonitor::check_legacy_ec_plugin(const string& plugin, const string& prof } } -int OSDMonitor::normalize_profile(const string& profilename, - ErasureCodeProfile &profile, +int OSDMonitor::normalize_profile(const string& profilename, + ErasureCodeProfile &profile, + bool force, ostream *ss) { ErasureCodeInterfaceRef erasure_code; @@ -4904,10 +4947,39 @@ int OSDMonitor::normalize_profile(const string& profilename, int err = instance.factory(plugin->second, g_conf->get_val<std::string>("erasure_code_dir"), profile, &erasure_code, ss); - if (err) + if (err) { return err; + } - return erasure_code->init(profile, ss); + err = erasure_code->init(profile, ss); + if (err) { + return err; + } + + auto it = profile.find("stripe_unit"); + if (it != profile.end()) { + string err_str; + uint32_t stripe_unit = strict_si_cast<uint32_t>(it->second.c_str(), &err_str); + if (!err_str.empty()) { + *ss << "could not parse stripe_unit '" << it->second + << "': " << err_str << std::endl; + return -EINVAL; + } + uint32_t data_chunks = erasure_code->get_data_chunk_count(); + uint32_t chunk_size = erasure_code->get_chunk_size(stripe_unit * data_chunks); + if (chunk_size != stripe_unit) { + *ss << "stripe_unit " << stripe_unit << " does not match ec profile " + << "alignment. Would be padded to " << chunk_size + << std::endl; + return -EINVAL; + } + if ((stripe_unit % 4096) != 0 && !force) { + *ss << "stripe_unit should be a multiple of 4096 bytes for best performance." + << "use --force to override this check" << std::endl; + return -EINVAL; + } + } + return 0; } int OSDMonitor::crush_ruleset_create_erasure(const string &name, @@ -5130,12 +5202,22 @@ int OSDMonitor::prepare_pool_stripe_width(const unsigned pool_type, break; case pg_pool_t::TYPE_ERASURE: { + ErasureCodeProfile profile = + osdmap.get_erasure_code_profile(erasure_code_profile); ErasureCodeInterfaceRef erasure_code; err = get_erasure_code(erasure_code_profile, &erasure_code, ss); - uint32_t desired_stripe_width = g_conf->osd_pool_erasure_code_stripe_width; - if (err == 0) - *stripe_width = erasure_code->get_data_chunk_count() * - erasure_code->get_chunk_size(desired_stripe_width); + if (err) + break; + uint32_t data_chunks = erasure_code->get_data_chunk_count(); + uint32_t stripe_unit = g_conf->osd_pool_erasure_code_stripe_unit; + auto it = profile.find("stripe_unit"); + if (it != profile.end()) { + string err_str; + stripe_unit = strict_si_cast<uint32_t>(it->second.c_str(), &err_str); + assert(err_str.empty()); + } + *stripe_width = data_chunks * + erasure_code->get_chunk_size(stripe_unit * data_chunks); } break; default: @@ -5728,18 +5810,13 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap, ss << "expecting value 'true' or '1'"; return -EINVAL; } - } else if (var == "debug_white_box_testing_ec_overwrites") { + } else if (var == "allow_ec_overwrites") { + if (!p.is_erasure()) { + ss << "ec overwrites can only be enabled for an erasure coded pool"; + return -EINVAL; + } if (val == "true" || (interr.empty() && n == 1)) { - if (cct->check_experimental_feature_enabled( - "debug_white_box_testing_ec_overwrites")) { p.flags |= pg_pool_t::FLAG_EC_OVERWRITES; - } else { - ss << "debug_white_box_testing_ec_overwrites is an experimental feature " - << "and must be enabled. Note, this feature does not yet actually " - << "work. This flag merely enables some of the preliminary support " - << "for testing purposes."; - return -ENOTSUP; - } } else if (val == "false" || (interr.empty() && n == 0)) { ss << "ec overwrites cannot be disabled once enabled"; return -EINVAL; @@ -5747,6 +5824,11 @@ int OSDMonitor::prepare_command_pool_set(map<string,cmd_vartype> &cmdmap, ss << "expecting value 'true', 'false', '0', or '1'"; return -EINVAL; } + stringstream err; + if (!is_pool_currently_all_bluestore(pool, p, &err)) { + ss << "pool must only be stored on bluestore for scrubbing to work: " << err.str(); + return -EINVAL; + } } else if (var == "target_max_objects") { if (interr.length()) { ss << "error parsing int '" << val << "': " << interr; @@ -6831,14 +6913,14 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, if (err) goto reply; } - err = normalize_profile(name, profile_map, &ss); + err = normalize_profile(name, profile_map, force, &ss); if (err) goto reply; if (osdmap.has_erasure_code_profile(name)) { ErasureCodeProfile existing_profile_map = osdmap.get_erasure_code_profile(name); - err = normalize_profile(name, existing_profile_map, &ss); + err = normalize_profile(name, existing_profile_map, force, &ss); if (err) goto reply; @@ -6892,7 +6974,7 @@ bool OSDMonitor::prepare_command_impl(MonOpRequestRef op, &ss); if (err) goto reply; - err = normalize_profile(name, profile_map, &ss); + err = normalize_profile(name, profile_map, true, &ss); if (err) goto reply; dout(20) << "erasure code profile set " << profile << "=" diff --git a/src/mon/OSDMonitor.h b/src/mon/OSDMonitor.h index 32d08f78d3e..5144e89b6d1 100644 --- a/src/mon/OSDMonitor.h +++ b/src/mon/OSDMonitor.h @@ -297,6 +297,7 @@ private: const string& profile) const; int normalize_profile(const string& profilename, ErasureCodeProfile &profile, + bool force, ostream *ss); int crush_ruleset_create_erasure(const string &name, const string &profile, @@ -410,6 +411,9 @@ private: OpTracker op_tracker; int load_metadata(int osd, map<string, string>& m, ostream *err); + int get_osd_objectstore_type(int osd, std::string *type); + bool is_pool_currently_all_bluestore(int64_t pool_id, const pg_pool_t &pool, + ostream *err); // when we last received PG stats from each osd map<int,utime_t> last_osd_report; |