summaryrefslogtreecommitdiffstats
path: root/src/osd/PrimaryLogPG.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/osd/PrimaryLogPG.cc')
-rw-r--r--src/osd/PrimaryLogPG.cc132
1 files changed, 87 insertions, 45 deletions
diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc
index 68c3a203bf9..3324ba9dc91 100644
--- a/src/osd/PrimaryLogPG.cc
+++ b/src/osd/PrimaryLogPG.cc
@@ -543,6 +543,11 @@ void PrimaryLogPG::schedule_recovery_work(
recovery_state.get_recovery_op_priority());
}
+common::intrusive_timer &PrimaryLogPG::get_pg_timer()
+{
+ return osd->pg_timer;
+}
+
void PrimaryLogPG::replica_clear_repop_obc(
const vector<pg_log_entry_t> &logv,
ObjectStore::Transaction &t)
@@ -1055,7 +1060,7 @@ void PrimaryLogPG::do_command(
f->close_section();
if (is_primary() && is_active() && m_scrubber) {
- m_scrubber->dump_scrubber(f.get(), m_planned_scrub);
+ m_scrubber->dump_scrubber(f.get());
}
f->open_object_section("agent_state");
@@ -1185,7 +1190,7 @@ void PrimaryLogPG::do_command(
if (is_primary()) {
scrub_level_t deep = (prefix == "deep-scrub") ? scrub_level_t::deep
: scrub_level_t::shallow;
- m_scrubber->on_operator_forced_scrub(f.get(), deep, m_planned_scrub);
+ m_scrubber->on_operator_forced_scrub(f.get(), deep);
} else {
ss << "Not primary";
ret = -EPERM;
@@ -2053,6 +2058,10 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
}
}
+ if (!is_primary()) {
+ osd->logger->inc(l_osd_replica_read);
+ }
+
if (!check_laggy(op)) {
return;
}
@@ -2183,6 +2192,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
// missing object?
if (is_unreadable_object(head)) {
if (!is_primary()) {
+ osd->logger->inc(l_osd_replica_read_redirect_missing);
osd->reply_op_error(op, -EAGAIN);
return;
}
@@ -2276,6 +2286,16 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
}
}
+ if (cct->_conf->bluestore_debug_inject_read_err &&
+ op->may_write() &&
+ pool.info.is_erasure() &&
+ ec_inject_test_write_error0(m->get_hobj(), m->get_reqid())) {
+ // Fail retried write with error
+ dout(0) << __func__ << " Error inject - Fail retried write with EINVAL" << dendl;
+ osd->reply_op_error(op, -EINVAL);
+ return;
+ }
+
ObjectContextRef obc;
bool can_create = op->may_write();
hobject_t missing_oid;
@@ -2314,11 +2334,13 @@ void PrimaryLogPG::do_op(OpRequestRef& op)
dout(20) << __func__
<< ": unstable write on replica, bouncing to primary "
<< *m << dendl;
+ osd->logger->inc(l_osd_replica_read_redirect_conflict);
osd->reply_op_error(op, -EAGAIN);
return;
}
dout(20) << __func__ << ": serving replica read on oid " << oid
<< dendl;
+ osd->logger->inc(l_osd_replica_read_served);
}
int r = find_object_context(
@@ -5786,10 +5808,19 @@ int PrimaryLogPG::do_extent_cmp(OpContext *ctx, OSDOp& osd_op)
int PrimaryLogPG::finish_extent_cmp(OSDOp& osd_op, const bufferlist &read_bl)
{
- for (uint64_t idx = 0; idx < osd_op.indata.length(); ++idx) {
- char read_byte = (idx < read_bl.length() ? read_bl[idx] : 0);
- if (osd_op.indata[idx] != read_byte) {
- return (-MAX_ERRNO - idx);
+ auto input_iter = osd_op.indata.begin();
+ auto read_iter = read_bl.begin();
+ uint64_t idx = 0;
+
+ while (input_iter != osd_op.indata.end()) {
+ char read_byte = (read_iter != read_bl.end() ? *read_iter : 0);
+ if (*input_iter != read_byte) {
+ return (-MAX_ERRNO - idx);
+ }
+ ++idx;
+ ++input_iter;
+ if (read_iter != read_bl.end()) {
+ ++read_iter;
}
}
@@ -5994,7 +6025,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
object_info_t& oi = obs.oi;
const hobject_t& soid = oi.soid;
const bool skip_data_digest = osd->store->has_builtin_csum() &&
- osd->osd_skip_data_digest;
+ *osd->osd_skip_data_digest;
PGTransaction* t = ctx->op_t.get();
@@ -6057,9 +6088,9 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
// munge ZERO -> TRUNCATE? (don't munge to DELETE or we risk hosing attributes)
if (op.op == CEPH_OSD_OP_ZERO &&
obs.exists &&
- op.extent.offset < static_cast<Option::size_t>(osd->osd_max_object_size) &&
+ op.extent.offset < *osd->osd_max_object_size &&
op.extent.length >= 1 &&
- op.extent.length <= static_cast<Option::size_t>(osd->osd_max_object_size) &&
+ op.extent.length <= *osd->osd_max_object_size &&
op.extent.offset + op.extent.length >= oi.size) {
if (op.extent.offset >= oi.size) {
// no-op
@@ -6769,7 +6800,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6826,7 +6857,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
result = check_offset_and_length(
0, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6876,7 +6907,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
{ // zero
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -6941,7 +6972,7 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = check_offset_and_length(
op.extent.offset, op.extent.length,
- static_cast<Option::size_t>(osd->osd_max_object_size), get_dpp());
+ *osd->osd_max_object_size, get_dpp());
if (result < 0)
break;
@@ -7755,27 +7786,34 @@ int PrimaryLogPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
bool truncated = false;
bufferlist bl;
if (oi.is_omap()) {
- ObjectMap::ObjectMapIterator iter = osd->store->get_omap_iterator(
- ch, ghobject_t(soid)
- );
- if (!iter) {
- result = -ENOENT;
- goto fail;
- }
- iter->upper_bound(start_after);
- if (filter_prefix > start_after) iter->lower_bound(filter_prefix);
- for (num = 0;
- iter->valid() &&
- iter->key().substr(0, filter_prefix.size()) == filter_prefix;
- ++num, iter->next()) {
- dout(20) << "Found key " << iter->key() << dendl;
- if (num >= max_return ||
- bl.length() >= cct->_conf->osd_max_omap_bytes_per_request) {
- truncated = true;
- break;
- }
- encode(iter->key(), bl);
- encode(iter->value(), bl);
+ using omap_iter_seek_t = ObjectStore::omap_iter_seek_t;
+ result = osd->store->omap_iterate(
+ ch, ghobject_t(soid),
+ // try to seek as many keys-at-once as possible for the sake of performance.
+ // note complexity should be logarithmic, so seek(n/2) + seek(n/2) is worse
+ // than just seek(n).
+ ObjectStore::omap_iter_seek_t{
+ .seek_position = std::max(start_after, filter_prefix),
+ .seek_type = filter_prefix > start_after ? omap_iter_seek_t::LOWER_BOUND
+ : omap_iter_seek_t::UPPER_BOUND
+ },
+ [&bl, &truncated, &filter_prefix, &num, max_return,
+ max_bytes=cct->_conf->osd_max_omap_bytes_per_request]
+ (std::string_view key, std::string_view value) mutable {
+ if (key.substr(0, filter_prefix.size()) != filter_prefix) {
+ return ObjectStore::omap_iter_ret_t::STOP;
+ }
+ if (num >= max_return || bl.length() >= max_bytes) {
+ truncated = true;
+ return ObjectStore::omap_iter_ret_t::STOP;
+ }
+ encode(key, bl);
+ encode(value, bl);
+ ++num;
+ return ObjectStore::omap_iter_ret_t::NEXT;
+ });
+ if (result < 0) {
+ goto fail;
}
} // else return empty out_set
encode(num, osd_op.outdata);
@@ -11491,7 +11529,7 @@ void PrimaryLogPG::issue_repop(RepGather *repop, OpContext *ctx)
ctx->at_version,
std::move(ctx->op_t),
recovery_state.get_pg_trim_to(),
- recovery_state.get_min_last_complete_ondisk(),
+ recovery_state.get_pg_committed_to(),
std::move(ctx->log),
ctx->updated_hset_history,
on_all_commit,
@@ -11623,7 +11661,7 @@ void PrimaryLogPG::submit_log_entries(
eversion_t old_last_update = info.last_update;
recovery_state.merge_new_log_entries(
entries, t, recovery_state.get_pg_trim_to(),
- recovery_state.get_min_last_complete_ondisk());
+ recovery_state.get_pg_committed_to());
set<pg_shard_t> waiting_on;
for (set<pg_shard_t>::const_iterator i = get_acting_recovery_backfill().begin();
@@ -11643,7 +11681,7 @@ void PrimaryLogPG::submit_log_entries(
get_last_peering_reset(),
repop->rep_tid,
recovery_state.get_pg_trim_to(),
- recovery_state.get_min_last_complete_ondisk());
+ recovery_state.get_pg_committed_to());
osd->send_message_osd_cluster(
peer.osd, m, get_osdmap_epoch());
waiting_on.insert(peer);
@@ -12384,7 +12422,10 @@ int PrimaryLogPG::recover_missing(
int priority,
PGBackend::RecoveryHandle *h)
{
- dout(10) << __func__ << " sar: " << scrub_after_recovery << dendl;
+ dout(10) << fmt::format(
+ "{} sar: {}", __func__,
+ m_scrubber->is_after_repair_required())
+ << dendl;
if (recovery_state.get_missing_loc().is_unfound(soid)) {
dout(7) << __func__ << " " << soid
@@ -12415,7 +12456,7 @@ int PrimaryLogPG::recover_missing(
if (!object_missing) {
object_stat_sum_t stat_diff;
stat_diff.num_objects_recovered = 1;
- if (scrub_after_recovery)
+ if (m_scrubber->is_after_repair_required())
stat_diff.num_objects_repaired = 1;
on_global_recover(soid, stat_diff, true);
} else {
@@ -12641,17 +12682,18 @@ void PrimaryLogPG::do_update_log_missing(OpRequestRef &op)
op->get_req());
ceph_assert(m->get_type() == MSG_OSD_PG_UPDATE_LOG_MISSING);
ObjectStore::Transaction t;
- std::optional<eversion_t> op_trim_to, op_roll_forward_to;
+ std::optional<eversion_t> op_trim_to, op_pg_committed_to;
if (m->pg_trim_to != eversion_t())
op_trim_to = m->pg_trim_to;
- if (m->pg_roll_forward_to != eversion_t())
- op_roll_forward_to = m->pg_roll_forward_to;
+ if (m->pg_committed_to != eversion_t())
+ op_pg_committed_to = m->pg_committed_to;
dout(20) << __func__
- << " op_trim_to = " << op_trim_to << " op_roll_forward_to = " << op_roll_forward_to << dendl;
+ << " op_trim_to = " << op_trim_to << " op_pg_committed_to = "
+ << op_pg_committed_to << dendl;
recovery_state.append_log_entries_update_missing(
- m->entries, t, op_trim_to, op_roll_forward_to);
+ m->entries, t, op_trim_to, op_pg_committed_to);
eversion_t new_lcod = info.last_complete;
Context *complete = new LambdaContext(
@@ -13223,7 +13265,7 @@ void PrimaryLogPG::_clear_recovery_state()
#ifdef DEBUG_RECOVERY_OIDS
recovering_oids.clear();
#endif
- dout(15) << __func__ << " flags: " << m_planned_scrub << dendl;
+ dout(15) << __func__ << dendl;
last_backfill_started = hobject_t();
set<hobject_t>::iterator i = backfills_in_flight.begin();