diff options
author | Sage Weil <sage@redhat.com> | 2018-10-30 02:38:57 +0100 |
---|---|---|
committer | Sage Weil <sage@redhat.com> | 2018-10-30 02:38:57 +0100 |
commit | 5f51cd286b747b1729006a5b98fb08b1b646237a (patch) | |
tree | 7511384f9b9385ce2ea9bfaab2c68987b53280d9 | |
parent | Merge PR #24796 into nautilus (diff) | |
parent | include/types: fixed compile warning for signed/unsigned comparison (diff) | |
download | ceph-14.0.1.tar.xz ceph-14.0.1.zip |
Merge PR #24804 into nautilusv14.0.1
* refs/pull/24804/head:
include/types: fixed compile warning for signed/unsigned comparison
osd/PrimaryLogPG: uncommitted dup ops should respond with logged return code
osd/PrimaryLogPG: propagate error return codes on object copy_get ops
osd/PGLog: optionally record error return codes for extra_reqids
osd/osd_types: include PG log return codes in object copy data
-rw-r--r-- | src/include/types.h | 2 | ||||
-rw-r--r-- | src/osd/PG.h | 2 | ||||
-rw-r--r-- | src/osd/PGLog.cc | 12 | ||||
-rw-r--r-- | src/osd/PGLog.h | 21 | ||||
-rw-r--r-- | src/osd/PrimaryLogPG.cc | 55 | ||||
-rw-r--r-- | src/osd/PrimaryLogPG.h | 2 | ||||
-rw-r--r-- | src/osd/osd_types.cc | 28 | ||||
-rw-r--r-- | src/osd/osd_types.h | 7 | ||||
-rw-r--r-- | src/osdc/Objecter.h | 9 | ||||
-rw-r--r-- | src/test/librados/tier.cc | 52 |
10 files changed, 150 insertions, 40 deletions
diff --git a/src/include/types.h b/src/include/types.h index 73123bc9733..8103cabbb36 100644 --- a/src/include/types.h +++ b/src/include/types.h @@ -158,7 +158,7 @@ inline std::ostream& operator<<(std::ostream& out, const std::deque<A,Alloc>& v) template<typename... Ts> inline std::ostream& operator<<(std::ostream& out, const std::tuple<Ts...> &t) { - auto f = [n = sizeof...(Ts), i = 0, &out](const auto& e) mutable { + auto f = [n = sizeof...(Ts), i = 0U, &out](const auto& e) mutable { out << e; if (++i != n) out << ","; diff --git a/src/osd/PG.h b/src/osd/PG.h index f695f6c5e9d..5d9b48b8b50 100644 --- a/src/osd/PG.h +++ b/src/osd/PG.h @@ -1283,7 +1283,7 @@ protected: map<hobject_t, list<Context*>> callbacks_for_degraded_object; map<eversion_t, - list<pair<OpRequestRef, version_t> > > waiting_for_ondisk; + list<tuple<OpRequestRef, version_t, int> > > waiting_for_ondisk; void requeue_object_waiters(map<hobject_t, list<OpRequestRef>>& m); void requeue_op(OpRequestRef op); diff --git a/src/osd/PGLog.cc b/src/osd/PGLog.cc index 1ed178b3cd3..ab9ead3fb92 100644 --- a/src/osd/PGLog.cc +++ b/src/osd/PGLog.cc @@ -78,10 +78,20 @@ void PGLog::IndexedLog::trim( } dups.push_back(pg_log_dup_t(e)); index(dups.back()); + uint32_t idx = 0; for (const auto& extra : e.extra_reqids) { + int return_code = e.return_code; + if (return_code >= 0) { + auto it = e.extra_reqid_return_codes.find(idx); + if (it != e.extra_reqid_return_codes.end()) { + return_code = it->second; + } + } + ++idx; + // note: extras have the same version as outer op dups.push_back(pg_log_dup_t(e.version, extra.second, - extra.first, e.return_code)); + extra.first, return_code)); index(dups.back()); } } diff --git a/src/osd/PGLog.h b/src/osd/PGLog.h index 3b16e09ef54..532da798824 100644 --- a/src/osd/PGLog.h +++ b/src/osd/PGLog.h @@ -289,13 +289,20 @@ public: } p = extra_caller_ops.find(r); if (p != extra_caller_ops.end()) { + uint32_t idx = 0; for (auto i = p->second->extra_reqids.begin(); i != p->second->extra_reqids.end(); - ++i) { + ++idx, ++i) { if (i->first == r) { *version = p->second->version; *user_version = i->second; *return_code = p->second->return_code; + if (*return_code >= 0) { + auto it = p->second->extra_reqid_return_codes.find(idx); + if (it != p->second->extra_reqid_return_codes.end()) { + *return_code = it->second; + } + } return true; } } @@ -318,7 +325,8 @@ public: /// get a (bounded) list of recent reqids for the given object void get_object_reqids(const hobject_t& oid, unsigned max, - mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *pls) const { + mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *pls, + mempool::osd_pglog::map<uint32_t, int> *return_codes) const { // make sure object is present at least once before we do an // O(n) search. if (!(indexed_data & PGLOG_INDEXED_OBJECTS)) { @@ -326,12 +334,19 @@ public: } if (objects.count(oid) == 0) return; + for (list<pg_log_entry_t>::const_reverse_iterator i = log.rbegin(); i != log.rend(); ++i) { if (i->soid == oid) { - if (i->reqid_is_indexed()) + if (i->reqid_is_indexed()) { + if (i->op == pg_log_entry_t::ERROR) { + // propagate op errors to the cache tier's PG log + return_codes->emplace(pls->size(), i->return_code); + } pls->push_back(make_pair(i->reqid, i->user_version)); + } + pls->insert(pls->end(), i->extra_reqids.begin(), i->extra_reqids.end()); if (pls->size() >= max) { if (pls->size() > max) { diff --git a/src/osd/PrimaryLogPG.cc b/src/osd/PrimaryLogPG.cc index c0ccd802e12..8cd65df20f6 100644 --- a/src/osd/PrimaryLogPG.cc +++ b/src/osd/PrimaryLogPG.cc @@ -2167,7 +2167,7 @@ void PrimaryLogPG::do_op(OpRequestRef& op) } else { dout(10) << " waiting for " << version << " to commit" << dendl; // always queue ondisk waiters, so that we can requeue if needed - waiting_for_ondisk[version].push_back(make_pair(op, user_version)); + waiting_for_ondisk[version].emplace_back(op, user_version, return_code); op->mark_delayed("waiting for ondisk"); } return; @@ -8485,8 +8485,10 @@ void PrimaryLogPG::finish_ctx(OpContext *ctx, int log_op_type) } if (!ctx->extra_reqids.empty()) { - dout(20) << __func__ << " extra_reqids " << ctx->extra_reqids << dendl; + dout(20) << __func__ << " extra_reqids " << ctx->extra_reqids << " " + << ctx->extra_reqid_return_codes << dendl; ctx->log.back().extra_reqids.swap(ctx->extra_reqids); + ctx->log.back().extra_reqid_return_codes.swap(ctx->extra_reqid_return_codes); } // apply new object state. @@ -8784,7 +8786,9 @@ int PrimaryLogPG::do_copy_get(OpContext *ctx, bufferlist::const_iterator& bp, if (cursor.is_complete()) { // include reqids only in the final step. this is a bit fragile // but it works... - pg_log.get_log().get_object_reqids(ctx->obc->obs.oi.soid, 10, &reply_obj.reqids); + pg_log.get_log().get_object_reqids(ctx->obc->obs.oi.soid, 10, + &reply_obj.reqids, + &reply_obj.reqid_return_codes); dout(20) << " got reqids" << dendl; } @@ -8820,7 +8824,8 @@ void PrimaryLogPG::fill_in_copy_get_noent(OpRequestRef& op, hobject_t oid, uint64_t features = m->get_features(); object_copy_data_t reply_obj; - pg_log.get_log().get_object_reqids(oid, 10, &reply_obj.reqids); + pg_log.get_log().get_object_reqids(oid, 10, &reply_obj.reqids, + &reply_obj.reqid_return_codes); dout(20) << __func__ << " got reqids " << reply_obj.reqids << dendl; encode(reply_obj, osd_op.outdata, features); osd_op.rval = -ENOENT; @@ -8922,6 +8927,7 @@ void PrimaryLogPG::_copy_some(ObjectContextRef obc, CopyOpRef cop) &cop->results.source_data_digest, &cop->results.source_omap_digest, &cop->results.reqids, + &cop->results.reqid_return_codes, &cop->results.truncate_seq, &cop->results.truncate_size, &cop->rval); @@ -9464,6 +9470,7 @@ void PrimaryLogPG::finish_copyfrom(CopyFromCallback *cb) obs.oi.truncate_size = cb->results->truncate_size; ctx->extra_reqids = cb->results->reqids; + ctx->extra_reqid_return_codes = cb->results->reqid_return_codes; // cache: clear whiteout? if (obs.oi.is_whiteout()) { @@ -9627,6 +9634,7 @@ void PrimaryLogPG::finish_promote(int r, CopyResults *results, tctx->new_obs.exists = true; tctx->extra_reqids = results->reqids; + tctx->extra_reqid_return_codes = results->reqid_return_codes; if (whiteout) { // create a whiteout @@ -10363,12 +10371,13 @@ void PrimaryLogPG::eval_repop(RepGather *repop) auto it = waiting_for_ondisk.find(repop->v); if (it != waiting_for_ondisk.end()) { ceph_assert(waiting_for_ondisk.begin()->first == repop->v); - for (list<pair<OpRequestRef, version_t> >::iterator i = - it->second.begin(); - i != it->second.end(); - ++i) { - osd->reply_op_error(i->first, repop->r, repop->v, - i->second); + for (auto& i : it->second) { + int return_code = repop->r; + if (return_code >= 0) { + return_code = std::get<2>(i); + } + osd->reply_op_error(std::get<0>(i), return_code, repop->v, + std::get<1>(i)); } waiting_for_ondisk.erase(it); } @@ -11877,15 +11886,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) } // also requeue any dups, interleaved into position - map<eversion_t, list<pair<OpRequestRef, version_t> > >::iterator p = - waiting_for_ondisk.find(repop->v); + auto p = waiting_for_ondisk.find(repop->v); if (p != waiting_for_ondisk.end()) { dout(10) << " also requeuing ondisk waiters " << p->second << dendl; - for (list<pair<OpRequestRef, version_t> >::iterator i = - p->second.begin(); - i != p->second.end(); - ++i) { - rq.push_back(i->first); + for (auto& i : p->second) { + rq.push_back(std::get<0>(i)); } waiting_for_ondisk.erase(p); } @@ -11899,17 +11904,11 @@ void PrimaryLogPG::apply_and_flush_repops(bool requeue) if (requeue) { requeue_ops(rq); if (!waiting_for_ondisk.empty()) { - for (map<eversion_t, list<pair<OpRequestRef, version_t> > >::iterator i = - waiting_for_ondisk.begin(); - i != waiting_for_ondisk.end(); - ++i) { - for (list<pair<OpRequestRef, version_t> >::iterator j = - i->second.begin(); - j != i->second.end(); - ++j) { - derr << __func__ << ": op " << *(j->first->get_req()) << " waiting on " - << i->first << dendl; - } + for (auto& i : waiting_for_ondisk) { + for (auto& j : i.second) { + derr << __func__ << ": op " << *(std::get<0>(j)->get_req()) + << " waiting on " << i.first << dendl; + } } ceph_assert(waiting_for_ondisk.empty()); } diff --git a/src/osd/PrimaryLogPG.h b/src/osd/PrimaryLogPG.h index 3707bb98473..f93aeed5b4c 100644 --- a/src/osd/PrimaryLogPG.h +++ b/src/osd/PrimaryLogPG.h @@ -95,6 +95,7 @@ public: uint32_t source_data_digest, source_omap_digest; uint32_t data_digest, omap_digest; mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > reqids; // [(reqid, user_version)] + mempool::osd_pglog::map<uint32_t, int> reqid_return_codes; // map reqids by index to error code map<string, bufferlist> attrs; // xattrs uint64_t truncate_seq; uint64_t truncate_size; @@ -576,6 +577,7 @@ public: int num_write; ///< count update ops mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > extra_reqids; + mempool::osd_pglog::map<uint32_t, int> extra_reqid_return_codes; hobject_t new_temp_oid, discard_temp_oid; ///< temp objects we should start/stop tracking diff --git a/src/osd/osd_types.cc b/src/osd/osd_types.cc index 79b0ab8482b..783d753edce 100644 --- a/src/osd/osd_types.cc +++ b/src/osd/osd_types.cc @@ -4022,7 +4022,7 @@ void pg_log_entry_t::decode_with_checksum(bufferlist::const_iterator& p) void pg_log_entry_t::encode(bufferlist &bl) const { - ENCODE_START(11, 4, bl); + ENCODE_START(12, 4, bl); encode(op, bl); encode(soid, bl); encode(version, bl); @@ -4049,12 +4049,14 @@ void pg_log_entry_t::encode(bufferlist &bl) const encode(extra_reqids, bl); if (op == ERROR) encode(return_code, bl); + if (!extra_reqids.empty()) + encode(extra_reqid_return_codes, bl); ENCODE_FINISH(bl); } void pg_log_entry_t::decode(bufferlist::const_iterator &bl) { - DECODE_START_LEGACY_COMPAT_LEN(11, 4, 4, bl); + DECODE_START_LEGACY_COMPAT_LEN(12, 4, 4, bl); decode(op, bl); if (struct_v < 2) { sobject_t old_soid; @@ -4108,6 +4110,8 @@ void pg_log_entry_t::decode(bufferlist::const_iterator &bl) decode(extra_reqids, bl); if (struct_v >= 11 && op == ERROR) decode(return_code, bl); + if (struct_v >= 12 && !extra_reqids.empty()) + decode(extra_reqid_return_codes, bl); DECODE_FINISH(bl); } @@ -4119,12 +4123,17 @@ void pg_log_entry_t::dump(Formatter *f) const f->dump_stream("prior_version") << prior_version; f->dump_stream("reqid") << reqid; f->open_array_section("extra_reqids"); + uint32_t idx = 0; for (auto p = extra_reqids.begin(); p != extra_reqids.end(); - ++p) { + ++idx, ++p) { f->open_object_section("extra_reqid"); f->dump_stream("reqid") << p->first; f->dump_stream("user_version") << p->second; + auto it = extra_reqid_return_codes.find(idx); + if (it != extra_reqid_return_codes.end()) { + f->dump_int("return_code", it->second); + } f->close_section(); } f->close_section(); @@ -4495,7 +4504,7 @@ void object_copy_cursor_t::generate_test_instances(list<object_copy_cursor_t*>& void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const { - ENCODE_START(7, 5, bl); + ENCODE_START(8, 5, bl); encode(size, bl); encode(mtime, bl); encode(attrs, bl); @@ -4511,6 +4520,7 @@ void object_copy_data_t::encode(bufferlist& bl, uint64_t features) const encode(reqids, bl); encode(truncate_seq, bl); encode(truncate_size, bl); + encode(reqid_return_codes, bl); ENCODE_FINISH(bl); } @@ -4574,6 +4584,9 @@ void object_copy_data_t::decode(bufferlist::const_iterator& bl) decode(truncate_seq, bl); decode(truncate_size, bl); } + if (struct_v >= 8) { + decode(reqid_return_codes, bl); + } } DECODE_FINISH(bl); } @@ -4633,12 +4646,17 @@ void object_copy_data_t::dump(Formatter *f) const f->dump_unsigned("snap", *p); f->close_section(); f->open_array_section("reqids"); + uint32_t idx = 0; for (auto p = reqids.begin(); p != reqids.end(); - ++p) { + ++idx, ++p) { f->open_object_section("extra_reqid"); f->dump_stream("reqid") << p->first; f->dump_stream("user_version") << p->second; + auto it = reqid_return_codes.find(idx); + if (it != reqid_return_codes.end()) { + f->dump_int("return_code", it->second); + } f->close_section(); } f->close_section(); diff --git a/src/osd/osd_types.h b/src/osd/osd_types.h index c87e3bccf46..448d811fcb1 100644 --- a/src/osd/osd_types.h +++ b/src/osd/osd_types.h @@ -3485,6 +3485,10 @@ struct pg_log_entry_t { hobject_t soid; osd_reqid_t reqid; // caller+tid to uniquely identify request mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > extra_reqids; + + ///< map extra_reqids by index to error return code (if any) + mempool::osd_pglog::map<uint32_t, int> extra_reqid_return_codes; + eversion_t version, prior_version, reverting_to; version_t user_version; // the user version for this entry utime_t mtime; // this is the _user_ mtime, mind you @@ -4484,6 +4488,9 @@ struct object_copy_data_t { ///< recent reqids on this object mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > reqids; + ///< map reqids by index to error return code (if any) + mempool::osd_pglog::map<uint32_t, int> reqid_return_codes; + uint64_t truncate_seq; uint64_t truncate_size; diff --git a/src/osdc/Objecter.h b/src/osdc/Objecter.h index 0dc1eebe80b..99b12346f38 100644 --- a/src/osdc/Objecter.h +++ b/src/osdc/Objecter.h @@ -737,6 +737,7 @@ struct ObjectOperation { uint32_t *out_data_digest; uint32_t *out_omap_digest; mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *out_reqids; + mempool::osd_pglog::map<uint32_t, int> *out_reqid_return_codes; uint64_t *out_truncate_seq; uint64_t *out_truncate_size; int *prval; @@ -752,6 +753,7 @@ struct ObjectOperation { uint32_t *dd, uint32_t *od, mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *oreqids, + mempool::osd_pglog::map<uint32_t, int> *oreqid_return_codes, uint64_t *otseq, uint64_t *otsize, int *r) @@ -761,6 +763,7 @@ struct ObjectOperation { out_omap_data(o), out_snaps(osnaps), out_snap_seq(osnap_seq), out_flags(flags), out_data_digest(dd), out_omap_digest(od), out_reqids(oreqids), + out_reqid_return_codes(oreqid_return_codes), out_truncate_seq(otseq), out_truncate_size(otsize), prval(r) {} @@ -801,6 +804,8 @@ struct ObjectOperation { *out_omap_digest = copy_reply.omap_digest; if (out_reqids) *out_reqids = copy_reply.reqids; + if (out_reqid_return_codes) + *out_reqid_return_codes = copy_reply.reqid_return_codes; if (out_truncate_seq) *out_truncate_seq = copy_reply.truncate_seq; if (out_truncate_size) @@ -827,6 +832,7 @@ struct ObjectOperation { uint32_t *out_data_digest, uint32_t *out_omap_digest, mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > *out_reqids, + mempool::osd_pglog::map<uint32_t, int> *out_reqid_return_codes, uint64_t *truncate_seq, uint64_t *truncate_size, int *prval) { @@ -841,7 +847,8 @@ struct ObjectOperation { out_attrs, out_data, out_omap_header, out_omap_data, out_snaps, out_snap_seq, out_flags, out_data_digest, - out_omap_digest, out_reqids, truncate_seq, + out_omap_digest, out_reqids, + out_reqid_return_codes, truncate_seq, truncate_size, prval); out_bl[p] = &h->bl; out_handler[p] = h; diff --git a/src/test/librados/tier.cc b/src/test/librados/tier.cc index 4a777934af5..98d454c2238 100644 --- a/src/test/librados/tier.cc +++ b/src/test/librados/tier.cc @@ -6420,3 +6420,55 @@ TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) { // wait for maps to settle before next test cluster.wait_for_latest_osdmap(); } + +TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) { + // write object to base tier + bufferlist omap_bl; + encode(static_cast<uint32_t>(0U), omap_bl); + + ObjectWriteOperation op1; + op1.omap_set({{"somekey", omap_bl}}); + ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1)); + + // configure cache + bufferlist inbl; + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name + + "\", \"tierpool\": \"" + cache_pool_name + + "\", \"force_nonempty\": \"--force-nonempty\" }", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name + + "\", \"mode\": \"writeback\"}", + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name + + "\", \"overlaypool\": \"" + cache_pool_name + "\"}", + inbl, NULL, NULL)); + + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_type", "bloom"), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_count", 1), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "hit_set_period", 600), + inbl, NULL, NULL)); + ASSERT_EQ(0, cluster.mon_command( + set_pool_str(cache_pool_name, "target_max_objects", 250), + inbl, NULL, NULL)); + + // wait for maps to settle + cluster.wait_for_latest_osdmap(); + + // guarded op should fail so expect error to propagate to cache tier + bufferlist test_omap_bl; + encode(static_cast<uint32_t>(1U), test_omap_bl); + + ObjectWriteOperation op2; + op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr); + op2.omap_set({{"somekey", test_omap_bl}}); + + ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2)); +} |