summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSage Weil <sage@inktank.com>2012-08-22 06:12:33 +0200
committerSage Weil <sage@inktank.com>2012-08-22 19:51:57 +0200
commit1113a6c56739a56871f01fa13da881dab36a32c4 (patch)
tree00241afb8582aa8e9a343b65fd34ed7dde5d0616
parentosd: fix requeue order for waiting_for_ondisk (diff)
downloadceph-1113a6c56739a56871f01fa13da881dab36a32c4.tar.xz
ceph-1113a6c56739a56871f01fa13da881dab36a32c4.zip
objecter: use ordered map<> for tracking tids to preserve order on resend
We are using a hash_map<> to map tids to Op*'s. In handle_osd_map(), we will recalc_op_target() on each Op in a random (hash) order. These will get put in a temp map<tid,Op*> to ensure they are resent in the correct order, but their order on the session->ops list will be random. Then later, if we reset an OSD connection, we will resend everything for that session in ops order, which is incorrect. Fix this by explicitly reordering the requests to resend in kick_requests(), much like we do in handle_osd_map(). This lets us continue to use a hash_map<>, which is faster for reasonable numbers of requests. A simpler but slower fix would be to just use map<> instead. This is one of many bugs contributing to #2947. Signed-off-by: Sage Weil <sage@inktank.com> Reviewed-by: Samuel Just <sam.just@inktank.com>
-rw-r--r--src/osdc/Objecter.cc14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/osdc/Objecter.cc b/src/osdc/Objecter.cc
index eeed06173ee..3a42277dbcd 100644
--- a/src/osdc/Objecter.cc
+++ b/src/osdc/Objecter.cc
@@ -758,21 +758,31 @@ void Objecter::kick_requests(OSDSession *session)
ldout(cct, 10) << "kick_requests for osd." << session->osd << dendl;
// resend ops
+ map<tid_t,Op*> resend; // resend in tid order
for (xlist<Op*>::iterator p = session->ops.begin(); !p.end();) {
Op *op = *p;
++p;
logger->inc(l_osdc_op_resend);
if (op->should_resend) {
- send_op(op);
+ resend[op->tid] = op;
} else {
cancel_op(op);
}
}
+ while (!resend.empty()) {
+ send_op(resend.begin()->second);
+ resend.erase(resend.begin());
+ }
// resend lingers
+ map<uint64_t, LingerOp*> lresend; // resend in order
for (xlist<LingerOp*>::iterator j = session->linger_ops.begin(); !j.end(); ++j) {
logger->inc(l_osdc_linger_resend);
- send_linger(*j);
+ lresend[(*j)->linger_id] = *j;
+ }
+ while (!lresend.empty()) {
+ send_linger(lresend.begin()->second);
+ lresend.erase(lresend.begin());
}
}