summary | refs | log | tree | commit | diff | stats
path: root/src/msg
diff options
context:
space:
mode:
author xie xingguo <xie.xingguo@zte.com.cn> 2019-02-27 08:34:46 +0100
committer xie xingguo <xie.xingguo@zte.com.cn> 2019-03-01 02:31:35 +0100
commit 61b9432ef9a3847eceb96f8d5a854567c49bbf61 (patch)
tree ec5630456b0f0ee3ec33ef62d7944348d636d957 /src/msg
parent Merge PR #26663 into master (diff)
download ceph-61b9432ef9a3847eceb96f8d5a854567c49bbf61.tar.xz
ceph-61b9432ef9a3847eceb96f8d5a854567c49bbf61.zip
msg/async/Protocol*: send keep alive if existing wins
This is a follow-up fix to https://github.com/ceph/ceph/pull/25754. It turns out that the existing connection would win the connection race and then be stuck in the __replacing__ stage forever, unaware that the underlying Pipe is actually broken. Fix by forcing the existing connection to send a keepalive periodically __too__.

Fixes: http://tracker.ceph.com/issues/38493
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
Diffstat (limited to 'src/msg')
-rw-r--r-- src/msg/async/ProtocolV1.cc 2
-rw-r--r-- src/msg/async/ProtocolV2.cc 4
2 files changed, 6 insertions, 0 deletions
diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc
index dac1bbd5840..80d2b8e26b7 100644
--- a/src/msg/async/ProtocolV1.cc
+++ b/src/msg/async/ProtocolV1.cc
@@ -1998,6 +1998,8 @@ CtPtr ProtocolV1::handle_connect_message_2() {
<< " existing_state="
<< connection->get_state_name(existing->state) << dendl;
reply.global_seq = exproto->peer_global_seq;
+ // make sure we notice if existing connection is no longer functioning
+ existing->send_keepalive();
existing->lock.unlock();
return send_connect_message_reply(CEPH_MSGR_TAG_RETRY_GLOBAL, reply,
authorizer_reply);
diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc
index cf700b601d7..ebf6dcea407 100644
--- a/src/msg/async/ProtocolV2.cc
+++ b/src/msg/async/ProtocolV2.cc
@@ -2464,6 +2464,8 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload)
ldout(cct, 1) << __func__
<< " existing racing replace happened while replacing."
<< " existing=" << existing << dendl;
+ // make sure we notice if existing connection is no longer functioning
+ existing->send_keepalive();
auto retry = RetryGlobalFrame::Encode(session_stream_handlers,
exproto->peer_global_seq);
return WRITE(retry, "session retry", read_frame);
@@ -2571,6 +2573,8 @@ CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) {
ldout(cct, 1) << __func__
<< " existing racing replace happened while replacing."
<< " existing=" << existing << dendl;
+ // make sure we notice if existing connection is no longer functioning
+ existing->send_keepalive();
auto wait = WaitFrame::Encode(session_stream_handlers);
return WRITE(wait, "wait", read_frame);
}