diff options
author | xie xingguo <xie.xingguo@zte.com.cn> | 2019-02-27 08:34:46 +0100 |
---|---|---|
committer | xie xingguo <xie.xingguo@zte.com.cn> | 2019-03-01 02:31:35 +0100 |
commit | 61b9432ef9a3847eceb96f8d5a854567c49bbf61 (patch) | |
tree | ec5630456b0f0ee3ec33ef62d7944348d636d957 /src/msg | |
parent | Merge PR #26663 into master (diff) | |
download | ceph-61b9432ef9a3847eceb96f8d5a854567c49bbf61.tar.xz ceph-61b9432ef9a3847eceb96f8d5a854567c49bbf61.zip |
msg/async/Protocol*: send keep alive if existing wins
This is a follow-up fix of https://github.com/ceph/ceph/pull/25754.
It turns out that the existing connection can win the connection race
and then remain stuck in the __replacing__ stage forever, without being
aware that the underlying Pipe is actually broken.
Fix by forcing the existing connection to send a keepalive periodically __too__.
Fixes: http://tracker.ceph.com/issues/38493
Signed-off-by: xie xingguo <xie.xingguo@zte.com.cn>
Diffstat (limited to 'src/msg')
-rw-r--r-- | src/msg/async/ProtocolV1.cc | 2 | ||||
-rw-r--r-- | src/msg/async/ProtocolV2.cc | 4 |
2 files changed, 6 insertions, 0 deletions
diff --git a/src/msg/async/ProtocolV1.cc b/src/msg/async/ProtocolV1.cc index dac1bbd5840..80d2b8e26b7 100644 --- a/src/msg/async/ProtocolV1.cc +++ b/src/msg/async/ProtocolV1.cc @@ -1998,6 +1998,8 @@ CtPtr ProtocolV1::handle_connect_message_2() { << " existing_state=" << connection->get_state_name(existing->state) << dendl; reply.global_seq = exproto->peer_global_seq; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); existing->lock.unlock(); return send_connect_message_reply(CEPH_MSGR_TAG_RETRY_GLOBAL, reply, authorizer_reply); diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc index cf700b601d7..ebf6dcea407 100644 --- a/src/msg/async/ProtocolV2.cc +++ b/src/msg/async/ProtocolV2.cc @@ -2464,6 +2464,8 @@ CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload) ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." << " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto retry = RetryGlobalFrame::Encode(session_stream_handlers, exproto->peer_global_seq); return WRITE(retry, "session retry", read_frame); @@ -2571,6 +2573,8 @@ CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) { ldout(cct, 1) << __func__ << " existing racing replace happened while replacing." << " existing=" << existing << dendl; + // make sure we notice if existing connection is no longer functioning + existing->send_keepalive(); auto wait = WaitFrame::Encode(session_stream_handlers); return WRITE(wait, "wait", read_frame); } |