diff options
author | Shilpa Jagannath <smanjara@redhat.com> | 2023-05-11 19:36:25 +0200 |
---|---|---|
committer | Shilpa Jagannath <smanjara@redhat.com> | 2023-06-15 01:59:55 +0200 |
commit | ae4a61b1c722f83e4f3d9ac0ffaa94d9f9c4cde1 (patch) | |
tree | 3d776f323bd781163ba93d321e1ecdc4b50e0359 /src/rgw/driver/rados/rgw_data_sync.cc | |
parent | rgw/multisite: data sync fairness. Add bid manager and sync lock notification CR (diff) | |
download | ceph-ae4a61b1c722f83e4f3d9ac0ffaa94d9f9c4cde1.tar.xz ceph-ae4a61b1c722f83e4f3d9ac0ffaa94d9f9c4cde1.zip |
rgw/multisite: full sync and incremental sync changes to handle lost_lock and lost_bid
Signed-off-by: Shilpa Jagannath <smanjara@redhat.com>
Diffstat (limited to 'src/rgw/driver/rados/rgw_data_sync.cc')
-rw-r--r-- | src/rgw/driver/rados/rgw_data_sync.cc | 126 |
1 files changed, 77 insertions, 49 deletions
diff --git a/src/rgw/driver/rados/rgw_data_sync.cc b/src/rgw/driver/rados/rgw_data_sync.cc index 9b6401530b5..135b2d958b2 100644 --- a/src/rgw/driver/rados/rgw_data_sync.cc +++ b/src/rgw/driver/rados/rgw_data_sync.cc @@ -1766,6 +1766,8 @@ class RGWDataFullSyncShardCR : public RGWDataBaseSyncShardCR { std::map<std::string, bufferlist> entries; std::map<std::string, bufferlist>::iterator iter; string error_marker; + bool lost_lock = false; + bool lost_bid = false; public: @@ -1789,10 +1791,17 @@ public: entry_timestamp = sync_marker.timestamp; // time when full sync started do { if (!lease_cr->is_locked()) { - drain_all(); tn->log(1, "lease is lost, abort"); - return set_cr_error(-ECANCELED); + lost_lock = true; + break; + } + + if (!sc->env->bid_manager->is_highest_bidder(shard_id)) { + tn->log(1, "lost bid"); + lost_bid = true; + break; } + omapvals = std::make_shared<RGWRadosGetOmapValsCR::Result>(); yield call(new RGWRadosGetOmapValsCR(sc->env->driver, rgw_raw_obj(pool, oid), @@ -1836,29 +1845,35 @@ public: } while (omapvals->more); omapvals.reset(); - drain_all(); - tn->unset_flag(RGW_SNS_FLAG_ACTIVE); - /* update marker to reflect we're done with full sync */ - sync_marker.state = rgw_data_sync_marker::IncrementalSync; - sync_marker.marker = sync_marker.next_step_marker; - sync_marker.next_step_marker.clear(); - yield call(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>( - sc->env->dpp, sc->env->driver, - rgw_raw_obj(pool, status_oid), sync_marker, &objv)); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: failed to set sync marker: retcode=" << retcode)); - return set_cr_error(retcode); + if (lost_bid) { + yield call(marker_tracker->flush()); + } else if (!lost_lock) { + /* update marker to reflect we're done with full sync */ + sync_marker.state = rgw_data_sync_marker::IncrementalSync; + sync_marker.marker = sync_marker.next_step_marker; + sync_marker.next_step_marker.clear(); + yield call(new RGWSimpleRadosWriteCR<rgw_data_sync_marker>( + sc->env->dpp, sc->env->driver, + rgw_raw_obj(pool, status_oid), sync_marker, &objv)); + if (retcode < 0) { + tn->log(0, SSTR("ERROR: failed to set sync marker: retcode=" << retcode)); + return set_cr_error(retcode); + } + + // clean up full sync index, ignoring errors + yield call(new RGWRadosRemoveCR(sc->env->driver, {pool, oid})); + + // transition to incremental sync + return set_cr_done(); } - // clean up full sync index, ignoring errors - yield call(new RGWRadosRemoveCR(sc->env->driver, {pool, oid})); + if (lost_lock || lost_bid) { + return set_cr_error(-EBUSY); + } - // transition to incremental sync - return set_cr_done(); - } - return 0; + } return 0; } }; @@ -1884,6 +1899,8 @@ class RGWDataIncSyncShardCR : public RGWDataBaseSyncShardCR { decltype(log_entries)::iterator log_iter; bool truncated = false; int cbret = 0; + bool lost_lock = false; + bool lost_bid = false; utime_t get_idle_interval() const { ceph::timespan interval = std::chrono::seconds(cct->_conf->rgw_data_sync_poll_interval); @@ -1923,9 +1940,15 @@ public: marker_tracker.emplace(sc, status_oid, sync_marker, tn, objv); do { if (!lease_cr->is_locked()) { - drain_all(); + lost_lock = true; tn->log(1, "lease is lost, abort"); - return set_cr_error(-ECANCELED); + break; + } + + if (!sc->env->bid_manager->is_highest_bidder(shard_id)) { + tn->log(1, "lost bid"); + lost_bid = true; + break; } { current_modified.clear(); @@ -1942,13 +1965,9 @@ public: modified_iter != current_modified.end(); ++modified_iter) { if (!lease_cr->is_locked()) { - drain_all(); - yield call(marker_tracker->flush()); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: data sync marker_tracker.flush() returned retcode=" << retcode)); - return set_cr_error(retcode); - } - return set_cr_error(-ECANCELED); + tn->log(1, "lease is lost, abort"); + lost_lock = true; + break; } retcode = parse_bucket_key(modified_iter->key, source_bs); if (retcode < 0) { @@ -1976,13 +1995,9 @@ public: iter = error_entries.begin(); for (; iter != error_entries.end(); ++iter) { if (!lease_cr->is_locked()) { - drain_all(); - yield call(marker_tracker->flush()); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: data sync marker_tracker.flush() returned retcode=" << retcode)); - return set_cr_error(retcode); - } - return set_cr_error(-ECANCELED); + tn->log(1, "lease is lost, abort"); + lost_lock = true; + break; } error_marker = iter->first; entry_timestamp = rgw::error_repo::decode_value(iter->second); @@ -2043,13 +2058,9 @@ public: log_iter != log_entries.end(); ++log_iter) { if (!lease_cr->is_locked()) { - drain_all(); - yield call(marker_tracker->flush()); - if (retcode < 0) { - tn->log(0, SSTR("ERROR: data sync marker_tracker.flush() returned retcode=" << retcode)); - return set_cr_error(retcode); - } - return set_cr_error(-ECANCELED); + tn->log(1, "lease is lost, abort"); + lost_lock = true; + break; } tn->log(20, SSTR("shard_id=" << shard_id << " log_entry: " << log_iter->log_id << ":" << log_iter->log_timestamp << ":" << log_iter->entry.key)); @@ -2101,6 +2112,15 @@ public: yield wait(get_idle_interval()); } } while (true); + + if (lost_bid) { + return set_cr_error(-EBUSY); + } else if (lost_lock) { + drain_all(); + yield marker_tracker->flush(); + return set_cr_error(-ECANCELED); + } + } return 0; } @@ -2160,6 +2180,12 @@ public: int operate(const DoutPrefixProvider *dpp) override { reenter(this) { + + if (!sc->env->bid_manager->is_highest_bidder(shard_id)) { + tn->log(10, "not the highest bidder"); + return set_cr_error(-EBUSY); + } + yield init_lease_cr(); while (!lease_cr->is_locked()) { if (lease_cr->is_done()) { @@ -2173,7 +2199,7 @@ public: } *reset_backoff = true; tn->log(10, "took lease"); - /* Reread data sync status to fech latest marker and objv */ + /* Reread data sync status to fetch latest marker and objv */ objv.clear(); yield call(new RGWSimpleRadosReadCR<rgw_data_sync_marker>(sync_env->dpp, sync_env->driver, rgw_raw_obj(pool, status_oid), @@ -2387,7 +2413,7 @@ public: if (init_lease->is_done()) { tn->log(5, "ERROR: failed to take data sync status lease"); set_status("lease lock failed, early abort"); - drain_all(); + drain_all_but_stack(notify_stack.get()); return set_cr_error(init_lease->get_ret_status()); } tn->log(5, "waiting on data sync status lease"); @@ -2415,7 +2441,7 @@ public: if (retcode < 0) { tn->log(0, SSTR("ERROR: failed to init sync, retcode=" << retcode)); init_lease->go_down(); - drain_all(); + drain_all_but_stack(notify_stack.get()); return set_cr_error(retcode); } // sets state = StateBuildingFullSyncMaps @@ -2437,7 +2463,7 @@ public: if (!init_lease->is_locked()) { init_lease->go_down(); - drain_all(); + drain_all_but_stack(notify_stack.get()); return set_cr_error(-ECANCELED); } /* state: building full sync maps */ @@ -2450,7 +2476,7 @@ public: if (!init_lease->is_locked()) { init_lease->go_down(); - drain_all(); + drain_all_but_stack(notify_stack.get()); return set_cr_error(-ECANCELED); } /* update new state */ @@ -2472,7 +2498,7 @@ public: if ((rgw_data_sync_info::SyncState)sync_status.sync_info.state == rgw_data_sync_info::StateSync) { if (init_lease) { init_lease->go_down(); - drain_all(); + drain_all_but_stack(notify_stack.get()); init_lease.reset(); lease_stack.reset(); } @@ -2491,6 +2517,8 @@ public: } } + notify_stack.get()->cancel(); + return set_cr_done(); } return 0; |