diff options
author | Erqi Chen <chenerqi@gmail.com> | 2021-03-04 14:48:09 +0100 |
---|---|---|
committer | Erqi Chen <chenerqi@gmail.com> | 2021-03-07 07:27:02 +0100 |
commit | 0767336b43155426af8d4ea9f85a7cfbbf032e8c (patch) | |
tree | ab1f6e8d7f4e003d4453d417b87df98a600bfd44 /src/mds/CDir.cc | |
parent | Merge pull request #38435 from votdev/issue_48449_test_standby (diff) | |
download | ceph-0767336b43155426af8d4ea9f85a7cfbbf032e8c.tar.xz ceph-0767336b43155426af8d4ea9f85a7cfbbf032e8c.zip |
mds: fix race of fetching large dirfrag
When a dirfrag contains more than 'mds_dir_keys_per_op' items, the MDS
needs to send multiple 'omap-get-vals' requests to fetch the dirfrag
completely. There is a race if the MDS commits the dirfrag in the middle
of these 'omap-get-vals' requests.
Re-fetch from the beginning if the dirfrag gets committed in the middle of
the 'omap-get-vals' requests.
Fixes: https://tracker.ceph.com/issues/49617
Signed-off-by: Erqi Chen <chenerqi@kuaishou.com>
Diffstat (limited to 'src/mds/CDir.cc')
-rw-r--r-- | src/mds/CDir.cc | 44 |
1 files changed, 30 insertions, 14 deletions
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index 2a8ea6777b0..47bcd6fe21f 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -1606,14 +1606,21 @@ void CDir::fetch(MDSContext *c, const std::set<dentry_key_t>& keys) class C_IO_Dir_OMAP_FetchedMore : public CDirIOContext { MDSContext *fin; public: + const version_t omap_version; bufferlist hdrbl; bool more = false; map<string, bufferlist> omap; ///< carry-over from before map<string, bufferlist> omap_more; ///< new batch int ret; - C_IO_Dir_OMAP_FetchedMore(CDir *d, MDSContext *f) : - CDirIOContext(d), fin(f), ret(0) { } + C_IO_Dir_OMAP_FetchedMore(CDir *d, version_t v, MDSContext *f) : + CDirIOContext(d), fin(f), omap_version(v), ret(0) { } void finish(int r) { + if (omap_version < dir->get_committed_version()) { + omap.clear(); + dir->_omap_fetch(fin, {}); + return; + } + // merge results if (omap.empty()) { omap.swap(omap_more); @@ -1621,7 +1628,7 @@ public: omap.insert(omap_more.begin(), omap_more.end()); } if (more) { - dir->_omap_fetch_more(hdrbl, omap, fin); + dir->_omap_fetch_more(omap_version, hdrbl, omap, fin); } else { dir->_omap_fetched(hdrbl, omap, !fin, r); if (fin) @@ -1636,6 +1643,7 @@ public: class C_IO_Dir_OMAP_Fetched : public CDirIOContext { MDSContext *fin; public: + const version_t omap_version; bufferlist hdrbl; bool more = false; map<string, bufferlist> omap; @@ -1643,20 +1651,30 @@ public: int ret1, ret2, ret3; C_IO_Dir_OMAP_Fetched(CDir *d, MDSContext *f) : - CDirIOContext(d), fin(f), ret1(0), ret2(0), ret3(0) { } + CDirIOContext(d), fin(f), + omap_version(d->get_committing_version()), + ret1(0), ret2(0), ret3(0) { } void finish(int r) override { // check the correctness of backtrace if (r >= 0 && ret3 != -ECANCELED) dir->inode->verify_diri_backtrace(btbl, ret3); if (r >= 0) r = ret1; if (r >= 0) r = ret2; + if (more) { - dir->_omap_fetch_more(hdrbl, omap, fin); - } else { - dir->_omap_fetched(hdrbl, omap, !fin, r); - if (fin) - fin->complete(r); + if (omap_version < 
dir->get_committed_version()) { + omap.clear(); + dir->_omap_fetch(fin, {}); + } else { + dir->_omap_fetch_more(omap_version, hdrbl, omap, fin); + } + return; } + + dir->_omap_fetched(hdrbl, omap, !fin, r); + if (fin) + fin->complete(r); + } void print(ostream& out) const override { out << "dirfrag_fetch(" << dir->dirfrag() << ")"; @@ -1696,15 +1714,13 @@ void CDir::_omap_fetch(MDSContext *c, const std::set<dentry_key_t>& keys) new C_OnFinisher(fin, mdcache->mds->finisher)); } -void CDir::_omap_fetch_more( - bufferlist& hdrbl, - map<string, bufferlist>& omap, - MDSContext *c) +void CDir::_omap_fetch_more(version_t omap_version, bufferlist& hdrbl, + map<string, bufferlist>& omap, MDSContext *c) { // we have more omap keys to fetch! object_t oid = get_ondisk_object(); object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool()); - C_IO_Dir_OMAP_FetchedMore *fin = new C_IO_Dir_OMAP_FetchedMore(this, c); + auto fin = new C_IO_Dir_OMAP_FetchedMore(this, omap_version, c); fin->hdrbl = std::move(hdrbl); fin->omap.swap(omap); ObjectOperation rd; |