diff options
author | Yan, Zheng <zyan@redhat.com> | 2019-09-16 14:58:23 +0200 |
---|---|---|
committer | Yan, Zheng <zyan@redhat.com> | 2019-12-12 19:04:12 +0100 |
commit | eab59a85e121e3e42de8456f36dcec76863b8aba (patch) | |
tree | e8ef3f3c9511693dd2a155ffb4915a0ea80d2f6a /src/mds/Migrator.cc | |
parent | mds: add 'path_locked' flag to MDCache::find_ino_peers() (diff) | |
download | ceph-eab59a85e121e3e42de8456f36dcec76863b8aba.tar.xz ceph-eab59a85e121e3e42de8456f36dcec76863b8aba.zip |
mds: adjust locking for subtree migration
Take snap rdlocks (instead of taking dentry locks) on subtree's ancestor
inodes. Path to subtree is stable after they are all locked.
For dirfragtree locks on subtree bounds, it's not convenient to rdlock
them in top-down order (paths to them are not stable). The solution is
kicking them to SYNC state and try taking rdlocks on all of them.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Diffstat (limited to 'src/mds/Migrator.cc')
-rw-r--r-- | src/mds/Migrator.cc | 139 |
1 files changed, 83 insertions, 56 deletions
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 9ab9824527e..23f0179922c 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -737,37 +737,42 @@ public: }; -void Migrator::get_export_lock_set(CDir *dir, MutationImpl::LockOpVec& lov) +bool Migrator::export_try_grab_locks(CDir *dir, MutationRef& mut) { - // path - vector<CDentry*> trace; - cache->make_trace(trace, dir->inode); + CInode *diri = dir->get_inode(); - set<CDir*> wouldbe_bounds; - cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds); + if (!diri->filelock.can_wrlock(diri->get_loner()) || + !diri->nestlock.can_wrlock(diri->get_loner())) + return false; - lov.reserve(trace.size() + wouldbe_bounds.size() + 8); + MutationImpl::LockOpVec lov; - for (auto& dn : trace) - lov.add_rdlock(&dn->lock); + set<CDir*> wouldbe_bounds; + set<CInode*> bound_inodes; + cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds); + for (auto& bound : wouldbe_bounds) + bound_inodes.insert(bound->get_inode()); + for (auto& in : bound_inodes) + lov.add_rdlock(&in->dirfragtreelock); + + lov.add_rdlock(&diri->dirfragtreelock); + + CInode* in = diri; + while (true) { + lov.add_rdlock(&in->snaplock); + CDentry* pdn = in->get_projected_parent_dn(); + if (!pdn) + break; + in = pdn->get_dir()->get_inode(); + } - // prevent scatter gather race - lov.add_rdlock(&dir->get_inode()->dirfragtreelock); + if (!mds->locker->rdlock_try_set(lov, mut)) + return false; - // bound dftlocks: - // NOTE: We need to take an rdlock on bounding dirfrags during - // migration for a rather irritating reason: when we export the - // bound inode, we need to send scatterlock state for the dirfrags - // as well, so that the new auth also gets the correct info. If we - // race with a refragment, this info is useless, as we can't - // redivvy it up. And it's needed for the scatterlocks to work - // properly: when the auth is in a sync/lock state it keeps each - // dirfrag's portion in the local (auth OR replica) dirfrag. - for (auto& dir : wouldbe_bounds) - lov.add_rdlock(&dir->get_inode()->dirfragtreelock); + mds->locker->wrlock_force(&diri->filelock, mut); + mds->locker->wrlock_force(&diri->nestlock, mut); - // above code may add duplicated locks - lov.sort_and_merge(); + return true; } @@ -1067,26 +1072,54 @@ void Migrator::dispatch_export_dir(MDRequestRef& mdr, int count) } // locks? - MutationImpl::LockOpVec lov; - get_export_lock_set(dir, lov); - // If auth MDS of the subtree root inode is neither the exporter MDS - // nor the importer MDS and it gathers subtree root's fragstat/neststat - // while the subtree is exporting. It's possible that the exporter MDS - // and the importer MDS both are auth MDS of the subtree root or both - // are not auth MDS of the subtree root at the time they receive the - // lock messages. So the auth MDS of the subtree root inode may get no - // or duplicated fragstat/neststat for the subtree root dirfrag. - lov.lock_scatter_gather(&dir->get_inode()->filelock); - lov.lock_scatter_gather(&dir->get_inode()->nestlock); - if (dir->get_inode()->is_auth()) { - dir->get_inode()->filelock.set_scatter_wanted(); - dir->get_inode()->nestlock.set_scatter_wanted(); - } - - if (!mds->locker->acquire_locks(mdr, lov, NULL, true)) { - if (mdr->aborted) - export_try_cancel(dir); - return; + if (!(mdr->locking_state & MutationImpl::ALL_LOCKED)) { + MutationImpl::LockOpVec lov; + // If auth MDS of the subtree root inode is neither the exporter MDS + // nor the importer MDS and it gathers subtree root's fragstat/neststat + // while the subtree is exporting. It's possible that the exporter MDS + // and the importer MDS both are auth MDS of the subtree root or both + // are not auth MDS of the subtree root at the time they receive the + // lock messages. So the auth MDS of the subtree root inode may get no + // or duplicated fragstat/neststat for the subtree root dirfrag. + lov.lock_scatter_gather(&dir->get_inode()->filelock); + lov.lock_scatter_gather(&dir->get_inode()->nestlock); + if (dir->get_inode()->is_auth()) { + dir->get_inode()->filelock.set_scatter_wanted(); + dir->get_inode()->nestlock.set_scatter_wanted(); + } + lov.add_rdlock(&dir->get_inode()->dirfragtreelock); + + if (!mds->locker->acquire_locks(mdr, lov, nullptr, true)) { + if (mdr->aborted) + export_try_cancel(dir); + return; + } + + lov.clear(); + // bound dftlocks: + // NOTE: We need to take an rdlock on bounding dirfrags during + // migration for a rather irritating reason: when we export the + // bound inode, we need to send scatterlock state for the dirfrags + // as well, so that the new auth also gets the correct info. If we + // race with a refragment, this info is useless, as we can't + // redivvy it up. And it's needed for the scatterlocks to work + // properly: when the auth is in a sync/lock state it keeps each + // dirfrag's portion in the local (auth OR replica) dirfrag. + set<CDir*> wouldbe_bounds; + set<CInode*> bound_inodes; + cache->get_wouldbe_subtree_bounds(dir, wouldbe_bounds); + for (auto& bound : wouldbe_bounds) + bound_inodes.insert(bound->get_inode()); + for (auto& in : bound_inodes) + lov.add_rdlock(&in->dirfragtreelock); + + if (!mds->locker->rdlock_try_set(lov, mdr)) + return; + + if (!mds->locker->try_rdlock_snap_layout(dir->get_inode(), mdr)) + return; + + mdr->locking_state |= MutationImpl::ALL_LOCKED; } ceph_assert(g_conf()->mds_kill_export_at != 1); @@ -1314,28 +1347,20 @@ void Migrator::export_frozen(CDir *dir, uint64_t tid) ceph_assert(it->second.state == EXPORT_FREEZING); ceph_assert(dir->is_frozen_tree_root()); - CInode *diri = dir->get_inode(); + it->second.mut = new MutationImpl(); // ok, try to grab all my locks. - MutationImpl::LockOpVec lov; - get_export_lock_set(dir, lov); + CInode *diri = dir->get_inode(); if ((diri->is_auth() && diri->is_frozen()) || - !mds->locker->can_rdlock_set(lov) || - // for pinning scatter gather. loner has a higher chance to get wrlock - !diri->filelock.can_wrlock(diri->get_loner()) || - !diri->nestlock.can_wrlock(diri->get_loner())) { + !export_try_grab_locks(dir, it->second.mut)) { dout(7) << "export_dir couldn't acquire all needed locks, failing. " << *dir << dendl; export_try_cancel(dir); return; } - it->second.mut = new MutationImpl(); if (diri->is_auth()) it->second.mut->auth_pin(diri); - mds->locker->rdlock_take_set(lov, it->second.mut); - mds->locker->wrlock_force(&diri->filelock, it->second.mut); - mds->locker->wrlock_force(&diri->nestlock, it->second.mut); cache->show_subtrees(); @@ -2301,7 +2326,9 @@ void Migrator::handle_export_discover(const cref_t<MExportDirDiscover> &m, bool filepath fpath(m->get_path()); vector<CDentry*> trace; MDRequestRef null_ref; - int r = cache->path_traverse(null_ref, cf, fpath, MDS_TRAVERSE_DISCOVER, &trace); + int r = cache->path_traverse(null_ref, cf, fpath, + MDS_TRAVERSE_DISCOVER | MDS_TRAVERSE_PATH_LOCKED, + &trace); if (r > 0) return; if (r < 0) { dout(7) << "handle_export_discover failed to discover or not dir " << m->get_path() << ", NAK" << dendl; |