summaryrefslogtreecommitdiffstats
path: root/src/mds/Migrator.cc
diff options
context:
space:
mode:
authorSidharth Anupkrishnan <sanupkri@redhat.com>2020-01-14 14:35:47 +0100
committerPatrick Donnelly <pdonnell@redhat.com>2020-06-25 00:43:31 +0200
commitced15ed7ef70ff832d4bebedecb89944276b0395 (patch)
tree4ea62525bc8d928f7e46d8147b500b0da1e96531 /src/mds/Migrator.cc
parentmds: trim pinned and empty subtrees (diff)
downloadceph-ced15ed7ef70ff832d4bebedecb89944276b0395.tar.xz
ceph-ced15ed7ef70ff832d4bebedecb89944276b0395.zip
mds: add ephemeral pinning for subtrees
This PR introduces inode xattrs export_ephemeral_random and export_ephemeral_distributed, which enable two different metadata distribution strategies - the first being suitable for a more depthwise scaling of metadata (height of the tree keeps increasing) and the latter for horizontal scaling (many subtrees under a single parent). export_ephemeral_distributed is not hierarchical. Any direct descendant directory (i.e. a child directory) has an ephemeral export pin applied to it according to a consistent hash of the child directory inode number. export_ephemeral_random is hierarchical like "export_pin". Any CDir loaded into the cache may be ephemerally pinned to a random rank. Like "export_ephemeral_distributed", the random rank is determined by a consistent hash. The metadata distribution strategies are facilitated by using John Lamping and Eric Veach's Jump Consistent Hashing as the consistent hash algorithm. This hashing algorithm eliminates the need to store the data structures representing the consistent hash cluster state and performs as well as Akamai's original implementation, providing a fairly uniform distribution. This algorithm only works for distributed systems with numbered buckets (nodes) arranged in ascending order, where cluster resizes do not produce any holes in the arrangement of nodes, i.e. (0, 1, 2, 3) --[removing node 1]--> (0, 1, 2). CephFS satisfies these conditions as the MDSs are arranged as numbered ranks and cluster modifications do not produce any holes in the resulting arrangement of ranks. Fixes: https://tracker.ceph.com/issues/41302 Signed-off-by: Sidharth Anupkrishnan <sanupkri@redhat.com> Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
Diffstat (limited to 'src/mds/Migrator.cc')
-rw-r--r--src/mds/Migrator.cc30
1 files changed, 28 insertions, 2 deletions
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index 355cc3c0dfa..f5600031e79 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -2236,6 +2236,11 @@ void Migrator::export_finish(CDir *dir)
mut->cleanup();
}
+ if (dir->get_inode()->is_export_ephemeral_distributed_migrating)
+ dir->get_inode()->finish_export_ephemeral_distributed_migration();
+ else if (dir->get_inode()->is_export_ephemeral_random_migrating)
+ dir->get_inode()->finish_export_ephemeral_random_migration();
+
if (parent)
child_export_finish(parent, true);
@@ -3135,7 +3140,29 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
MutationRef mut = it->second.mut;
import_state.erase(it);
- mds->mdlog->start_submit_entry(new EImportFinish(dir, true));
+ // start the journal entry
+ EImportFinish *le = new EImportFinish(dir, true);
+ mds->mdlog->start_entry(le);
+
+ CInode *in = dir->get_inode();
+
+ CDentry *pdn = in->get_parent_dn();
+
+ if (in->get_export_ephemeral_random_pin(false)) { // Lazy checks. FIXME
+ le->metablob.add_primary_dentry(pdn, in, false, false, false, false,
+ false, true);
+ in->is_export_ephemeral_random_pinned = true;
+ cache->ephemeral_pins.push_back(&in->ephemeral_pin_inode);
+ } else if (pdn->get_dir()->get_inode()
+ && pdn->get_dir()->get_inode()->get_export_ephemeral_distributed_pin()) {
+ le->metablob.add_primary_dentry(pdn, in, false, false, false, false,
+ true, false);
+ in->is_export_ephemeral_distributed_pinned = true;
+ cache->ephemeral_pins.push_back(&in->ephemeral_pin_inode);
+ }
+
+ // log it
+ mds->mdlog->submit_entry(le);
// process delayed expires
cache->process_delayed_expire(dir);
@@ -3176,7 +3203,6 @@ void Migrator::import_finish(CDir *dir, bool notify, bool last)
}
}
-
void Migrator::decode_import_inode(CDentry *dn, bufferlist::const_iterator& blp,
mds_rank_t oldauth, LogSegment *ls,
map<CInode*, map<client_t,Capability::Export> >& peer_exports,