diff options
-rw-r--r-- | src/include/compact_map.h | 15 | ||||
-rw-r--r-- | src/include/compact_set.h | 19 | ||||
-rw-r--r-- | src/mds/CDir.cc | 15 | ||||
-rw-r--r-- | src/mds/CDir.h | 1 | ||||
-rw-r--r-- | src/mds/CInode.cc | 547 | ||||
-rw-r--r-- | src/mds/CInode.h | 248 | ||||
-rw-r--r-- | src/mds/Locker.cc | 129 | ||||
-rw-r--r-- | src/mds/Locker.h | 8 | ||||
-rw-r--r-- | src/mds/MDCache.cc | 120 | ||||
-rw-r--r-- | src/mds/MDCache.h | 2 | ||||
-rw-r--r-- | src/mds/MDSDaemon.cc | 3 | ||||
-rw-r--r-- | src/mds/RecoveryQueue.cc | 2 | ||||
-rw-r--r-- | src/mds/Server.cc | 373 | ||||
-rw-r--r-- | src/mds/StrayManager.cc | 40 | ||||
-rw-r--r-- | src/mds/events/EMetaBlob.h | 21 | ||||
-rw-r--r-- | src/mds/journal.cc | 19 | ||||
-rw-r--r-- | src/mds/mdstypes.cc | 350 | ||||
-rw-r--r-- | src/mds/mdstypes.h | 441 | ||||
-rw-r--r-- | src/test/encoding/types.h | 4 | ||||
-rw-r--r-- | src/tools/cephfs/DataScan.cc | 2 |
20 files changed, 1202 insertions, 1157 deletions
diff --git a/src/include/compact_map.h b/src/include/compact_map.h index 52a7792adc8..9ccf97c1139 100644 --- a/src/include/compact_map.h +++ b/src/include/compact_map.h @@ -181,13 +181,18 @@ public: size_t count (const Key& k) const { return map ? map->count(k) : 0; } - void erase (iterator p) { + iterator erase (iterator p) { if (map) { assert(this == p.map); - map->erase(p.it); + auto it = map->erase(p.it); if (map->empty()) { free_internal(); + return iterator(this); + } else { + return iterator(this, it); } + } else { + return iterator(this); } } size_t erase (const Key& k) { @@ -216,6 +221,12 @@ public: alloc_internal(); return iterator(this, map->insert(val)); } + template <class... Args> + std::pair<iterator,bool> emplace ( Args&&... args ) { + alloc_internal(); + auto em = map->emplace(std::forward<Args>(args)...); + return std::pair<iterator,bool>(iterator(this, em.first), em.second); + } iterator begin() { if (!map) return iterator(this); diff --git a/src/include/compact_set.h b/src/include/compact_set.h index dd2ddd31165..e41ca95ed81 100644 --- a/src/include/compact_set.h +++ b/src/include/compact_set.h @@ -140,12 +140,18 @@ public: size_t count(const T& t) const { return set ? set->count(t) : 0; } - void erase (iterator p) { + iterator erase (iterator p) { if (set) { assert(this == p.set); - set->erase(p.it); - if (set->empty()) + auto it = set->erase(p.it); + if (set->empty()) { free_internal(); + return iterator(this); + } else { + return iterator(this, it); + } + } else { + return iterator(this); } } size_t erase (const T& t) { @@ -175,6 +181,13 @@ public: std::pair<typename Set::iterator,bool> r = set->insert(t); return std::make_pair(iterator(this, r.first), r.second); } + template <class... Args> + std::pair<iterator,bool> emplace ( Args&&... args ) { + alloc_internal(); + auto em = set->emplace(std::forward<Args>(args)...); + return std::pair<iterator,bool>(iterator(this, em.first), em.second); + } + iterator begin() { if (!set) return iterator(this); diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc index cb0cd07778d..9fb6a323fe8 100644 --- a/src/mds/CDir.cc +++ b/src/mds/CDir.cc @@ -200,14 +200,11 @@ CDir::CDir(CInode *in, frag_t fg, MDCache *mdcache, bool auth) : num_dentries_auth_subtree_nested(0), dir_auth(CDIR_AUTH_DEFAULT) { - state = STATE_INITIAL; - memset(&fnode, 0, sizeof(fnode)); // auth assert(in->is_dir()); - if (auth) - state |= STATE_AUTH; + if (auth) state_set(STATE_AUTH); } /** @@ -825,7 +822,7 @@ void CDir::steal_dentry(CDentry *dn) if (dn->get_linkage()->is_primary()) { CInode *in = dn->get_linkage()->get_inode(); - inode_t *pi = in->get_projected_inode(); + auto pi = in->get_projected_inode(); if (dn->get_linkage()->get_inode()->is_dir()) fnode.fragstat.nsubdirs++; else @@ -1143,7 +1140,7 @@ void CDir::merge(list<CDir*>& subs, list<MDSInternalContextBase*>& waiters, bool void CDir::resync_accounted_fragstat() { fnode_t *pf = get_projected_fnode(); - inode_t *pi = inode->get_projected_inode(); + auto pi = inode->get_projected_inode(); if (pf->accounted_fragstat.version != pi->dirstat.version) { pf->fragstat.version = pi->dirstat.version; @@ -1158,7 +1155,7 @@ void CDir::resync_accounted_fragstat() void CDir::resync_accounted_rstat() { fnode_t *pf = get_projected_fnode(); - inode_t *pi = inode->get_projected_inode(); + auto pi = inode->get_projected_inode(); if (pf->accounted_rstat.version != pi->rstat.version) { pf->rstat.version = pi->rstat.version; @@ -1178,8 +1175,8 @@ void CDir::assimilate_dirty_rstat_inodes() if (in->is_frozen()) continue; - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); inode->mdcache->project_rstat_inode_to_frag(in, this, 0, 0, NULL); } diff --git a/src/mds/CDir.h b/src/mds/CDir.h index f9137491829..3e6bc0eb2d2 100644 --- a/src/mds/CDir.h +++ b/src/mds/CDir.h @@ -95,7 +95,6 @@ public: // common states static const unsigned STATE_CLEAN = 0; - static const unsigned STATE_INITIAL = 0; // these state bits are preserved by an import/export // ...except if the directory is hashed, in which case none of them are! diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index ab68a7e054e..e01a0611393 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -147,14 +147,14 @@ ostream& operator<<(ostream& out, const CInode& in) if (in.is_frozen_inode()) out << " FROZEN"; if (in.is_frozen_auth_pin()) out << " FROZEN_AUTHPIN"; - const inode_t *pi = in.get_projected_inode(); + const CInode::mempool_inode *pi = in.get_projected_inode(); if (pi->is_truncating()) out << " truncating(" << pi->truncate_from << " to " << pi->truncate_size << ")"; if (in.inode.is_dir()) { out << " " << in.inode.dirstat; if (g_conf->mds_debug_scatterstat && in.is_projected()) { - const inode_t *pi = in.get_projected_inode(); + const CInode::mempool_inode *pi = in.get_projected_inode(); out << "->" << pi->dirstat; } } else { @@ -168,7 +168,7 @@ ostream& operator<<(ostream& out, const CInode& in) if (!(in.inode.rstat == in.inode.accounted_rstat)) out << "/" << in.inode.accounted_rstat; if (g_conf->mds_debug_scatterstat && in.is_projected()) { - const inode_t *pi = in.get_projected_inode(); + const CInode::mempool_inode *pi = in.get_projected_inode(); out << "->" << pi->rstat; if (!(pi->rstat == pi->accounted_rstat)) out << "/" << pi->accounted_rstat; @@ -229,12 +229,12 @@ ostream& operator<<(ostream& out, const CInode& in) } if (!in.get_mds_caps_wanted().empty()) { out << " mcw={"; - for (compact_map<int,int>::const_iterator p = in.get_mds_caps_wanted().begin(); - p != in.get_mds_caps_wanted().end(); - ++p) { - if (p != in.get_mds_caps_wanted().begin()) + bool first = true; + for (const auto &p : in.get_mds_caps_wanted()) { + if (!first) out << ','; - out << p->first << '=' << ccap_string(p->second); + out << p.first << '=' << ccap_string(p.second); + first = false; } out << '}'; } @@ -283,7 +283,7 @@ void CInode::add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client auth_pin(this); // pin head inode... } - set<client_t>& clients = client_need_snapflush[snapid]; + auto &clients = client_need_snapflush[snapid]; if (clients.empty()) snapin->auth_pin(this); // ...and pin snapped/old inode! @@ -293,18 +293,18 @@ void CInode::add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client void CInode::remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client) { dout(10) << __func__ << " client." << client << " snapid " << snapid << " on " << snapin << dendl; - compact_map<snapid_t, std::set<client_t> >::iterator p = client_need_snapflush.find(snapid); - if (p == client_need_snapflush.end()) { + auto it = client_need_snapflush.find(snapid); + if (it == client_need_snapflush.end()) { dout(10) << " snapid not found" << dendl; return; } - if (!p->second.count(client)) { + size_t n = it->second.erase(client); + if (n == 0) { dout(10) << " client not found" << dendl; return; } - p->second.erase(client); - if (p->second.empty()) { - client_need_snapflush.erase(p); + if (it->second.empty()) { + client_need_snapflush.erase(it); snapin->auth_unpin(this); if (client_need_snapflush.empty()) { @@ -318,16 +318,16 @@ bool CInode::split_need_snapflush(CInode *cowin, CInode *in) { dout(10) << __func__ << " [" << cowin->first << "," << cowin->last << "] for " << *cowin << dendl; bool need_flush = false; - for (compact_map<snapid_t, set<client_t> >::iterator p = client_need_snapflush.lower_bound(cowin->first); - p != client_need_snapflush.end() && p->first < in->first; ) { - compact_map<snapid_t, set<client_t> >::iterator q = p; - ++p; - assert(!q->second.empty()); - if (cowin->last >= q->first) { + for (auto it = client_need_snapflush.lower_bound(cowin->first); + it != client_need_snapflush.end() && it->first < in->first; ) { + assert(!it->second.empty()); + if (cowin->last >= it->first) { cowin->auth_pin(this); need_flush = true; - } else - client_need_snapflush.erase(q); + ++it; + } else { + it = client_need_snapflush.erase(it); + } in->auth_unpin(this); } return need_flush; @@ -361,105 +361,102 @@ void CInode::clear_dirty_rstat() } } -inode_t *CInode::project_inode(map<string,bufferptr> *px) +/* Ideally this function would be subsumed by project_inode but it is also + * needed by CInode::project_past_snaprealm_parent so we keep it. + */ +sr_t &CInode::project_snaprealm(projected_inode &pi) { - if (projected_nodes.empty()) { - projected_nodes.push_back(new projected_inode_t(new inode_t(inode))); - if (px) - *px = xattrs; + const sr_t *cur_srnode = get_projected_srnode(); + + assert(!pi.snapnode); + if (cur_srnode) { + pi.snapnode.reset(new sr_t(*cur_srnode)); } else { - projected_nodes.push_back(new projected_inode_t( - new inode_t(*projected_nodes.back()->inode))); - if (px) - *px = *get_projected_xattrs(); + pi.snapnode.reset(new sr_t()); + pi.snapnode->created = 0; + pi.snapnode->current_parent_since = get_oldest_snap(); } + ++num_projected_srnodes; - projected_inode_t &pi = *projected_nodes.back(); + dout(10) << __func__ << " " << pi.snapnode.get() << dendl; + return *pi.snapnode.get(); +} - if (px) { - pi.xattrs = px; - ++num_projected_xattrs; - } +CInode::projected_inode &CInode::project_inode(bool xattr, bool snap) +{ + auto &pi = projected_nodes.empty() ? + projected_nodes.emplace_back(inode) : + projected_nodes.emplace_back(projected_nodes.back().inode); if (scrub_infop && scrub_infop->last_scrub_dirty) { - pi.inode->last_scrub_stamp = scrub_infop->last_scrub_stamp; - pi.inode->last_scrub_version = scrub_infop->last_scrub_version; + pi.inode.last_scrub_stamp = scrub_infop->last_scrub_stamp; + pi.inode.last_scrub_version = scrub_infop->last_scrub_version; scrub_infop->last_scrub_dirty = false; scrub_maybe_delete_info(); } - dout(15) << __func__ << " " << pi.inode << dendl; - return pi.inode; + + if (xattr) { + pi.xattrs.reset(new mempool_xattr_map(*get_projected_xattrs())); + ++num_projected_xattrs; + } + + if (snap) { + project_snaprealm(pi); + } + + dout(15) << __func__ << " " << pi.inode.ino << dendl; + return pi; } void CInode::pop_and_dirty_projected_inode(LogSegment *ls) { assert(!projected_nodes.empty()); - dout(15) << __func__ << " " << projected_nodes.front()->inode - << " v" << projected_nodes.front()->inode->version << dendl; + auto &front = projected_nodes.front(); + dout(15) << __func__ << " " << front.inode.ino + << " v" << front.inode.version << dendl; int64_t old_pool = inode.layout.pool_id; - mark_dirty(projected_nodes.front()->inode->version, ls); - inode = *projected_nodes.front()->inode; + mark_dirty(front.inode.version, ls); + inode = front.inode; if (inode.is_backtrace_updated()) _mark_dirty_parent(ls, old_pool != inode.layout.pool_id); - map<string,bufferptr> *px = projected_nodes.front()->xattrs; - if (px) { + if (front.xattrs) { --num_projected_xattrs; - xattrs = *px; - delete px; + xattrs = *front.xattrs; } - if (projected_nodes.front()->snapnode) { - pop_projected_snaprealm(projected_nodes.front()->snapnode); + auto &snapnode = front.snapnode; + if (snapnode) { + pop_projected_snaprealm(snapnode.get()); --num_projected_srnodes; } - delete projected_nodes.front()->inode; - delete projected_nodes.front(); - projected_nodes.pop_front(); } -sr_t *CInode::project_snaprealm(snapid_t snapid) -{ - const sr_t *cur_srnode = get_projected_srnode(); - sr_t *new_srnode; - - if (cur_srnode) { - new_srnode = new sr_t(*cur_srnode); - } else { - new_srnode = new sr_t(); - new_srnode->created = snapid; - new_srnode->current_parent_since = get_oldest_snap(); - } - dout(10) << __func__ << " " << new_srnode << dendl; - projected_nodes.back()->snapnode = new_srnode; - ++num_projected_srnodes; - return new_srnode; -} - /* if newparent != parent, add parent to past_parents if parent DNE, we need to find what the parent actually is and fill that in */ void CInode::project_past_snaprealm_parent(SnapRealm *newparent) { - sr_t *new_snap = project_snaprealm(); + assert(!projected_nodes.empty()); + sr_t &new_snap = project_snaprealm(projected_nodes.back()); SnapRealm *oldparent; if (!snaprealm) { oldparent = find_snaprealm(); - new_snap->seq = oldparent->get_newest_seq(); + new_snap.seq = oldparent->get_newest_seq(); } else oldparent = snaprealm->parent; if (newparent != oldparent) { snapid_t oldparentseq = oldparent->get_newest_seq(); - if (oldparentseq + 1 > new_snap->current_parent_since) { - new_snap->past_parents[oldparentseq].ino = oldparent->inode->ino(); - new_snap->past_parents[oldparentseq].first = new_snap->current_parent_since; + if (oldparentseq + 1 > new_snap.current_parent_since) { + new_snap.past_parents[oldparentseq].ino = oldparent->inode->ino(); + new_snap.past_parents[oldparentseq].first = new_snap.current_parent_since; } - new_snap->current_parent_since = std::max(oldparentseq, newparent->get_last_created()) + 1; + new_snap.current_parent_since = std::max(oldparentseq, newparent->get_last_created()) + 1; } } @@ -481,7 +478,6 @@ void CInode::pop_projected_snaprealm(sr_t *next_snaprealm) << " -> " << next_snaprealm->past_parents << dendl; } snaprealm->srnode = *next_snaprealm; - delete next_snaprealm; // we should be able to open these up (or have them already be open). bool ok = snaprealm->_open_parents(NULL); @@ -520,7 +516,7 @@ frag_t InodeStoreBase::pick_dirfrag(std::string_view dn) bool CInode::get_dirfrags_under(frag_t fg, list<CDir*>& ls) { bool all = true; - list<frag_t> fglist; + std::list<frag_t> fglist; dirfragtree.get_leaves_under(fg, fglist); for (list<frag_t>::iterator p = fglist.begin(); p != fglist.end(); ++p) if (dirfrags.count(*p)) @@ -533,19 +529,20 @@ bool CInode::get_dirfrags_under(frag_t fg, list<CDir*>& ls) fragtree_t tmpdft; tmpdft.force_to_leaf(g_ceph_context, fg); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); p != dirfrags.end(); ++p) { - tmpdft.force_to_leaf(g_ceph_context, p->first); - if (fg.contains(p->first) && !dirfragtree.is_leaf(p->first)) - ls.push_back(p->second); + for (auto &p : dirfrags) { + tmpdft.force_to_leaf(g_ceph_context, p.first); + if (fg.contains(p.first) && !dirfragtree.is_leaf(p.first)) + ls.push_back(p.second); } all = true; tmpdft.get_leaves_under(fg, fglist); - for (list<frag_t>::iterator p = fglist.begin(); p != fglist.end(); ++p) - if (!dirfrags.count(*p)) { + for (const auto &p : fglist) { + if (!dirfrags.count(p)) { all = false; break; } + } return all; } @@ -553,10 +550,10 @@ bool CInode::get_dirfrags_under(frag_t fg, list<CDir*>& ls) void CInode::verify_dirfrags() { bool bad = false; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); p != dirfrags.end(); ++p) { - if (!dirfragtree.is_leaf(p->first)) { - dout(0) << "have open dirfrag " << p->first << " but not leaf in " << dirfragtree - << ": " << *p->second << dendl; + for (const auto &p : dirfrags) { + if (!dirfragtree.is_leaf(p.first)) { + dout(0) << "have open dirfrag " << p.first << " but not leaf in " << dirfragtree + << ": " << *p.second << dendl; bad = true; } } @@ -566,10 +563,10 @@ void CInode::verify_dirfrags() void CInode::force_dirfrags() { bool bad = false; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); p != dirfrags.end(); ++p) { - if (!dirfragtree.is_leaf(p->first)) { - dout(0) << "have open dirfrag " << p->first << " but not leaf in " << dirfragtree - << ": " << *p->second << dendl; + for (auto &p : dirfrags) { + if (!dirfragtree.is_leaf(p.first)) { + dout(0) << "have open dirfrag " << p.first << " but not leaf in " << dirfragtree + << ": " << *p.second << dendl; bad = true; } } @@ -604,31 +601,28 @@ CDir *CInode::get_approx_dirfrag(frag_t fg) return NULL; } -void CInode::get_dirfrags(list<CDir*>& ls) +void CInode::get_dirfrags(std::list<CDir*>& ls) { // all dirfrags - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) - ls.push_back(p->second); + for (const auto &p : dirfrags) { + ls.push_back(p.second); + } } void CInode::get_nested_dirfrags(list<CDir*>& ls) { // dirfrags in same subtree - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) - if (!p->second->is_subtree_root()) - ls.push_back(p->second); + for (const auto &p : dirfrags) { + if (!p.second->is_subtree_root()) + ls.push_back(p.second); + } } void CInode::get_subtree_dirfrags(list<CDir*>& ls) { // dirfrags that are roots of new subtrees - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) - if (p->second->is_subtree_root()) - ls.push_back(p->second); + for (const auto &p : dirfrags) { + if (p.second->is_subtree_root()) + ls.push_back(p.second); + } } @@ -696,23 +690,21 @@ void CInode::close_dirfrags() bool CInode::has_subtree_root_dirfrag(int auth) { - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) - if (p->second->is_subtree_root() && - (auth == -1 || p->second->dir_auth.first == auth)) + for (const auto &p : dirfrags) { + if (p.second->is_subtree_root() && + (auth == -1 || p.second->dir_auth.first == auth)) return true; + } return false; } bool CInode::has_subtree_or_exporting_dirfrag() { - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) - if (p->second->is_subtree_root() || - p->second->state_test(CDir::STATE_EXPORTING)) + for (const auto &p : dirfrags) { + if (p.second->is_subtree_root() || + p.second->state_test(CDir::STATE_EXPORTING)) return true; + } return false; } @@ -720,11 +712,9 @@ void CInode::get_stickydirs() { if (stickydir_ref == 0) { get(PIN_STICKYDIRS); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - p->second->state_set(CDir::STATE_STICKY); - p->second->get(CDir::PIN_STICKY); + for (const auto &p : dirfrags) { + p.second->state_set(CDir::STATE_STICKY); + p.second->get(CDir::PIN_STICKY); } } stickydir_ref++; @@ -736,11 +726,9 @@ void CInode::put_stickydirs() stickydir_ref--; if (stickydir_ref == 0) { put(PIN_STICKYDIRS); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - p->second->state_clear(CDir::STATE_STICKY); - p->second->put(CDir::PIN_STICKY); + for (const auto &p : dirfrags) { + p.second->state_clear(CDir::STATE_STICKY); + p.second->put(CDir::PIN_STICKY); } } } @@ -899,11 +887,11 @@ version_t CInode::pre_dirty() assert(is_base()); pv = get_projected_version() + 1; } - // force update backtrace for old format inode (see inode_t::decode) + // force update backtrace for old format inode (see mempool_inode::decode) if (inode.backtrace_version == 0 && !projected_nodes.empty()) { - inode_t *pi = projected_nodes.back()->inode; - if (pi->backtrace_version == 0) - pi->update_backtrace(pv); + mempool_inode &pi = projected_nodes.back().inode; + if (pi.backtrace_version == 0) + pi.update_backtrace(pv); } return pv; } @@ -1137,12 +1125,10 @@ void CInode::build_backtrace(int64_t pool, inode_backtrace_t& bt) in = diri; pdn = in->get_parent_dn(); } - for (compact_set<int64_t>::iterator i = inode.old_pools.begin(); - i != inode.old_pools.end(); - ++i) { + for (auto &p : inode.old_pools) { // don't add our own pool id to old_pools to avoid looping (e.g. setlayout 0, 1, 0) - if (*i != pool) - bt.old_pools.insert(*i); + if (p != pool) + bt.old_pools.insert(p); } } @@ -1204,20 +1190,18 @@ void CInode::store_backtrace(MDSInternalContextBase *fin, int op_prio) // In the case where DIRTYPOOL is set, we update all old pools backtraces // such that anyone reading them will see the new pool ID in // inode_backtrace_t::pool and go read everything else from there. - for (compact_set<int64_t>::iterator p = inode.old_pools.begin(); - p != inode.old_pools.end(); - ++p) { - if (*p == pool) + for (const auto &p : inode.old_pools) { + if (p == pool) continue; - dout(20) << __func__ << ": updating old pool " << *p << dendl; + dout(20) << __func__ << ": updating old pool " << p << dendl; ObjectOperation op; op.priority = op_prio; op.create(false); op.setxattr("parent", parent_bl); - object_locator_t oloc(*p); + object_locator_t oloc(p); mdcache->mds->objecter->mutate(oid, oloc, op, snapc, ceph::real_clock::now(), 0, gather.new_sub()); @@ -1366,8 +1350,11 @@ void InodeStoreBase::decode_bare(bufferlist::iterator &bl, { using ceph::decode; decode(inode, bl); - if (is_symlink()) - decode(symlink, bl); + if (is_symlink()) { + std::string tmp; + decode(tmp, bl); + symlink = std::string_view(tmp); + } decode(dirfragtree, bl); decode(xattrs, bl); decode(snap_blob, bl); @@ -1492,11 +1479,9 @@ void CInode::encode_lock_state(int type, bufferlist& bl) encode(inode.dirstat, bl); // only meaningful if i am auth. bufferlist tmp; __u32 n = 0; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - frag_t fg = p->first; - CDir *dir = p->second; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; if (is_auth() || dir->is_auth()) { fnode_t *pf = dir->get_projected_fnode(); dout(15) << fg << " " << *dir << dendl; @@ -1527,11 +1512,9 @@ void CInode::encode_lock_state(int type, bufferlist& bl) encode(inode.rstat, bl); // only meaningful if i am auth. bufferlist tmp; __u32 n = 0; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - frag_t fg = p->first; - CDir *dir = p->second; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; if (is_auth() || dir->is_auth()) { fnode_t *pf = dir->get_projected_fnode(); dout(10) << fg << " " << *dir << dendl; @@ -1653,15 +1636,13 @@ void CInode::decode_lock_state(int type, bufferlist& bl) // dft was scattered, or we may still be be waiting on the // notify from the auth) dirfragtree.swap(temp); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - if (!dirfragtree.is_leaf(p->first)) { - dout(10) << " forcing open dirfrag " << p->first << " to leaf (racing with split|merge)" << dendl; - dirfragtree.force_to_leaf(g_ceph_context, p->first); + for (const auto &p : dirfrags) { + if (!dirfragtree.is_leaf(p.first)) { + dout(10) << " forcing open dirfrag " << p.first << " to leaf (racing with split|merge)" << dendl; + dirfragtree.force_to_leaf(g_ceph_context, p.first); } - if (p->second->is_auth()) - p->second->state_clear(CDir::STATE_DIRTYDFT); + if (p.second->is_auth()) + p.second->state_clear(CDir::STATE_DIRTYDFT); } } if (g_conf->mds_debug_frag) @@ -1898,13 +1879,11 @@ void CInode::start_scatter(ScatterLock *lock) { dout(10) << __func__ << " " << *lock << " on " << *this << dendl; assert(is_auth()); - inode_t *pi = get_projected_inode(); + mempool_inode *pi = get_projected_inode(); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - frag_t fg = p->first; - CDir *dir = p->second; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; fnode_t *pf = dir->get_projected_fnode(); dout(20) << fg << " " << *dir << dendl; @@ -1960,7 +1939,7 @@ void CInode::finish_scatter_update(ScatterLock *lock, CDir *dir, MutationRef mut(new MutationImpl()); mut->ls = mdlog->get_current_segment(); - inode_t *pi = get_projected_inode(); + mempool_inode *pi = get_projected_inode(); fnode_t *pf = dir->project_fnode(); const char *ename = 0; @@ -2064,16 +2043,14 @@ void CInode::finish_scatter_gather_update(int type) // adjust summation assert(is_auth()); - inode_t *pi = get_projected_inode(); + mempool_inode *pi = get_projected_inode(); bool touched_mtime = false, touched_chattr = false; dout(20) << " orig dirstat " << pi->dirstat << dendl; pi->dirstat.version++; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - frag_t fg = p->first; - CDir *dir = p->second; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; dout(20) << fg << " " << *dir << dendl; bool update; @@ -2175,14 +2152,12 @@ void CInode::finish_scatter_gather_update(int type) // adjust summation assert(is_auth()); - inode_t *pi = get_projected_inode(); + mempool_inode *pi = get_projected_inode(); dout(20) << " orig rstat " << pi->rstat << dendl; pi->rstat.version++; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - frag_t fg = p->first; - CDir *dir = p->second; + for (const auto &p : dirfrags) { + frag_t fg = p.first; + CDir *dir = p.second; dout(20) << fg << " " << *dir << dendl; bool update; @@ -2274,10 +2249,8 @@ void CInode::finish_scatter_gather_update_accounted(int type, MutationRef& mut, dout(10) << __func__ << " " << type << " on " << *this << dendl; assert(is_auth()); - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - CDir *dir = p->second; + for (const auto &p : dirfrags) { + CDir *dir = p.second; if (!dir->is_auth() || dir->get_version() == 0 || dir->is_frozen()) continue; @@ -2332,11 +2305,11 @@ void CInode::take_dir_waiting(frag_t fg, list<MDSInternalContextBase*>& ls) if (waiting_on_dir.empty()) return; - compact_map<frag_t, list<MDSInternalContextBase*> >::iterator p = waiting_on_dir.find(fg); - if (p != waiting_on_dir.end()) { + auto it = waiting_on_dir.find(fg); + if (it != waiting_on_dir.end()) { dout(10) << __func__ << " frag " << fg << " on " << *this << dendl; - ls.splice(ls.end(), p->second); - waiting_on_dir.erase(p); + ls.splice(ls.end(), it->second); + waiting_on_dir.erase(it); if (waiting_on_dir.empty()) put(PIN_DIRWAITER); @@ -2368,10 +2341,10 @@ void CInode::take_waiting(uint64_t mask, list<MDSInternalContextBase*>& ls) if ((mask & WAIT_DIR) && !waiting_on_dir.empty()) { // take all dentry waiters while (!waiting_on_dir.empty()) { - compact_map<frag_t, list<MDSInternalContextBase*> >::iterator p = waiting_on_dir.begin(); - dout(10) << __func__ << " dirfrag " << p->first << " on " << *this << dendl; - ls.splice(ls.end(), p->second); - waiting_on_dir.erase(p); + auto it = waiting_on_dir.begin(); + dout(10) << __func__ << " dirfrag " << it->first << " on " << *this << dendl; + ls.splice(ls.end(), it->second); + waiting_on_dir.erase(it); } put(PIN_DIRWAITER); } @@ -2524,10 +2497,8 @@ void CInode::adjust_nested_auth_pins(int a, void *by) if (g_conf->mds_debug_auth_pins) { // audit int s = 0; - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - CDir *dir = p->second; + for (const auto &p : dirfrags) { + CDir *dir = p.second; if (!dir->is_subtree_root() && dir->get_cum_auth_pins()) s++; } @@ -2568,14 +2539,14 @@ snapid_t CInode::get_oldest_snap() return std::min(t, oldest_snap); } -old_inode_t& CInode::cow_old_inode(snapid_t follows, bool cow_head) +CInode::mempool_old_inode& CInode::cow_old_inode(snapid_t follows, bool cow_head) { assert(follows >= first); - inode_t *pi = cow_head ? get_projected_inode() : get_previous_projected_inode(); - map<string,bufferptr> *px = cow_head ? get_projected_xattrs() : get_previous_projected_xattrs(); + mempool_inode *pi = cow_head ? get_projected_inode() : get_previous_projected_inode(); + mempool_xattr_map *px = cow_head ? get_projected_xattrs() : get_previous_projected_xattrs(); - old_inode_t &old = old_inodes[follows]; + mempool_old_inode &old = old_inodes[follows]; old.first = first; old.inode = *pi; old.xattrs = *px; @@ -2602,15 +2573,15 @@ old_inode_t& CInode::cow_old_inode(snapid_t follows, bool cow_head) void CInode::split_old_inode(snapid_t snap) { - compact_map<snapid_t, old_inode_t>::iterator p = old_inodes.lower_bound(snap); - assert(p != old_inodes.end() && p->second.first < snap); + auto it = old_inodes.lower_bound(snap); + assert(it != old_inodes.end() && it->second.first < snap); - old_inode_t &old = old_inodes[snap - 1]; - old = p->second; + mempool_old_inode &old = old_inodes[snap - 1]; + old = it->second; - p->second.first = snap; - dout(10) << __func__ << " " << "[" << old.first << "," << p->first - << "] to [" << snap << "," << p->first << "] on " << *this << dendl; + it->second.first = snap; + dout(10) << __func__ << " " << "[" << old.first << "," << it->first + << "] to [" << snap << "," << it->first << "] on " << *this << dendl; } void CInode::pre_cow_old_inode() @@ -2642,29 +2613,27 @@ void CInode::purge_stale_snap_data(const set<snapid_t>& snaps) { dout(10) << __func__ << " " << snaps << dendl; - if (old_inodes.empty()) - return; - - compact_map<snapid_t,old_inode_t>::iterator p = old_inodes.begin(); - while (p != old_inodes.end()) { - set<snapid_t>::const_iterator q = snaps.lower_bound(p->second.first); - if (q == snaps.end() || *q > p->first) { - dout(10) << " purging old_inode [" << p->second.first << "," << p->first << "]" << dendl; - old_inodes.erase(p++); - } else - ++p; + for (auto it = old_inodes.begin(); it != old_inodes.end(); ) { + const snapid_t &id = it->first; + const auto &s = snaps.lower_bound(it->second.first); + if (s == snaps.end() || *s > id) { + dout(10) << " purging old_inode [" << it->second.first << "," << id << "]" << dendl; + it = old_inodes.erase(it); + } else { + ++it; + } } } /* * pick/create an old_inode */ -old_inode_t * CInode::pick_old_inode(snapid_t snap) +CInode::mempool_old_inode * CInode::pick_old_inode(snapid_t snap) { - compact_map<snapid_t, old_inode_t>::iterator p = old_inodes.lower_bound(snap); // p is first key >= to snap - if (p != old_inodes.end() && p->second.first <= snap) { - dout(10) << __func__ << " snap " << snap << " -> [" << p->second.first << "," << p->first << "]" << dendl; - return &p->second; + auto it = old_inodes.lower_bound(snap); // p is first key >= to snap + if (it != old_inodes.end() && it->second.first <= snap) { + dout(10) << __func__ << " snap " << snap << " -> [" << it->second.first << "," << it->first << "]" << dendl; + return &it->second; } dout(10) << __func__ << " snap " << snap << " -> nothing" << dendl; return NULL; @@ -3042,7 +3011,7 @@ int CInode::get_xlocker_mask(client_t client) const (linklock.gcaps_xlocker_mask(client) << linklock.get_cap_shift()); } -int CInode::get_caps_allowed_for_client(Session *session, inode_t *file_i) const +int CInode::get_caps_allowed_for_client(Session *session, mempool_inode *file_i) const { client_t client = session->get_client(); int allowed; @@ -3120,11 +3089,9 @@ int CInode::get_caps_wanted(int *ploner, int *pother, int shift, int mask) const //cout << " get_caps_wanted client " << it->first << " " << cap_string(it->second.wanted()) << endl; } if (is_auth()) - for (compact_map<int,int>::const_iterator it = mds_caps_wanted.begin(); - it != mds_caps_wanted.end(); - ++it) { - w |= it->second; - other |= it->second; + for (const auto &p : mds_caps_wanted) { + w |= p.second; + other |= p.second; //cout << " get_caps_wanted mds " << it->first << " " << cap_string(it->second) << endl; } if (ploner) *ploner = (loner >> shift) & mask; @@ -3178,10 +3145,10 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, bool valid = true; // pick a version! - inode_t *oi = &inode; - inode_t *pi = get_projected_inode(); + mempool_inode *oi = &inode; + mempool_inode *pi = get_projected_inode(); - map<string, bufferptr> *pxattrs = 0; + CInode::mempool_xattr_map *pxattrs = nullptr; if (snapid != CEPH_NOSNAP) { @@ -3190,19 +3157,20 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, valid = false; if (is_multiversion()) { - compact_map<snapid_t,old_inode_t>::iterator p = old_inodes.lower_bound(snapid); - if (p != old_inodes.end()) { - if (p->second.first > snapid) { - if (p != old_inodes.begin()) - --p; + auto it = old_inodes.lower_bound(snapid); + if (it != old_inodes.end()) { + if (it->second.first > snapid) { + if (it != old_inodes.begin()) + --it; } - if (p->second.first <= snapid && snapid <= p->first) { + if (it->second.first <= snapid && snapid <= it->first) { dout(15) << __func__ << " snapid " << snapid - << " to old_inode [" << p->second.first << "," << p->first << "]" - << " " << p->second.inode.rstat + << " to old_inode [" << it->second.first << "," << it->first << "]" + << " " << it->second.inode.rstat << dendl; - pi = oi = &p->second.inode; - pxattrs = &p->second.xattrs; + auto &p = it->second; + pi = oi = &p.inode; + pxattrs = &p.xattrs; } else { // snapshoted remote dentry can result this dout(0) << __func__ << " old_inode for snapid " << snapid @@ -3246,7 +3214,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, bool plocal = versionlock.get_last_wrlock_client() == client; bool ppolicy = policylock.is_xlocked_by_client(client) || get_loner()==client; - inode_t *any_i = (pfile|pauth|plink|pxattr|plocal) ? pi : oi; + mempool_inode *any_i = (pfile|pauth|plink|pxattr|plocal) ? pi : oi; dout(20) << " pfile " << pfile << " pauth " << pauth << " plink " << plink << " pxattr " << pxattr @@ -3255,7 +3223,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, << " valid=" << valid << dendl; // file - inode_t *file_i = pfile ? pi:oi; + mempool_inode *file_i = pfile ? pi:oi; file_layout_t layout; if (is_dir()) { layout = (ppolicy ? pi : oi)->layout; @@ -3290,13 +3258,13 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, } // auth - inode_t *auth_i = pauth ? pi:oi; + mempool_inode *auth_i = pauth ? pi:oi; // link - inode_t *link_i = plink ? pi:oi; + mempool_inode *link_i = plink ? pi:oi; // xattr - inode_t *xattr_i = pxattr ? pi:oi; + mempool_inode *xattr_i = pxattr ? pi:oi; using ceph::encode; // xattr @@ -3479,7 +3447,7 @@ int CInode::encode_inodestat(bufferlist& bl, Session *session, encode(inline_data, bl); } if (session->connection->has_feature(CEPH_FEATURE_MDS_QUOTA)) { - inode_t *policy_i = ppolicy ? pi : oi; + mempool_inode *policy_i = ppolicy ? pi : oi; encode(policy_i->quota, bl); } if (session->connection->has_feature(CEPH_FEATURE_FS_FILE_LAYOUT_V2)) { @@ -3504,9 +3472,9 @@ void CInode::encode_cap_message(MClientCaps *m, Capability *cap) bool plink = linklock.is_xlocked_by_client(client); bool pxattr = xattrlock.is_xlocked_by_client(client); - inode_t *oi = &inode; - inode_t *pi = get_projected_inode(); - inode_t *i = (pfile|pauth|plink|pxattr) ? pi : oi; + mempool_inode *oi = &inode; + mempool_inode *pi = get_projected_inode(); + mempool_inode *i = (pfile|pauth|plink|pxattr) ? pi : oi; dout(20) << __func__ << " pfile " << pfile << " pauth " << pauth << " plink " << plink << " pxattr " << pxattr @@ -3546,7 +3514,7 @@ void CInode::encode_cap_message(MClientCaps *m, Capability *cap) using ceph::encode; i = pxattr ? pi:oi; - map<string,bufferptr> *ix = pxattr ? get_projected_xattrs() : &xattrs; + auto ix = pxattr ? get_projected_xattrs() : &xattrs; if ((cap->pending() & CEPH_CAP_XATTR_SHARED) && i->xattr_version > cap->client_xattr_version) { dout(10) << " including xattrs v " << i->xattr_version << dendl; @@ -3575,7 +3543,11 @@ void CInode::_decode_base(bufferlist::iterator& p) using ceph::decode; decode(first, p); decode(inode, p); - decode(symlink, p); + { + std::string tmp; + decode(tmp, p); + symlink = std::string_view(tmp); + } decode(dirfragtree, p); decode(xattrs, p); decode(old_inodes, p); @@ -3711,12 +3683,10 @@ void CInode::encode_export(bufferlist& bl) // include scatterlock info for any bounding CDirs bufferlist bounding; if (inode.is_dir()) - for (compact_map<frag_t,CDir*>::iterator p = dirfrags.begin(); - p != dirfrags.end(); - ++p) { - CDir *dir = p->second; + for (const auto &p : dirfrags) { + CDir *dir = p.second; if (dir->state_test(CDir::STATE_EXPORTBOUND)) { - encode(p->first, bounding); + encode(p.first, bounding); encode(dir->fnode.fragstat, bounding); encode(dir->fnode.accounted_fragstat, bounding); encode(dir->fnode.rstat, bounding); @@ -3833,14 +3803,11 @@ void InodeStoreBase::dump(Formatter *f) const inode.dump(f); f->dump_string("symlink", symlink); f->open_array_section("old_inodes"); - for (compact_map<snapid_t, old_inode_t>::const_iterator i = old_inodes.begin(); - i != old_inodes.end(); ++i) { + for (const auto &p : old_inodes) { f->open_object_section("old_inode"); - { - // The key is the last snapid, the first is in the old_inode_t - f->dump_int("last", i->first); - i->second.dump(f); - } + // The key is the last snapid, the first is in the mempool_old_inode + f->dump_int("last", p.first); + p.second.dump(f); f->close_section(); // old_inode } f->close_section(); // old_inodes @@ -3935,7 +3902,7 @@ void CInode::validate_disk_state(CInode::validated_data *results, bool _start(int rval) { if (in->is_dirty()) { MDCache *mdcache = in->mdcache; - inode_t& inode = in->inode; + mempool_inode& inode = in->inode; dout(20) << "validating a dirty CInode; results will be inconclusive" << dendl; } @@ -3978,7 +3945,7 @@ void CInode::validate_disk_state(CInode::validated_data *results, int memory_newer; MDCache *mdcache = in->mdcache; // For the benefit of dout - const inode_t& inode = in->inode; // For the benefit of dout + const mempool_inode& inode = in->inode; // For the benefit of dout // Ignore rval because it's the result of a FAILOK operation // from fetch_backtrace_and_tag: the real result is in @@ -4094,8 +4061,8 @@ next: results->inode.ondisk_value = shadow_in->inode; results->inode.memory_value = in->inode; - inode_t& si = shadow_in->inode; - inode_t& i = in->inode; + mempool_inode& si = shadow_in->inode; + mempool_inode& i = in->inode; if (si.version > i.version) { // uh, what? results->inode.error_str << "On-disk inode is newer than in-memory one!"; @@ -4158,10 +4125,8 @@ next: } // check each dirfrag... - for (compact_map<frag_t,CDir*>::iterator p = in->dirfrags.begin(); - p != in->dirfrags.end(); - ++p) { - CDir *dir = p->second; + for (const auto &p : in->dirfrags) { + CDir *dir = p.second; assert(dir->get_version() > 0); nest_info.add(dir->fnode.accounted_rstat); dir_info.add(dir->fnode.accounted_fragstat); @@ -4171,10 +4136,10 @@ next: if (dir->scrub_infop->header->get_repair()) { results->raw_stats.repaired = true; results->raw_stats.error_str - << "dirfrag(" << p->first << ") has bad stats (will be fixed); "; + << "dirfrag(" << p.first << ") has bad stats (will be fixed); "; } else { results->raw_stats.error_str - << "dirfrag(" << p->first << ") has bad stats; "; + << "dirfrag(" << p.first << ") has bad stats; "; } frags_errors++; } @@ -4371,14 +4336,15 @@ void CInode::dump(Formatter *f, int flags) const if (flags & DUMP_CAPS) { f->open_array_section("client_caps"); - for (map<client_t,Capability*>::const_iterator it = client_caps.begin(); - it != client_caps.end(); ++it) { + for (const auto &p : client_caps) { + auto &client = p.first; + auto &cap = p.second; f->open_object_section("client_cap"); - f->dump_int("client_id", it->first.v); - f->dump_string("pending", ccap_string(it->second->pending())); - f->dump_string("issued", ccap_string(it->second->issued())); - f->dump_string("wanted", ccap_string(it->second->wanted())); - f->dump_int("last_sent", it->second->get_last_sent()); + f->dump_int("client_id", client.v); + f->dump_string("pending", ccap_string(cap->pending())); + f->dump_string("issued", ccap_string(cap->issued())); + f->dump_string("wanted", ccap_string(cap->wanted())); + f->dump_int("last_sent", cap->get_last_sent()); f->close_section(); } f->close_section(); @@ -4387,11 +4353,10 @@ void CInode::dump(Formatter *f, int flags) const f->dump_int("want_loner", want_loner_cap.v); f->open_array_section("mds_caps_wanted"); - for (compact_map<int,int>::const_iterator p = mds_caps_wanted.begin(); - p != mds_caps_wanted.end(); ++p) { + for (const auto &p : mds_caps_wanted) { f->open_object_section("mds_cap_wanted"); - f->dump_int("rank", p->first); - f->dump_string("cap", ccap_string(p->second)); + f->dump_int("rank", p.first); + f->dump_string("cap", ccap_string(p.second)); f->close_section(); } f->close_section(); @@ -4406,7 +4371,7 @@ void CInode::scrub_info_create() const // break out of const-land to set up implicit initial state CInode *me = const_cast<CInode*>(this); - inode_t *in = me->get_projected_inode(); + mempool_inode *in = me->get_projected_inode(); scrub_info_t *si = new scrub_info_t(); si->scrub_start_stamp = si->last_scrub_stamp = in->last_scrub_stamp; @@ -4640,7 +4605,7 @@ mds_rank_t CInode::get_export_pin(bool inherit) const const CDentry *pdn = in->get_projected_parent_dn(); if (!pdn) break; - const inode_t *pi = in->get_projected_inode(); + const mempool_inode *pi = in->get_projected_inode(); // ignore export pin for unlinked directory if (pi->nlink == 0) break; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index 41f066240b1..537d98760e7 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -75,15 +75,20 @@ extern int num_cinode_locks; */ class InodeStoreBase { public: - inode_t inode; // the inode itself - std::string symlink; // symlink dest, if symlink - std::map<std::string, bufferptr> xattrs; + typedef inode_t<mempool::mds_co::pool_allocator> mempool_inode; + typedef old_inode_t<mempool::mds_co::pool_allocator> mempool_old_inode; + typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map; + typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool + + mempool_inode inode; // the inode itself + mempool::mds_co::string symlink; // symlink dest, if symlink + mempool_xattr_map xattrs; fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map. - compact_map<snapid_t, old_inode_t> old_inodes; // key = last, value.first = first - snapid_t oldest_snap; - damage_flags_t damage_flags; + mempool_old_inode_map old_inodes; // key = last, value.first = first + snapid_t oldest_snap = CEPH_NOSNAP; + damage_flags_t damage_flags = 0; - InodeStoreBase() : oldest_snap(CEPH_NOSNAP), damage_flags(0) { } + InodeStoreBase() {} /* Helpers */ bool is_file() const { return inode.is_file(); } @@ -109,6 +114,7 @@ public: class InodeStore : public InodeStoreBase { public: + // FIXME bufferlist not part of mempool bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't // rehydrate it without full MDCache void encode(bufferlist &bl, uint64_t features) const { @@ -250,22 +256,22 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno public: MDCache *mdcache; - SnapRealm *snaprealm; - SnapRealm *containing_realm; + SnapRealm *snaprealm = nullptr; + SnapRealm *containing_realm = nullptr; snapid_t first, last; - compact_set<snapid_t> dirty_old_rstats; + mempool::mds_co::compact_set<snapid_t> dirty_old_rstats; class scrub_stamp_info_t { public: /// version we started our latest scrub (whether in-progress or finished) - version_t scrub_start_version; + version_t scrub_start_version = 0; /// time we started our latest scrub (whether in-progress or finished) utime_t scrub_start_stamp; /// version we started our most recent finished scrub - version_t last_scrub_version; + version_t last_scrub_version = 0; /// time we started our most recent finished scrub utime_t last_scrub_stamp; - scrub_stamp_info_t() : scrub_start_version(0), last_scrub_version(0) {} + scrub_stamp_info_t() {} void reset() { scrub_start_version = last_scrub_version = 0; scrub_start_stamp = last_scrub_stamp = utime_t(); @@ -274,22 +280,19 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno class scrub_info_t : public scrub_stamp_info_t { public: - CDentry *scrub_parent; - MDSInternalContextBase *on_finish; + CDentry *scrub_parent = nullptr; + MDSInternalContextBase *on_finish = nullptr; - bool last_scrub_dirty; /// are our stamps dirty with respect to disk state? - bool scrub_in_progress; /// are we currently scrubbing? - bool children_scrubbed; + bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state? + bool scrub_in_progress = false; /// are we currently scrubbing? + bool children_scrubbed = false; /// my own (temporary) stamps and versions for each dirfrag we have - std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; + std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool ScrubHeaderRef header; - scrub_info_t() : scrub_stamp_info_t(), - scrub_parent(NULL), on_finish(NULL), - last_scrub_dirty(false), scrub_in_progress(false), - children_scrubbed(false) {} + scrub_info_t() {} }; const scrub_info_t *scrub_info() const{ @@ -381,7 +384,7 @@ public: } snapid_t get_oldest_snap(); - uint64_t last_journaled; // log offset for the last time i was journaled + uint64_t last_journaled = 0; // log offset for the last time i was journaled //loff_t last_open_journaled; // log offset for the last journaled EOpen utime_t last_dirstat_prop; @@ -395,108 +398,104 @@ public: void mark_dirty_rstat(); void clear_dirty_rstat(); - //bool hack_accessed; + //bool hack_accessed = false; //utime_t hack_load_stamp; /** * Projection methods, used to store inode changes until they have been journaled, * at which point they are popped. * Usage: - * project_inode as needed. If you're also projecting xattrs, pass - * in an xattr map (by pointer), then edit the map. - * If you're also projecting the snaprealm, call project_snaprealm after - * calling project_inode, and modify the snaprealm as necessary. + * project_inode as needed. If you're changing xattrs or sr_t, then pass true + * as needed then change the xattrs/snapnode member as needed. (Dirty + * exception: project_past_snaprealm_parent allows you to project the + * snapnode after doing project_inode (i.e. you don't need to pass + * snap=true). * * Then, journal. Once journaling is done, pop_and_dirty_projected_inode. * This function will take care of the inode itself, the xattrs, and the snaprealm. */ - struct projected_inode_t { - inode_t *inode; - std::map<std::string,bufferptr> *xattrs; - sr_t *snapnode; - - projected_inode_t() - : inode(NULL), xattrs(NULL), snapnode(NULL) {} - projected_inode_t(inode_t *in, sr_t *sn) - : inode(in), xattrs(NULL), snapnode(sn) {} - projected_inode_t(inode_t *in, std::map<std::string, bufferptr> *xp = NULL, sr_t *sn = NULL) - : inode(in), xattrs(xp), snapnode(sn) {} + class projected_inode { + public: + mempool_inode inode; + std::unique_ptr<mempool_xattr_map> xattrs; + std::unique_ptr<sr_t> snapnode; + + projected_inode() = delete; + projected_inode(const mempool_inode &in) : inode(in) {} }; - std::list<projected_inode_t*> projected_nodes; // projected values (only defined while dirty) - int num_projected_xattrs; - int num_projected_srnodes; - - inode_t *project_inode(std::map<std::string,bufferptr> *px=0); + +private: + mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty) + size_t num_projected_xattrs = 0; + size_t num_projected_srnodes = 0; + + sr_t &project_snaprealm(projected_inode &pi); +public: + CInode::projected_inode &project_inode(bool xattr = false, bool snap = false); void pop_and_dirty_projected_inode(LogSegment *ls); - projected_inode_t *get_projected_node() { + projected_inode *get_projected_node() { if (projected_nodes.empty()) return NULL; else - return projected_nodes.back(); + return &projected_nodes.back(); } version_t get_projected_version() const { if (projected_nodes.empty()) return inode.version; else - return projected_nodes.back()->inode->version; + return projected_nodes.back().inode.version; } bool is_projected() const { return !projected_nodes.empty(); } - const inode_t *get_projected_inode() const { + const mempool_inode *get_projected_inode() const { if (projected_nodes.empty()) return &inode; else - return projected_nodes.back()->inode; + return &projected_nodes.back().inode; } - inode_t *get_projected_inode() { + mempool_inode *get_projected_inode() { if (projected_nodes.empty()) return &inode; else - return projected_nodes.back()->inode; + return &projected_nodes.back().inode; } - inode_t *get_previous_projected_inode() { + mempool_inode *get_previous_projected_inode() { assert(!projected_nodes.empty()); - std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); - ++p; - if (p != projected_nodes.rend()) - return (*p)->inode; + auto it = projected_nodes.rbegin(); + ++it; + if (it != projected_nodes.rend()) + return &it->inode; else return &inode; } - std::map<std::string,bufferptr> *get_projected_xattrs() { + mempool_xattr_map *get_projected_xattrs() { if (num_projected_xattrs > 0) { - for (std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); - p != projected_nodes.rend(); - ++p) - if ((*p)->xattrs) - return (*p)->xattrs; + for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) + if (it->xattrs) + return it->xattrs.get(); } return &xattrs; } - std::map<std::string,bufferptr> *get_previous_projected_xattrs() { - std::list<projected_inode_t*>::reverse_iterator p = projected_nodes.rbegin(); - for (++p; // skip the most recent projected value - p != projected_nodes.rend(); - ++p) - if ((*p)->xattrs) - return (*p)->xattrs; + mempool_xattr_map *get_previous_projected_xattrs() { + if (num_projected_xattrs > 0) { + for (auto it = ++projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) + if (it->xattrs) + return it->xattrs.get(); + } return &xattrs; } - sr_t *project_snaprealm(snapid_t snapid=0); const sr_t *get_projected_srnode() const { if (num_projected_srnodes > 0) { - for (std::list<projected_inode_t*>::const_reverse_iterator p = projected_nodes.rbegin(); - p != projected_nodes.rend(); - ++p) - if ((*p)->snapnode) - return (*p)->snapnode; + for (auto it = projected_nodes.rbegin(); it != projected_nodes.rend(); ++it) + if (it->snapnode) + return it->snapnode.get(); } if (snaprealm) return &snaprealm->srnode; @@ -509,18 +508,18 @@ private: void pop_projected_snaprealm(sr_t *next_snaprealm); public: - old_inode_t& cow_old_inode(snapid_t follows, bool cow_head); + mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head); void split_old_inode(snapid_t snap); - old_inode_t *pick_old_inode(snapid_t last); + mempool_old_inode *pick_old_inode(snapid_t last); void pre_cow_old_inode(); bool has_snap_data(snapid_t s); void purge_stale_snap_data(const std::set<snapid_t>& snaps); // -- cache infrastructure -- private: - compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode - int stickydir_ref; - scrub_info_t *scrub_infop; + mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode + int stickydir_ref = 0; + scrub_info_t *scrub_infop = nullptr; public: bool has_dirfrags() { return !dirfrags.empty(); } @@ -552,23 +551,24 @@ public: protected: // parent dentries in cache - CDentry *parent; // primary link - compact_set<CDentry*> remote_parents; // if hard linked + CDentry *parent = nullptr; // primary link + mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked - std::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc. + mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc. - mds_authority_t inode_auth; + mds_authority_t inode_auth = CDIR_AUTH_DEFAULT; // -- distributed state -- protected: - // file capabilities - std::map<client_t, Capability*> client_caps; // client -> caps - compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted - int replica_caps_wanted; // [replica] what i've requested from auth + // file capabilities FIXME Capability not part of mempool + using cap_map = mempool::mds_co::map<client_t, Capability*>; + cap_map client_caps; // client -> caps + mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted + int replica_caps_wanted = 0; // [replica] what i've requested from auth public: - compact_map<int, std::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head - compact_map<snapid_t, std::set<client_t> > client_need_snapflush; + mempool::mds_co::compact_map<int, mempool::mds_co::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head + mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush; void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client); @@ -576,8 +576,8 @@ public: protected: - ceph_lock_state_t *fcntl_locks; - ceph_lock_state_t *flock_locks; + ceph_lock_state_t *fcntl_locks = nullptr; + ceph_lock_state_t *flock_locks = nullptr; ceph_lock_state_t *get_fcntl_lock_state() { if (!fcntl_locks) @@ -644,7 +644,7 @@ public: elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest; public: - int auth_pin_freeze_allowance; + int auth_pin_freeze_allowance = 0; inode_load_vec_t pop; @@ -658,25 +658,17 @@ public: friend class CInodeExport; // --------------------------- + CInode() = delete; CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP) : mdcache(c), - snaprealm(0), containing_realm(0), first(f), last(l), - last_journaled(0), //last_open_journaled(0), - //hack_accessed(true), - num_projected_xattrs(0), - num_projected_srnodes(0), - stickydir_ref(0), - scrub_infop(NULL), - parent(0), - inode_auth(CDIR_AUTH_DEFAULT), - replica_caps_wanted(0), - fcntl_locks(0), flock_locks(0), - item_dirty(this), item_caps(this), item_open_file(this), item_dirty_parent(this), + item_dirty(this), + item_caps(this), + item_open_file(this), + item_dirty_parent(this), item_dirty_dirfrag_dir(this), item_dirty_dirfrag_nest(this), item_dirty_dirfrag_dirfragtree(this), - auth_pin_freeze_allowance(0), pop(ceph_clock_now()), versionlock(this, &versionlock_type), authlock(this, &authlock_type), @@ -687,10 +679,8 @@ public: snaplock(this, &snaplock_type), nestlock(this, &nestlock_type), flocklock(this, &flocklock_type), - policylock(this, &policylock_type), - loner_cap(-1), want_loner_cap(-1) + policylock(this, &policylock_type) { - state = 0; if (auth) state_set(STATE_AUTH); } ~CInode() override { @@ -730,7 +720,7 @@ public: vinodeno_t vino() const { return vinodeno_t(inode.ino, last); } int d_type() const { return IFTODT(inode.mode); } - inode_t& get_inode() { return inode; } + mempool_inode& get_inode() { return inode; } CDentry* get_parent_dn() { return parent; } const CDentry* get_parent_dn() const { return parent; } CDentry* get_projected_parent_dn() { return !projected_parent.empty() ? projected_parent.back() : parent; } @@ -831,7 +821,7 @@ public: // -- waiting -- protected: - compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir; + mempool::mds_co::compact_map<frag_t, std::list<MDSInternalContextBase*> > waiting_on_dir; public: void add_dir_waiter(frag_t fg, MDSInternalContextBase *c); void take_dir_waiting(frag_t fg, std::list<MDSInternalContextBase*>& ls); @@ -884,6 +874,7 @@ public: static LockType flocklock_type; static LockType policylock_type; + // FIXME not part of mempool LocalLock versionlock; SimpleLock authlock; SimpleLock linklock; @@ -935,7 +926,7 @@ public: // -- caps -- (new) // client caps - client_t loner_cap, want_loner_cap; + client_t loner_cap = -1, want_loner_cap = -1; client_t get_loner() const { return loner_cap; } client_t get_wanted_loner() const { return want_loner_cap; } @@ -960,33 +951,31 @@ public: int count_nonstale_caps() { int n = 0; - for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); - it != client_caps.end(); - ++it) - if (!it->second->is_stale()) + for (const auto &p : client_caps) { + if (!p.second->is_stale()) n++; + } return n; } bool multiple_nonstale_caps() { int n = 0; - for (std::map<client_t,Capability*>::iterator it = client_caps.begin(); - it != client_caps.end(); - ++it) - if (!it->second->is_stale()) { + for (const auto &p : client_caps) { + if (!p.second->is_stale()) { if (n) return true; n++; } + } return false; } bool is_any_caps() { return !client_caps.empty(); } bool is_any_nonstale_caps() { return count_nonstale_caps(); } - const compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; } - compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; } + const mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() const { return mds_caps_wanted; } + mempool::mds_co::compact_map<int32_t,int32_t>& get_mds_caps_wanted() { return mds_caps_wanted; } - const std::map<client_t,Capability*>& get_client_caps() const { return client_caps; } + const cap_map& get_client_caps() const { return client_caps; } Capability *get_client_cap(client_t client) { auto client_caps_entry = client_caps.find(client); if (client_caps_entry != client_caps.end()) @@ -1016,7 +1005,7 @@ public: int get_caps_allowed_by_type(int type) const; int get_caps_careful() const; int get_xlocker_mask(client_t client) const; - int get_caps_allowed_for_client(Session *s, inode_t *file_i) const; + int get_caps_allowed_for_client(Session *s, mempool_inode *file_i) const; // caps issued, wanted int get_caps_issued(int *ploner = 0, int *pother = 0, int *pxlocker = 0, @@ -1139,8 +1128,8 @@ public: std::stringstream error_str; }; - bool performed_validation; - bool passed_validation; + bool performed_validation = false; + bool passed_validation = false; struct raw_stats_t { frag_info_t dirstat; @@ -1148,11 +1137,10 @@ public: }; member_status<inode_backtrace_t> backtrace; - member_status<inode_t> inode; + member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr member_status<raw_stats_t> raw_stats; - validated_data() : performed_validation(false), - passed_validation(false) {} + validated_data() {} void dump(Formatter *f) const; diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 3d9fa6c3a56..7daa84bb199 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -1857,7 +1857,7 @@ void Locker::file_update_finish(CInode *in, MutationRef& mut, bool share_max, bo dout(10) << " client_snap_caps " << in->client_snap_caps << dendl; // check for snap writeback completion bool gather = false; - compact_map<int,set<client_t> >::iterator p = in->client_snap_caps.begin(); + auto p = in->client_snap_caps.begin(); while (p != in->client_snap_caps.end()) { auto q = p->second.find(client); if (q != p->second.end()) { @@ -2294,7 +2294,7 @@ public: } }; -uint64_t Locker::calc_new_max_size(inode_t *pi, uint64_t size) +uint64_t Locker::calc_new_max_size(CInode::mempool_inode *pi, uint64_t size) { uint64_t new_max = (size + 1) << 1; uint64_t max_inc = g_conf->mds_client_writeable_range_max_inc_objs; @@ -2306,10 +2306,10 @@ uint64_t Locker::calc_new_max_size(inode_t *pi, uint64_t size) } void Locker::calc_new_client_ranges(CInode *in, uint64_t size, - map<client_t,client_writeable_range_t> *new_ranges, + CInode::mempool_inode::client_range_map *new_ranges, bool *max_increased) { - inode_t *latest = in->get_projected_inode(); + auto latest = in->get_projected_inode(); uint64_t ms; if(latest->has_layout()) { ms = calc_new_max_size(latest, size); @@ -2348,8 +2348,8 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, assert(in->is_auth()); assert(in->is_file()); - inode_t *latest = in->get_projected_inode(); - map<client_t, client_writeable_range_t> new_ranges; + CInode::mempool_inode *latest = in->get_projected_inode(); + CInode::mempool_inode::client_range_map new_ranges; uint64_t size = latest->size; bool update_size = new_size > 0; bool update_max = false; @@ -2409,20 +2409,20 @@ bool Locker::check_inode_max_size(CInode *in, bool force_wrlock, MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); if (update_max) { - dout(10) << "check_inode_max_size client_ranges " << pi->client_ranges << " -> " << new_ranges << dendl; - pi->client_ranges = new_ranges; + dout(10) << "check_inode_max_size client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; + pi.inode.client_ranges = new_ranges; } if (update_size) { - dout(10) << "check_inode_max_size size " << pi->size << " -> " << new_size << dendl; - pi->size = new_size; - pi->rstat.rbytes = new_size; - dout(10) << "check_inode_max_size mtime " << pi->mtime << " -> " << new_mtime << dendl; - pi->mtime = new_mtime; + dout(10) << "check_inode_max_size size " << pi.inode.size << " -> " << new_size << dendl; + pi.inode.size = new_size; + pi.inode.rstat.rbytes = new_size; + dout(10) << "check_inode_max_size mtime " << pi.inode.mtime << " -> " << new_mtime << dendl; + pi.inode.mtime = new_mtime; } // use EOpen if the file is still open; otherwise, use EUpdate. @@ -2615,7 +2615,7 @@ void Locker::_do_null_snapflush(CInode *head_in, client_t client, snapid_t last) for (auto p = head_in->client_need_snapflush.begin(); p != head_in->client_need_snapflush.end() && p->first < last; ) { snapid_t snapid = p->first; - set<client_t>& clients = p->second; + auto &clients = p->second; ++p; // be careful, q loop below depends on this if (clients.count(client)) { @@ -3120,53 +3120,55 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll // normal metadata updates that we can apply to the head as well. // update xattrs? - bool xattrs = false; - map<string,bufferptr> *px = 0; - if ((dirty & CEPH_CAP_XATTR_EXCL) && - m->xattrbl.length() && - m->head.xattr_version > in->get_projected_inode()->xattr_version) - xattrs = true; - - old_inode_t *oi = 0; + CInode::mempool_xattr_map *px = nullptr; + bool xattrs = (dirty & CEPH_CAP_XATTR_EXCL) && + m->xattrbl.length() && + m->head.xattr_version > in->get_projected_inode()->xattr_version; + + CInode::mempool_old_inode *oi = 0; if (in->is_multiversion()) { oi = in->pick_old_inode(snap); } - inode_t *pi; + CInode::mempool_inode *i; if (oi) { dout(10) << " writing into old inode" << dendl; - pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); if (snap > oi->first) in->split_old_inode(snap); - pi = &oi->inode; + i = &oi->inode; if (xattrs) px = &oi->xattrs; } else { + auto &pi = in->project_inode(xattrs); + pi.inode.version = in->pre_dirty(); + i = &pi.inode; if (xattrs) - px = new map<string,bufferptr>; - pi = in->project_inode(px); - pi->version = in->pre_dirty(); + px = pi.xattrs.get(); } - _update_cap_fields(in, dirty, m, pi); + _update_cap_fields(in, dirty, m, i); // xattr - if (px) { - dout(7) << " xattrs v" << pi->xattr_version << " -> " << m->head.xattr_version + if (xattrs) { + dout(7) << " xattrs v" << i->xattr_version << " -> " << m->head.xattr_version << " len " << m->xattrbl.length() << dendl; - pi->xattr_version = m->head.xattr_version; + i->xattr_version = m->head.xattr_version; bufferlist::iterator p = m->xattrbl.begin(); decode(*px, p); } - if (pi->client_ranges.count(client)) { - if (in->last == snap) { - dout(10) << " removing client_range entirely" << dendl; - pi->client_ranges.erase(client); - } else { - dout(10) << " client_range now follows " << snap << dendl; - pi->client_ranges[client].follows = snap; + { + auto it = i->client_ranges.find(client); + if (it != i->client_ranges.end()) { + if (in->last == snap) { + dout(10) << " removing client_range entirely" << dendl; + i->client_ranges.erase(it); + } else { + dout(10) << " client_range now follows " << snap << dendl; + it->second.follows = snap; + } } } @@ -3183,7 +3185,7 @@ void Locker::_do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t foll client, ack)); } -void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t *pi) +void Locker::_update_cap_fields(CInode *in, int dirty, MClientCaps *m, CInode::mempool_inode *pi) { if (dirty == 0) return; @@ -3291,7 +3293,7 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, << " on " << *in << dendl; assert(in->is_auth()); client_t client = m->get_source().num(); - inode_t *latest = in->get_projected_inode(); + CInode::mempool_inode *latest = in->get_projected_inode(); // increase or zero max_size? uint64_t size = m->get_size(); @@ -3395,30 +3397,28 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, EUpdate *le = new EUpdate(mds->mdlog, "cap update"); mds->mdlog->start_entry(le); - // xattrs update? - map<string,bufferptr> *px = 0; - if ((dirty & CEPH_CAP_XATTR_EXCL) && - m->xattrbl.length() && - m->head.xattr_version > in->get_projected_inode()->xattr_version) - px = new map<string,bufferptr>; + bool xattr = (dirty & CEPH_CAP_XATTR_EXCL) && + m->xattrbl.length() && + m->head.xattr_version > in->get_projected_inode()->xattr_version; - inode_t *pi = in->project_inode(px); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(xattr); + pi.inode.version = in->pre_dirty(); MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); - _update_cap_fields(in, dirty, m, pi); + _update_cap_fields(in, dirty, m, &pi.inode); if (change_max) { dout(7) << " max_size " << old_max << " -> " << new_max << " for " << *in << dendl; if (new_max) { - pi->client_ranges[client].range.first = 0; - pi->client_ranges[client].range.last = new_max; - pi->client_ranges[client].follows = in->first - 1; + auto &cr = pi.inode.client_ranges[client]; + cr.range.first = 0; + cr.range.last = new_max; + cr.follows = in->first - 1; } else - pi->client_ranges.erase(client); + pi.inode.client_ranges.erase(client); } if (change_max || (dirty & (CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) @@ -3428,13 +3428,12 @@ bool Locker::_do_cap_update(CInode *in, Capability *cap, if (dirty & CEPH_CAP_AUTH_EXCL) wrlock_force(&in->authlock, mut); - // xattr - if (px) { - dout(7) << " xattrs v" << pi->xattr_version << " -> " << m->head.xattr_version << dendl; - pi->xattr_version = m->head.xattr_version; + // xattrs update? + if (xattr) { + dout(7) << " xattrs v" << pi.inode.xattr_version << " -> " << m->head.xattr_version << dendl; + pi.inode.xattr_version = m->head.xattr_version; bufferlist::iterator p = m->xattrbl.begin(); - decode(*px, p); - + decode(*pi.xattrs, p); wrlock_force(&in->xattrlock, mut); } @@ -4435,8 +4434,8 @@ void Locker::scatter_writebehind(ScatterLock *lock) in->pre_cow_old_inode(); // avoid cow mayhem - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); in->finish_scatter_gather_update(lock->get_type()); lock->start_flush(); diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 1c0d3888e08..315f2a207b4 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -28,7 +28,6 @@ using std::set; class MDSRank; class Session; -class CInode; class CDentry; struct SnapRealm; @@ -42,6 +41,7 @@ class SimpleLock; class ScatterLock; class LocalLock; +#include "CInode.h" #include "SimpleLock.h" #include "Mutation.h" @@ -193,7 +193,7 @@ protected: bool _need_flush_mdlog(CInode *in, int wanted_caps); void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq); void handle_client_caps(class MClientCaps *m); - void _update_cap_fields(CInode *in, int dirty, MClientCaps *m, inode_t *pi); + void _update_cap_fields(CInode *in, int dirty, MClientCaps *m, CInode::mempool_inode *pi); void _do_snap_update(CInode *in, snapid_t snap, int dirty, snapid_t follows, client_t client, MClientCaps *m, MClientCaps *ack); void _do_null_snapflush(CInode *head_in, client_t client, snapid_t last=CEPH_NOSNAP); bool _do_cap_update(CInode *in, Capability *cap, int dirty, snapid_t follows, MClientCaps *m, @@ -259,10 +259,10 @@ protected: void file_update_finish(CInode *in, MutationRef& mut, bool share_max, bool issue_client_cap, client_t client, MClientCaps *ack); private: - uint64_t calc_new_max_size(inode_t *pi, uint64_t size); + uint64_t calc_new_max_size(CInode::mempool_inode *pi, uint64_t size); public: void calc_new_client_ranges(CInode *in, uint64_t size, - map<client_t, client_writeable_range_t>* new_ranges, + CInode::mempool_inode::client_range_map* new_ranges, bool *max_increased); bool check_inode_max_size(CInode *in, bool force_wrlock=false, uint64_t newmax=0, uint64_t newsize=0, diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 038ce8ee1e8..de600c84f02 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -1513,15 +1513,14 @@ CInode *MDCache::cow_inode(CInode *in, snapid_t last) assert(head_in); if (head_in->split_need_snapflush(oldin, in)) { oldin->client_snap_caps = in->client_snap_caps; - for (compact_map<int,set<client_t> >::iterator p = in->client_snap_caps.begin(); - p != in->client_snap_caps.end(); - ++p) { - SimpleLock *lock = oldin->get_lock(p->first); + for (const auto &p : in->client_snap_caps) { + SimpleLock *lock = oldin->get_lock(p.first); assert(lock); - for (auto q = p->second.begin(); q != p->second.end(); ++q) { + for (const auto &q : p.second) { oldin->auth_pin(lock); lock->set_state(LOCK_SNAP_SYNC); // gathering lock->get_wrlock(true); + (void)q; /* unused */ } } } @@ -1729,7 +1728,7 @@ void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t fi int linkunlink, SnapRealm *prealm) { CDentry *parentdn = cur->get_projected_parent_dn(); - inode_t *curi = cur->get_projected_inode(); + CInode::mempool_inode *curi = cur->get_projected_inode(); if (cur->first > first) first = cur->first; @@ -1772,23 +1771,21 @@ void MDCache::project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t fi } if (g_conf->mds_snap_rstat) { - for (compact_set<snapid_t>::iterator p = cur->dirty_old_rstats.begin(); - p != cur->dirty_old_rstats.end(); - ++p) { - old_inode_t& old = cur->old_inodes[*p]; + for (const auto &p : cur->dirty_old_rstats) { + auto &old = cur->old_inodes[p]; snapid_t ofirst = std::max(old.first, floor); - set<snapid_t>::const_iterator q = snaps.lower_bound(ofirst); - if (q == snaps.end() || *q > *p) + auto it = snaps.lower_bound(ofirst); + if (it == snaps.end() || *it > p) continue; - if (*p >= floor) - _project_rstat_inode_to_frag(old.inode, ofirst, *p, parent, 0, false); + if (p >= floor) + _project_rstat_inode_to_frag(old.inode, ofirst, p, parent, 0, false); } } cur->dirty_old_rstats.clear(); } -void MDCache::_project_rstat_inode_to_frag(inode_t& inode, snapid_t ofirst, snapid_t last, +void MDCache::_project_rstat_inode_to_frag(CInode::mempool_inode& inode, snapid_t ofirst, snapid_t last, CDir *parent, int linkunlink, bool update_inode) { dout(10) << "_project_rstat_inode_to_frag [" << ofirst << "," << last << "]" << dendl; @@ -1919,13 +1916,13 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou dout(20) << " delta " << delta << dendl; while (last >= ofirst) { - inode_t *pi; + CInode::mempool_inode *pi; snapid_t first; if (last == pin->last) { pi = pin->get_projected_inode(); first = std::max(ofirst, pin->first); if (first > pin->first) { - old_inode_t& old = pin->cow_old_inode(first-1, cow_head); + auto &old = pin->cow_old_inode(first-1, cow_head); dout(20) << " cloned old_inode rstat is " << old.inode.rstat << dendl; } } else { @@ -1935,23 +1932,23 @@ void MDCache::project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accou } else { // our life is easier here because old_inodes is not sparse // (although it may not begin at snapid 1) - compact_map<snapid_t,old_inode_t>::iterator p = pin->old_inodes.lower_bound(last); - if (p == pin->old_inodes.end()) { + auto it = pin->old_inodes.lower_bound(last); + if (it == pin->old_inodes.end()) { dout(10) << " no old_inode <= " << last << ", done." << dendl; break; } - first = p->second.first; + first = it->second.first; if (first > last) { - dout(10) << " oldest old_inode is [" << first << "," << p->first << "], done." << dendl; + dout(10) << " oldest old_inode is [" << first << "," << it->first << "], done." << dendl; //assert(p == pin->old_inodes.begin()); break; } - if (p->first > last) { - dout(10) << " splitting right old_inode [" << first << "," << p->first << "] to [" - << (last+1) << "," << p->first << "]" << dendl; - pin->old_inodes[last] = p->second; - p->second.first = last+1; - pin->dirty_old_rstats.insert(p->first); + if (it->first > last) { + dout(10) << " splitting right old_inode [" << first << "," << it->first << "] to [" + << (last+1) << "," << it->first << "]" << dendl; + pin->old_inodes[last] = it->second; + it->second.first = last+1; + pin->dirty_old_rstats.insert(it->first); } } if (first < ofirst) { @@ -1977,7 +1974,7 @@ void MDCache::broadcast_quota_to_client(CInode *in) if (!in->is_auth() || in->is_frozen()) return; - inode_t *i = in->get_projected_inode(); + auto i = in->get_projected_inode(); if (!i->quota.is_enable()) return; @@ -2249,32 +2246,32 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, pin->pre_cow_old_inode(); // avoid cow mayhem! - inode_t *pi = pin->project_inode(); - pi->version = pin->pre_dirty(); + auto &pi = pin->project_inode(); + pi.inode.version = pin->pre_dirty(); // dirstat if (do_parent_mtime || linkunlink) { dout(20) << "predirty_journal_parents add_delta " << pf->fragstat << dendl; dout(20) << "predirty_journal_parents - " << pf->accounted_fragstat << dendl; bool touched_mtime = false, touched_chattr = false; - pi->dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); + pi.inode.dirstat.add_delta(pf->fragstat, pf->accounted_fragstat, &touched_mtime, &touched_chattr); pf->accounted_fragstat = pf->fragstat; if (touched_mtime) - pi->mtime = pi->ctime = pi->dirstat.mtime; + pi.inode.mtime = pi.inode.ctime = pi.inode.dirstat.mtime; if (touched_chattr) - pi->change_attr = pi->dirstat.change_attr; - dout(20) << "predirty_journal_parents gives " << pi->dirstat << " on " << *pin << dendl; + pi.inode.change_attr = pi.inode.dirstat.change_attr; + dout(20) << "predirty_journal_parents gives " << pi.inode.dirstat << " on " << *pin << dendl; if (parent->get_frag() == frag_t()) { // i.e., we are the only frag - if (pi->dirstat.size() < 0) + if (pi.inode.dirstat.size() < 0) assert(!"negative dirstat size" == g_conf->mds_verify_scatter); - if (pi->dirstat.size() != pf->fragstat.size()) { + if (pi.inode.dirstat.size() != pf->fragstat.size()) { mds->clog->error() << "unmatched fragstat size on single dirfrag " - << parent->dirfrag() << ", inode has " << pi->dirstat + << parent->dirfrag() << ", inode has " << pi.inode.dirstat << ", dirfrag has " << pf->fragstat; // trust the dirfrag for now - pi->dirstat = pf->fragstat; + pi.inode.dirstat = pf->fragstat; assert(!"unmatched fragstat size" == g_conf->mds_verify_scatter); } @@ -2315,13 +2312,13 @@ void MDCache::predirty_journal_parents(MutationRef mut, EMetaBlob *blob, pf->accounted_rstat = pf->rstat; if (parent->get_frag() == frag_t()) { // i.e., we are the only frag - if (pi->rstat.rbytes != pf->rstat.rbytes) { + if (pi.inode.rstat.rbytes != pf->rstat.rbytes) { mds->clog->error() << "unmatched rstat rbytes on single dirfrag " - << parent->dirfrag() << ", inode has " << pi->rstat + << parent->dirfrag() << ", inode has " << pi.inode.rstat << ", dirfrag has " << pf->rstat; // trust the dirfrag for now - pi->rstat = pf->rstat; + pi.inode.rstat = pf->rstat; assert(!"unmatched rstat rbytes" == g_conf->mds_verify_scatter); } @@ -4201,10 +4198,9 @@ void MDCache::rejoin_walk(CDir *dir, MMDSCacheRejoin *rejoin) if (dn->last != CEPH_NOSNAP) { if (in && !in->remote_parents.empty()) { // unlink any stale remote snap dentry. - for (compact_set<CDentry*>::iterator q = in->remote_parents.begin(); - q != in->remote_parents.end(); ) { - CDentry *remote_dn = *q; - ++q; + for (auto it2 = in->remote_parents.begin(); it2 != in->remote_parents.end(); ) { + CDentry *remote_dn = *it2; + ++it2; assert(remote_dn->last != CEPH_NOSNAP); remote_dn->unlink_remote(remote_dn->get_linkage()); } @@ -6177,7 +6173,7 @@ void MDCache::queue_file_recover(CInode *in) s.erase(*s.rbegin()); dout(10) << " snaps in [" << in->first << "," << in->last << "] are " << s << dendl; if (s.size() > 1) { - inode_t *pi = in->project_inode(); + CInode::mempool_inode pi = in->project_inode(); pi->version = in->pre_dirty(); auto mut(std::make_shared<MutationImpl>()); @@ -6302,7 +6298,7 @@ public: void MDCache::truncate_inode(CInode *in, LogSegment *ls) { - inode_t *pi = in->get_projected_inode(); + auto pi = in->get_projected_inode(); dout(10) << "truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size << " on " << *in @@ -6336,7 +6332,7 @@ struct C_IO_MDC_TruncateFinish : public MDCacheIOContext { void MDCache::_truncate_inode(CInode *in, LogSegment *ls) { - inode_t *pi = &in->inode; + auto pi = &in->inode; dout(10) << "_truncate_inode " << pi->truncate_from << " -> " << pi->truncate_size << " on " << *in << dendl; @@ -6385,10 +6381,10 @@ void MDCache::truncate_inode_finish(CInode *in, LogSegment *ls) ls->truncating_inodes.erase(p); // update - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); - pi->truncate_from = 0; - pi->truncate_pending--; + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); + pi.inode.truncate_from = 0; + pi.inode.truncate_pending--; MutationRef mut(new MutationImpl()); mut->ls = mds->mdlog->get_current_segment(); @@ -9372,17 +9368,18 @@ void MDCache::snaprealm_create(MDRequestRef& mdr, CInode *in) le->metablob.add_table_transaction(TABLE_SNAP, mdr->more()->stid); - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); - pi->rstat.rsnaprealms++; + auto &pi = in->project_inode(false, true); + pi.inode.version = in->pre_dirty(); + pi.inode.rstat.rsnaprealms++; bufferlist::iterator p = mdr->more()->snapidbl.begin(); snapid_t seq; decode(seq, p); - sr_t *newsnap = in->project_snaprealm(seq); - newsnap->seq = seq; - newsnap->last_created = seq; + auto &newsnap = *pi.snapnode; + newsnap.created = seq; + newsnap.seq = seq; + newsnap.last_created = seq; predirty_journal_parents(mut, &le->metablob, in, 0, PREDIRTY_PRIMARY); journal_cow_inode(mut, &le->metablob, in); @@ -11351,8 +11348,8 @@ void MDCache::dispatch_fragment_dir(MDRequestRef& mdr) // dft lock if (diri->is_auth()) { // journal dirfragtree - inode_t *pi = diri->project_inode(); - pi->version = diri->pre_dirty(); + auto &pi = diri->project_inode(); + pi.inode.version = diri->pre_dirty(); journal_dirty_inode(mdr.get(), &le->metablob, diri); } else { mds->locker->mark_updated_scatterlock(&diri->dirfragtreelock); @@ -11715,7 +11712,8 @@ void MDCache::rollback_uncommitted_fragments() } if (diri_auth) { - diri->project_inode()->version = diri->pre_dirty(); + auto &pi = diri->project_inode(); + pi.inode.version = diri->pre_dirty(); diri->pop_and_dirty_projected_inode(ls); // hacky le->metablob.add_primary_dentry(diri->get_projected_parent_dn(), diri, true); } else { diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 7107fb1a358..bc551a15c68 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -397,7 +397,7 @@ public: void project_rstat_inode_to_frag(CInode *cur, CDir *parent, snapid_t first, int linkunlink, SnapRealm *prealm); - void _project_rstat_inode_to_frag(inode_t& inode, snapid_t ofirst, snapid_t last, + void _project_rstat_inode_to_frag(CInode::mempool_inode & inode, snapid_t ofirst, snapid_t last, CDir *parent, int linkunlink, bool update_inode); void project_rstat_frag_to_inode(nest_info_t& rstat, nest_info_t& accounted_rstat, snapid_t ofirst, snapid_t last, diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index ca0dc8cfed8..bbded0e8af6 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -437,7 +437,8 @@ int MDSDaemon::init() dout(10) << sizeof(MDSCacheObject) << "\tMDSCacheObject" << dendl; dout(10) << sizeof(CInode) << "\tCInode" << dendl; dout(10) << sizeof(elist<void*>::item) << "\t elist<>::item *7=" << 7*sizeof(elist<void*>::item) << dendl; - dout(10) << sizeof(inode_t) << "\t inode_t " << dendl; + dout(10) << sizeof(CInode::mempool_inode) << "\t inode " << dendl; + dout(10) << sizeof(CInode::mempool_old_inode) << "\t old_inode " << dendl; dout(10) << sizeof(nest_info_t) << "\t nest_info_t " << dendl; dout(10) << sizeof(frag_info_t) << "\t frag_info_t " << dendl; dout(10) << sizeof(SimpleLock) << "\t SimpleLock *5=" << 5*sizeof(SimpleLock) << dendl; diff --git a/src/mds/RecoveryQueue.cc b/src/mds/RecoveryQueue.cc index d205c6f278b..2e6ff30f2b8 100644 --- a/src/mds/RecoveryQueue.cc +++ b/src/mds/RecoveryQueue.cc @@ -87,7 +87,7 @@ void RecoveryQueue::advance() void RecoveryQueue::_start(CInode *in) { - inode_t *pi = in->get_projected_inode(); + auto pi = in->get_projected_inode(); // blech if (pi->client_ranges.size() && !pi->get_max_size()) { diff --git a/src/mds/Server.cc b/src/mds/Server.cc index d923fecd93a..7ada87c9da3 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -2665,11 +2665,13 @@ CInode* Server::prepare_new_inode(MDRequestRef& mdr, CDir *dir, inodeno_t useino bufferlist::iterator p = req->get_data().begin(); // xattrs on new inode? - map<string,bufferptr> xattrs; + CInode::mempool_xattr_map xattrs; decode(xattrs, p); - for (map<string,bufferptr>::iterator p = xattrs.begin(); p != xattrs.end(); ++p) { - dout(10) << "prepare_new_inode setting xattr " << p->first << dendl; - in->xattrs[p->first] = p->second; + for (const auto &p : xattrs) { + dout(10) << "prepare_new_inode setting xattr " << p.first << dendl; + auto em = in->xattrs.emplace(std::piecewise_construct, std::forward_as_tuple(p.first), std::forward_as_tuple(p.second)); + if (!em.second) + em.first->second = p.second; } } @@ -3373,7 +3375,7 @@ void Server::handle_client_open(MDRequestRef& mdr) return; // wait for pending truncate? - const inode_t *pi = cur->get_projected_inode(); + const auto pi = cur->get_projected_inode(); if (pi->is_truncating()) { dout(10) << " waiting for pending truncate from " << pi->truncate_from << " to " << pi->truncate_size << " to complete on " << *cur << dendl; @@ -4147,9 +4149,9 @@ void Server::handle_client_setattr(MDRequestRef& mdr) return; // trunc from bigger -> smaller? - inode_t *pi = cur->get_projected_inode(); + auto pip = cur->get_projected_inode(); - uint64_t old_size = std::max<uint64_t>(pi->size, req->head.args.setattr.old_size); + uint64_t old_size = std::max<uint64_t>(pip->size, req->head.args.setattr.old_size); // ENOSPC on growing file while full, but allow shrinks if (is_full && req->head.args.setattr.size > old_size) { @@ -4161,9 +4163,9 @@ void Server::handle_client_setattr(MDRequestRef& mdr) bool truncating_smaller = false; if (mask & CEPH_SETATTR_SIZE) { truncating_smaller = req->head.args.setattr.size < old_size; - if (truncating_smaller && pi->is_truncating()) { - dout(10) << " waiting for pending truncate from " << pi->truncate_from - << " to " << pi->truncate_size << " to complete on " << *cur << dendl; + if (truncating_smaller && pip->is_truncating()) { + dout(10) << " waiting for pending truncate from " << pip->truncate_from + << " to " << pip->truncate_size << " to complete on " << *cur << dendl; mds->locker->drop_locks(mdr.get()); mdr->drop_local_auth_pins(); cur->add_waiter(CInode::WAIT_TRUNC, new C_MDS_RetryRequest(mdcache, mdr)); @@ -4178,53 +4180,53 @@ void Server::handle_client_setattr(MDRequestRef& mdr) EUpdate *le = new EUpdate(mdlog, "setattr"); mdlog->start_entry(le); - pi = cur->project_inode(); + auto &pi = cur->project_inode(); if (mask & CEPH_SETATTR_UID) - pi->uid = req->head.args.setattr.uid; + pi.inode.uid = req->head.args.setattr.uid; if (mask & CEPH_SETATTR_GID) - pi->gid = req->head.args.setattr.gid; + pi.inode.gid = req->head.args.setattr.gid; if (mask & CEPH_SETATTR_MODE) - pi->mode = (pi->mode & ~07777) | (req->head.args.setattr.mode & 07777); + pi.inode.mode = (pi.inode.mode & ~07777) | (req->head.args.setattr.mode & 07777); else if ((mask & (CEPH_SETATTR_UID|CEPH_SETATTR_GID|CEPH_SETATTR_KILL_SGUID)) && - S_ISREG(pi->mode) && - (pi->mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { - pi->mode &= ~(S_ISUID|S_ISGID); + S_ISREG(pi.inode.mode) && + (pi.inode.mode & (S_IXUSR|S_IXGRP|S_IXOTH))) { + pi.inode.mode &= ~(S_ISUID|S_ISGID); } if (mask & CEPH_SETATTR_MTIME) - pi->mtime = req->head.args.setattr.mtime; + pi.inode.mtime = req->head.args.setattr.mtime; if (mask & CEPH_SETATTR_ATIME) - pi->atime = req->head.args.setattr.atime; + pi.inode.atime = req->head.args.setattr.atime; if (mask & CEPH_SETATTR_BTIME) - pi->btime = req->head.args.setattr.btime; + pi.inode.btime = req->head.args.setattr.btime; if (mask & (CEPH_SETATTR_ATIME | CEPH_SETATTR_MTIME | CEPH_SETATTR_BTIME)) - pi->time_warp_seq++; // maybe not a timewarp, but still a serialization point. + pi.inode.time_warp_seq++; // maybe not a timewarp, but still a serialization point. if (mask & CEPH_SETATTR_SIZE) { if (truncating_smaller) { - pi->truncate(old_size, req->head.args.setattr.size); + pi.inode.truncate(old_size, req->head.args.setattr.size); le->metablob.add_truncate_start(cur->ino()); } else { - pi->size = req->head.args.setattr.size; - pi->rstat.rbytes = pi->size; + pi.inode.size = req->head.args.setattr.size; + pi.inode.rstat.rbytes = pi.inode.size; } - pi->mtime = mdr->get_op_stamp(); + pi.inode.mtime = mdr->get_op_stamp(); // adjust client's max_size? - map<client_t,client_writeable_range_t> new_ranges; + CInode::mempool_inode::client_range_map new_ranges; bool max_increased = false; - mds->locker->calc_new_client_ranges(cur, pi->size, &new_ranges, &max_increased); - if (pi->client_ranges != new_ranges) { - dout(10) << " client_ranges " << pi->client_ranges << " -> " << new_ranges << dendl; - pi->client_ranges = new_ranges; + mds->locker->calc_new_client_ranges(cur, pi.inode.size, &new_ranges, &max_increased); + if (pi.inode.client_ranges != new_ranges) { + dout(10) << " client_ranges " << pi.inode.client_ranges << " -> " << new_ranges << dendl; + pi.inode.client_ranges = new_ranges; changed_ranges = true; } } - pi->version = cur->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; + pi.inode.version = cur->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; // log + wait le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); @@ -4257,22 +4259,22 @@ void Server::do_open_truncate(MDRequestRef& mdr, int cmode) mdlog->start_entry(le); // prepare - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); - pi->mtime = pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); + pi.inode.mtime = pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; - uint64_t old_size = std::max<uint64_t>(pi->size, mdr->client_request->head.args.open.old_size); + uint64_t old_size = std::max<uint64_t>(pi.inode.size, mdr->client_request->head.args.open.old_size); if (old_size > 0) { - pi->truncate(old_size, 0); + pi.inode.truncate(old_size, 0); le->metablob.add_truncate_start(in->ino()); } bool changed_ranges = false; if (cmode & CEPH_FILE_MODE_WR) { - pi->client_ranges[client].range.first = 0; - pi->client_ranges[client].range.last = pi->get_layout_size_increment(); - pi->client_ranges[client].follows = realm->get_newest_seq(); + pi.inode.client_ranges[client].range.first = 0; + pi.inode.client_ranges[client].range.last = pi.inode.get_layout_size_increment(); + pi.inode.client_ranges[client].follows = realm->get_newest_seq(); changed_ranges = true; } @@ -4372,13 +4374,13 @@ void Server::handle_client_setlayout(MDRequestRef& mdr) return; // project update - inode_t *pi = cur->project_inode(); - pi->layout = layout; + auto &pi = cur->project_inode(); + pi.inode.layout = layout; // add the old pool to the inode - pi->add_old_pool(old_layout.pool_id); - pi->version = cur->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; + pi.inode.add_old_pool(old_layout.pool_id); + pi.inode.version = cur->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; // log + wait mdr->ls = mdlog->get_current_segment(); @@ -4414,7 +4416,7 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) return; // validate layout - const inode_t *old_pi = cur->get_projected_inode(); + const auto old_pi = cur->get_projected_inode(); file_layout_t layout; if (old_pi->has_layout()) layout = old_pi->layout; @@ -4461,9 +4463,9 @@ void Server::handle_client_setdirlayout(MDRequestRef& mdr) if (!check_access(mdr, cur, access)) return; - inode_t *pi = cur->project_inode(); - pi->layout = layout; - pi->version = cur->pre_dirty(); + auto &pi = cur->project_inode(); + pi.inode.layout = layout; + pi.inode.version = cur->pre_dirty(); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -4674,7 +4676,7 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, << " bytes on " << *cur << dendl; - inode_t *pi = NULL; + CInode::mempool_inode *pip = nullptr; string rest; if (!check_access(mdr, cur, MAY_SET_VXATTR)) { @@ -4704,9 +4706,10 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - pi = cur->project_inode(); - pi->layout = layout; + auto &pi = cur->project_inode(); + pi.inode.layout = layout; mdr->no_early_reply = true; + pip = &pi.inode; } else if (name.compare(0, 16, "ceph.file.layout") == 0) { if (!cur->is_file()) { respond_to_request(mdr, -EINVAL); @@ -4726,11 +4729,12 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - pi = cur->project_inode(); - int64_t old_pool = pi->layout.pool_id; - pi->add_old_pool(old_pool); - pi->layout = layout; - pi->ctime = mdr->get_op_stamp(); + auto &pi = cur->project_inode(); + int64_t old_pool = pi.inode.layout.pool_id; + pi.inode.add_old_pool(old_pool); + pi.inode.layout = layout; + pi.inode.ctime = mdr->get_op_stamp(); + pip = &pi.inode; } else if (name.compare(0, 10, "ceph.quota") == 0) { if (!cur->is_dir() || cur->is_root()) { respond_to_request(mdr, -EINVAL); @@ -4755,15 +4759,18 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - pi = cur->project_inode(); - pi->quota = quota; + auto &pi = cur->project_inode(false, new_realm); + pi.inode.quota = quota; if (new_realm) { SnapRealm *realm = cur->find_snaprealm(); - sr_t *newsnap = cur->project_snaprealm(realm->get_newest_seq()); - newsnap->seq = realm->get_newest_seq(); + auto seq = realm->get_newest_seq(); + auto &newsnap = *pi.snapnode; + newsnap.created = seq; + newsnap.seq = seq; } mdr->no_early_reply = true; + pip = &pi.inode; } else if (name.find("ceph.dir.pin") == 0) { if (!cur->is_dir() || cur->is_root()) { respond_to_request(mdr, -EINVAL); @@ -4784,19 +4791,20 @@ void Server::handle_set_vxattr(MDRequestRef& mdr, CInode *cur, if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - pi = cur->project_inode(); + auto &pi = cur->project_inode(); cur->set_export_pin(rank); + pip = &pi.inode; } else { dout(10) << " unknown vxattr " << name << dendl; respond_to_request(mdr, -EINVAL); return; } - pi->change_attr++; - pi->ctime = mdr->get_op_stamp(); - pi->version = cur->pre_dirty(); + pip->change_attr++; + pip->ctime = mdr->get_op_stamp(); + pip->version = cur->pre_dirty(); if (cur->is_file()) - pi->update_backtrace(); + pip->update_backtrace(); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -4842,9 +4850,9 @@ void Server::handle_remove_vxattr(MDRequestRef& mdr, CInode *cur, if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - inode_t *pi = cur->project_inode(); - pi->clear_layout(); - pi->version = cur->pre_dirty(); + auto &pi = cur->project_inode(); + pi.inode.clear_layout(); + pi.inode.version = cur->pre_dirty(); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -4925,7 +4933,7 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) if (!check_access(mdr, cur, MAY_WRITE)) return; - map<string, bufferptr> *pxattrs = cur->get_projected_xattrs(); + auto pxattrs = cur->get_projected_xattrs(); size_t len = req->get_data().length(); size_t inc = len + name.length(); @@ -4945,12 +4953,12 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) return; } - if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(name)) { + if ((flags & CEPH_XATTR_CREATE) && pxattrs->count(mempool::mds_co::string(name))) { dout(10) << "setxattr '" << name << "' XATTR_CREATE and EEXIST on " << *cur << dendl; respond_to_request(mdr, -EEXIST); return; } - if ((flags & CEPH_XATTR_REPLACE) && !pxattrs->count(name)) { + if ((flags & CEPH_XATTR_REPLACE) && !pxattrs->count(mempool::mds_co::string(name))) { dout(10) << "setxattr '" << name << "' XATTR_REPLACE and ENODATA on " << *cur << dendl; respond_to_request(mdr, -ENODATA); return; @@ -4959,17 +4967,21 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) dout(10) << "setxattr '" << name << "' len " << len << " on " << *cur << dendl; // project update - map<string,bufferptr> *px = new map<string,bufferptr>; - inode_t *pi = cur->project_inode(px); - pi->version = cur->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; - pi->xattr_version++; - px->erase(name); - if (!(flags & CEPH_XATTR_REMOVE)) { - (*px)[name] = buffer::create(len); + auto &pi = cur->project_inode(true); + pi.inode.version = cur->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; + pi.inode.xattr_version++; + auto &px = *pi.xattrs; + if ((flags & CEPH_XATTR_REMOVE)) { + px.erase(mempool::mds_co::string(name)); + } else { + bufferptr b = buffer::create(len); if (len) - req->get_data().copy(0, len, (*px)[name].c_str()); + req->get_data().copy(0, len, b.c_str()); + auto em = px.emplace(std::piecewise_construct, std::forward_as_tuple(mempool::mds_co::string(name)), std::forward_as_tuple(b)); + if (!em.second) + em.first->second = b; } // log + wait @@ -4986,8 +4998,8 @@ void Server::handle_client_setxattr(MDRequestRef& mdr) void Server::handle_client_removexattr(MDRequestRef& mdr) { MClientRequest *req = mdr->client_request; - string name(req->get_path2()); - set<SimpleLock*> rdlocks, wrlocks, xlocks; + std::string name(req->get_path2()); + std::set<SimpleLock*> rdlocks, wrlocks, xlocks; file_layout_t *dir_layout = NULL; CInode *cur; if (name == "ceph.dir.layout") @@ -5011,8 +5023,8 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; - map<string, bufferptr> *pxattrs = cur->get_projected_xattrs(); - if (pxattrs->count(name) == 0) { + auto pxattrs = cur->get_projected_xattrs(); + if (pxattrs->count(mempool::mds_co::string(name)) == 0) { dout(10) << "removexattr '" << name << "' and ENODATA on " << *cur << dendl; respond_to_request(mdr, -ENODATA); return; @@ -5021,13 +5033,13 @@ void Server::handle_client_removexattr(MDRequestRef& mdr) dout(10) << "removexattr '" << name << "' on " << *cur << dendl; // project update - map<string,bufferptr> *px = new map<string,bufferptr>; - inode_t *pi = cur->project_inode(px); - pi->version = cur->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; - pi->xattr_version++; - px->erase(name); + auto &pi = cur->project_inode(true); + auto &px = *pi.xattrs; + pi.inode.version = cur->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; + pi.inode.xattr_version++; + px.erase(mempool::mds_co::string(name)); // log + wait mdr->ls = mdlog->get_current_segment(); @@ -5416,11 +5428,11 @@ void Server::_link_local(MDRequestRef& mdr, CDentry *dn, CInode *targeti) version_t tipv = targeti->pre_dirty(); // project inode update - inode_t *pi = targeti->project_inode(); - pi->nlink++; - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; - pi->version = tipv; + auto &pi = targeti->project_inode(); + pi.inode.nlink++; + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; + pi.inode.version = tipv; // log + wait EUpdate *le = new EUpdate(mdlog, "link_local"); @@ -5648,16 +5660,16 @@ void Server::handle_slave_link_prep(MDRequestRef& mdr) ESlaveUpdate::OP_PREPARE, ESlaveUpdate::LINK); mdlog->start_entry(le); - inode_t *pi = dnl->get_inode()->project_inode(); + auto &pi = dnl->get_inode()->project_inode(); // update journaled target inode bool inc; if (mdr->slave_request->get_op() == MMDSSlaveRequest::OP_LINKPREP) { inc = true; - pi->nlink++; + pi.inode.nlink++; } else { inc = false; - pi->nlink--; + pi.inode.nlink--; } link_rollback rollback; @@ -5671,10 +5683,10 @@ void Server::handle_slave_link_prep(MDRequestRef& mdr) encode(rollback, le->rollback); mdr->more()->rollback_bl = le->rollback; - pi->ctime = mdr->get_op_stamp(); - pi->version = targeti->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.version = targeti->pre_dirty(); - dout(10) << " projected inode " << pi << " v " << pi->version << dendl; + dout(10) << " projected inode " << pi.inode.ino << " v " << pi.inode.version << dendl; // commit case mdcache->predirty_journal_parents(mdr, &le->commit, dnl->get_inode(), 0, PREDIRTY_SHALLOW|PREDIRTY_PRIMARY); @@ -5793,8 +5805,8 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& dout(10) << " target is " << *in << dendl; assert(!in->is_projected()); // live slave request hold versionlock xlock. - inode_t *pi = in->project_inode(); - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.version = in->pre_dirty(); mut->add_projected_inode(in); // parent dir rctime @@ -5802,20 +5814,20 @@ void Server::do_link_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef& fnode_t *pf = parent->project_fnode(); mut->add_projected_fnode(parent); pf->version = parent->pre_dirty(); - if (pf->fragstat.mtime == pi->ctime) { + if (pf->fragstat.mtime == pi.inode.ctime) { pf->fragstat.mtime = rollback.old_dir_mtime; - if (pf->rstat.rctime == pi->ctime) + if (pf->rstat.rctime == pi.inode.ctime) pf->rstat.rctime = rollback.old_dir_rctime; mut->add_updated_lock(&parent->get_inode()->filelock); mut->add_updated_lock(&parent->get_inode()->nestlock); } // inode - pi->ctime = rollback.old_ctime; + pi.inode.ctime = rollback.old_ctime; if (rollback.was_inc) - pi->nlink--; + pi.inode.nlink--; else - pi->nlink++; + pi.inode.nlink++; // journal it ESlaveUpdate *le = new ESlaveUpdate(mdlog, "slave_link_rollback", rollback.reqid, master, @@ -6076,14 +6088,18 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, sna // the unlinked dentry dn->pre_dirty(); - inode_t *pi = in->project_inode(); - dn->make_path_string(pi->stray_prior_path, true); + auto &pi = in->project_inode(); + { + std::string t; + dn->make_path_string(t, true); + pi.inode.stray_prior_path = std::move(t); + } mdr->add_projected_inode(in); // do this _after_ my dn->pre_dirty().. we apply that one manually. - pi->version = in->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; - pi->nlink--; - if (pi->nlink == 0) + pi.inode.version = in->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.change_attr++; + pi.inode.nlink--; + if (pi.inode.nlink == 0) in->state_set(CInode::STATE_ORPHAN); if (dnl->is_primary()) { @@ -6096,7 +6112,7 @@ void Server::_unlink_local(MDRequestRef& mdr, CDentry *dn, CDentry *straydn, sna if (in->snaprealm || follows + 1 > in->get_oldest_snap()) in->project_past_snaprealm_parent(straydn->get_dir()->inode->find_snaprealm()); - pi->update_backtrace(); + pi.inode.update_backtrace(); le->metablob.add_primary_dentry(straydn, in, true, true); } else { // remote link. update remote inode. @@ -7193,8 +7209,8 @@ void Server::_rename_prepare(MDRequestRef& mdr, } // prepare - inode_t *pi = 0; // renamed inode - inode_t *tpi = 0; // target/overwritten inode + CInode::mempool_inode *spi = 0; // renamed inode + CInode::mempool_inode *tpi = 0; // target/overwritten inode // target inode if (!linkmerge) { @@ -7202,16 +7218,18 @@ void Server::_rename_prepare(MDRequestRef& mdr, assert(straydn); // moving to straydn. // link--, and move. if (destdn->is_auth()) { - tpi = oldin->project_inode(); //project_snaprealm - tpi->version = straydn->pre_dirty(tpi->version); - tpi->update_backtrace(); + auto &pi= oldin->project_inode(); //project_snaprealm + pi.inode.version = straydn->pre_dirty(pi.inode.version); + pi.inode.update_backtrace(); + tpi = &pi.inode; } straydn->push_projected_linkage(oldin); } else if (destdnl->is_remote()) { // nlink-- targeti if (oldin->is_auth()) { - tpi = oldin->project_inode(); - tpi->version = oldin->pre_dirty(); + auto &pi = oldin->project_inode(); + pi.inode.version = oldin->pre_dirty(); + tpi = &pi.inode; } } } @@ -7225,14 +7243,16 @@ void Server::_rename_prepare(MDRequestRef& mdr, destdn->push_projected_linkage(srcdnl->get_remote_ino(), srcdnl->get_remote_d_type()); // srci if (srci->is_auth()) { - pi = srci->project_inode(); - pi->version = srci->pre_dirty(); + auto &pi = srci->project_inode(); + pi.inode.version = srci->pre_dirty(); + spi = &pi.inode; } } else { dout(10) << " will merge remote onto primary link" << dendl; if (destdn->is_auth()) { - pi = oldin->project_inode(); - pi->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->inode.version); + auto &pi = oldin->project_inode(); + pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldin->inode.version); + spi = &pi.inode; } } } else { // primary @@ -7256,10 +7276,11 @@ void Server::_rename_prepare(MDRequestRef& mdr, dout(10) << " noting renamed dir open frags " << metablob->renamed_dir_frags << dendl; } } - pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary + auto &pi = srci->project_inode(); // project snaprealm if srcdnl->is_primary // & srcdnl->snaprealm - pi->version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); - pi->update_backtrace(); + pi.inode.version = mdr->more()->pvmap[destdn] = destdn->pre_dirty(oldpv); + pi.inode.update_backtrace(); + spi = &pi.inode; } destdn->push_projected_linkage(srci); } @@ -7270,16 +7291,20 @@ void Server::_rename_prepare(MDRequestRef& mdr, srcdn->push_projected_linkage(); // push null linkage if (!silent) { - if (pi) { - pi->ctime = mdr->get_op_stamp(); - pi->change_attr++; + if (spi) { + spi->ctime = mdr->get_op_stamp(); + spi->change_attr++; if (linkmerge) - pi->nlink--; + spi->nlink--; } if (tpi) { tpi->ctime = mdr->get_op_stamp(); tpi->change_attr++; - destdn->make_path_string(tpi->stray_prior_path, true); + { + std::string t; + destdn->make_path_string(t, true); + tpi->stray_prior_path = std::move(t); + } tpi->nlink--; if (tpi->nlink == 0) oldin->state_set(CInode::STATE_ORPHAN); @@ -8163,22 +8188,23 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef rollback.orig_src.remote_d_type); } - inode_t *pi = 0; + CInode::mempool_inode *pip = 0; if (in) { if (in->authority().first == whoami) { - pi = in->project_inode(); + auto &pi = in->project_inode(); mut->add_projected_inode(in); - pi->version = in->pre_dirty(); + pi.inode.version = in->pre_dirty(); + pip = &pi.inode; } else - pi = in->get_projected_inode(); - if (pi->ctime == rollback.ctime) - pi->ctime = rollback.orig_src.old_ctime; + pip = in->get_projected_inode(); + if (pip->ctime == rollback.ctime) + pip->ctime = rollback.orig_src.old_ctime; } if (srcdn && srcdn->authority().first == whoami) { nest_info_t blah; _rollback_repair_dir(mut, srcdir, rollback.orig_src, rollback.ctime, - in ? in->is_dir() : false, 1, pi ? pi->accounted_rstat : blah); + in ? in->is_dir() : false, 1, pip ? pip->accounted_rstat : blah); } // repair dest @@ -8200,11 +8226,12 @@ void Server::do_rename_rollback(bufferlist &rbl, mds_rank_t master, MDRequestRef straydn->push_projected_linkage(); if (target) { - inode_t *ti = NULL; + CInode::mempool_inode *ti = NULL; if (target->authority().first == whoami) { - ti = target->project_inode(); + auto &pi = target->project_inode(); mut->add_projected_inode(target); - ti->version = target->pre_dirty(); + pi.inode.version = target->pre_dirty(); + ti = &pi.inode; } else ti = target->get_projected_inode(); if (ti->ctime == rollback.ctime) @@ -8661,15 +8688,18 @@ void Server::handle_client_mksnap(MDRequestRef& mdr) info.name = snapname; info.stamp = mdr->get_op_stamp(); - inode_t *pi = diri->project_inode(); - pi->ctime = info.stamp; - pi->version = diri->pre_dirty(); + auto &pi = diri->project_inode(false, true); + pi.inode.ctime = info.stamp; + pi.inode.version = diri->pre_dirty(); // project the snaprealm - sr_t *newsnap = diri->project_snaprealm(snapid); - newsnap->snaps[snapid] = info; - newsnap->seq = snapid; - newsnap->last_created = snapid; + auto &newsnap = *pi.snapnode; + newsnap.created = snapid; + auto em = newsnap.snaps.emplace(std::piecewise_construct, std::forward_as_tuple(snapid), std::forward_as_tuple(info)); + if (!em.second) + em.first->second = info; + newsnap.seq = snapid; + newsnap.last_created = snapid; // journal the inode changes mdr->ls = mdlog->get_current_segment(); @@ -8789,19 +8819,19 @@ void Server::handle_client_rmsnap(MDRequestRef& mdr) dout(10) << " stid is " << stid << ", seq is " << seq << dendl; // journal - inode_t *pi = diri->project_inode(); - pi->version = diri->pre_dirty(); - pi->ctime = mdr->get_op_stamp(); + auto &pi = diri->project_inode(false, true); + pi.inode.version = diri->pre_dirty(); + pi.inode.ctime = mdr->get_op_stamp(); mdr->ls = mdlog->get_current_segment(); EUpdate *le = new EUpdate(mdlog, "rmsnap"); mdlog->start_entry(le); // project the snaprealm - sr_t *newnode = diri->project_snaprealm(); - newnode->snaps.erase(snapid); - newnode->seq = seq; - newnode->last_destroyed = seq; + auto &newnode = *pi.snapnode; + newnode.snaps.erase(snapid); + newnode.seq = seq; + newnode.last_destroyed = seq; le->metablob.add_client_req(req->get_reqid(), req->get_oldest_client_tid()); le->metablob.add_table_transaction(TABLE_SNAP, stid); @@ -8929,14 +8959,15 @@ void Server::handle_client_renamesnap(MDRequestRef& mdr) dout(10) << " stid is " << stid << dendl; // journal - inode_t *pi = diri->project_inode(); - pi->ctime = mdr->get_op_stamp(); - pi->version = diri->pre_dirty(); + auto &pi = diri->project_inode(false, true); + pi.inode.ctime = mdr->get_op_stamp(); + pi.inode.version = diri->pre_dirty(); // project the snaprealm - sr_t *newsnap = diri->project_snaprealm(); - assert(newsnap->snaps.count(snapid)); - newsnap->snaps[snapid].name = dstname; + auto &newsnap = *pi.snapnode; + auto it = newsnap.snaps.find(snapid); + assert(it != newsnap.snaps.end()); + it->second.name = dstname; // journal the inode changes mdr->ls = mdlog->get_current_segment(); diff --git a/src/mds/StrayManager.cc b/src/mds/StrayManager.cc index e1cd070c18c..b7f94fa5ef0 100644 --- a/src/mds/StrayManager.cc +++ b/src/mds/StrayManager.cc @@ -125,11 +125,13 @@ void StrayManager::purge(CDentry *dn) to = std::max(in->inode.max_size_ever, to); } - inode_t *pi = in->get_projected_inode(); + auto pi = in->get_projected_inode(); item.size = to; item.layout = pi->layout; - item.old_pools = pi->old_pools; + item.old_pools.clear(); + for (const auto &p : pi->old_pools) + item.old_pools.insert(p); item.snapc = *snapc; } @@ -175,13 +177,13 @@ void StrayManager::_purge_stray_purged( EUpdate *le = new EUpdate(mds->mdlog, "purge_stray truncate"); mds->mdlog->start_entry(le); - inode_t *pi = in->project_inode(); - pi->size = 0; - pi->max_size_ever = 0; - pi->client_ranges.clear(); - pi->truncate_size = 0; - pi->truncate_from = 0; - pi->version = in->pre_dirty(); + auto &pi = in->project_inode(); + pi.inode.size = 0; + pi.inode.max_size_ever = 0; + pi.inode.client_ranges.clear(); + pi.inode.truncate_size = 0; + pi.inode.truncate_from = 0; + pi.inode.version = in->pre_dirty(); le->metablob.add_dir_context(dn->dir); le->metablob.add_primary_dentry(dn, in, true); @@ -463,10 +465,9 @@ bool StrayManager::_eval_stray(CDentry *dn, bool delay) if (!in->remote_parents.empty()) { // unlink any stale remote snap dentry. - for (compact_set<CDentry*>::iterator p = in->remote_parents.begin(); - p != in->remote_parents.end(); ) { - CDentry *remote_dn = *p; - ++p; + for (auto it = in->remote_parents.begin(); it != in->remote_parents.end(); ) { + CDentry *remote_dn = *it; + ++it; assert(remote_dn->last != CEPH_NOSNAP); remote_dn->unlink_remote(remote_dn->get_linkage()); } @@ -606,18 +607,17 @@ void StrayManager::_eval_stray_remote(CDentry *stray_dn, CDentry *remote_dn) /* If no remote_dn hinted, pick one arbitrarily */ if (remote_dn == NULL) { if (!stray_in->remote_parents.empty()) { - for (compact_set<CDentry*>::iterator p = stray_in->remote_parents.begin(); - p != stray_in->remote_parents.end(); - ++p) - if ((*p)->last == CEPH_NOSNAP && !(*p)->is_projected()) { - if ((*p)->is_auth()) { - remote_dn = *p; + for (const auto &dn : stray_in->remote_parents) { + if (dn->last == CEPH_NOSNAP && !dn->is_projected()) { + if (dn->is_auth()) { + remote_dn = dn; if (remote_dn->dir->can_auth_pin()) break; } else if (!remote_dn) { - remote_dn = *p; + remote_dn = dn; } } + } } if (!remote_dn) { dout(20) << __func__ << ": not reintegrating (no remote parents in cache)" << dendl; diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index 55db2db31ec..c38c2ccb528 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -65,27 +65,26 @@ public: static const int STATE_DIRTYPARENT = (1<<1); static const int STATE_DIRTYPOOL = (1<<2); static const int STATE_NEED_SNAPFLUSH = (1<<3); - typedef compact_map<snapid_t, old_inode_t> old_inodes_t; std::string dn; // dentry snapid_t dnfirst, dnlast; version_t dnv{0}; - inode_t inode; // if it's not + CInode::mempool_inode inode; // if it's not XXX should not be part of mempool; wait for std::pmr to simplify fragtree_t dirfragtree; - map<string,bufferptr> xattrs; - string symlink; + CInode::mempool_xattr_map xattrs; + std::string symlink; snapid_t oldest_snap; bufferlist snapbl; __u8 state{0}; - old_inodes_t old_inodes; + CInode::mempool_old_inode_map old_inodes; // XXX should not be part of mempool; wait for std::pmr to simplify fullbit(const fullbit& o); const fullbit& operator=(const fullbit& o); fullbit(std::string_view d, snapid_t df, snapid_t dl, - version_t v, const inode_t& i, const fragtree_t &dft, - const map<string,bufferptr> &xa, const string& sym, + version_t v, const CInode::mempool_inode& i, const fragtree_t &dft, + const CInode::mempool_xattr_map &xa, std::string_view sym, snapid_t os, const bufferlist &sbl, __u8 st, - const old_inodes_t *oi = NULL) : + const CInode::mempool_old_inode_map *oi = NULL) : dn(d), dnfirst(df), dnlast(dl), dnv(v), inode(i), xattrs(xa), oldest_snap(os), state(st) { @@ -446,7 +445,7 @@ private: in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; - const inode_t *pi = in->get_projected_inode(); + const auto pi = in->get_projected_inode(); if ((state & fullbit::STATE_DIRTY) && pi->is_backtrace_updated()) state |= fullbit::STATE_DIRTYPARENT; @@ -494,8 +493,8 @@ private: add_primary_dentry(dn, 0, dirty, dirty_parent, dirty_pool); } - void add_root(bool dirty, CInode *in, const inode_t *pi=0, fragtree_t *pdft=0, bufferlist *psnapbl=0, - map<string,bufferptr> *px=0) { + void add_root(bool dirty, CInode *in, const CInode::mempool_inode *pi=0, fragtree_t *pdft=0, bufferlist *psnapbl=0, + CInode::mempool_xattr_map *px=0) { in->last_journaled = event_seq; //cout << "journaling " << in->inode.ino << " at " << my_offset << std::endl; diff --git a/src/mds/journal.cc b/src/mds/journal.cc index a0feaed3bec..aaefd373151 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -501,10 +501,9 @@ void EMetaBlob::fullbit::dump(Formatter *f) const inode.dump(f); f->close_section(); // inode f->open_object_section("xattrs"); - for (map<string, bufferptr>::const_iterator iter = xattrs.begin(); - iter != xattrs.end(); ++iter) { - string s(iter->second.c_str(), iter->second.length()); - f->dump_string(iter->first.c_str(), s); + for (const auto &p : xattrs) { + std::string s(p.second.c_str(), p.second.length()); + f->dump_string(p.first.c_str(), s); } f->close_section(); // xattrs if (inode.is_symlink()) { @@ -523,12 +522,10 @@ void EMetaBlob::fullbit::dump(Formatter *f) const f->dump_string("state", state_string()); if (!old_inodes.empty()) { f->open_array_section("old inodes"); - for (old_inodes_t::const_iterator iter = old_inodes.begin(); - iter != old_inodes.end(); - ++iter) { + for (const auto &p : old_inodes) { f->open_object_section("inode"); - f->dump_int("snapid", iter->first); - iter->second.dump(f); + f->dump_int("snapid", p.first); + p.second.dump(f); f->close_section(); // inode } f->close_section(); // old inodes @@ -537,9 +534,9 @@ void EMetaBlob::fullbit::dump(Formatter *f) const void EMetaBlob::fullbit::generate_test_instances(list<EMetaBlob::fullbit*>& ls) { - inode_t inode; + CInode::mempool_inode inode; fragtree_t fragtree; - map<string,bufferptr> empty_xattrs; + CInode::mempool_xattr_map empty_xattrs; bufferlist empty_snapbl; fullbit *sample = new fullbit("/testdn", 0, 0, 0, inode, fragtree, empty_xattrs, "", 0, empty_snapbl, diff --git a/src/mds/mdstypes.cc b/src/mds/mdstypes.cc index 8658f1b6851..a641f2812bf 100644 --- a/src/mds/mdstypes.cc +++ b/src/mds/mdstypes.cc @@ -241,338 +241,6 @@ void inline_data_t::decode(bufferlist::iterator &p) free_data(); } -/* - * inode_t - */ -void inode_t::encode(bufferlist &bl, uint64_t features) const -{ - ENCODE_START(15, 6, bl); - - encode(ino, bl); - encode(rdev, bl); - encode(ctime, bl); - - encode(mode, bl); - encode(uid, bl); - encode(gid, bl); - - encode(nlink, bl); - { - // removed field - bool anchored = 0; - encode(anchored, bl); - } - - encode(dir_layout, bl); - encode(layout, bl, features); - encode(size, bl); - encode(truncate_seq, bl); - encode(truncate_size, bl); - encode(truncate_from, bl); - encode(truncate_pending, bl); - encode(mtime, bl); - encode(atime, bl); - encode(time_warp_seq, bl); - encode(client_ranges, bl); - - encode(dirstat, bl); - encode(rstat, bl); - encode(accounted_rstat, bl); - - encode(version, bl); - encode(file_data_version, bl); - encode(xattr_version, bl); - encode(backtrace_version, bl); - encode(old_pools, bl); - encode(max_size_ever, bl); - encode(inline_data, bl); - encode(quota, bl); - - encode(stray_prior_path, bl); - - encode(last_scrub_version, bl); - encode(last_scrub_stamp, bl); - - encode(btime, bl); - encode(change_attr, bl); - - encode(export_pin, bl); - - ENCODE_FINISH(bl); -} - -void inode_t::decode(bufferlist::iterator &p) -{ - DECODE_START_LEGACY_COMPAT_LEN(15, 6, 6, p); - - decode(ino, p); - decode(rdev, p); - decode(ctime, p); - - decode(mode, p); - decode(uid, p); - decode(gid, p); - - decode(nlink, p); - { - bool anchored; - decode(anchored, p); - } - - if (struct_v >= 4) - decode(dir_layout, p); - else - memset(&dir_layout, 0, sizeof(dir_layout)); - decode(layout, p); - decode(size, p); - decode(truncate_seq, p); - decode(truncate_size, p); - decode(truncate_from, p); - if (struct_v >= 5) - decode(truncate_pending, p); - else - truncate_pending = 0; - decode(mtime, p); - decode(atime, p); - decode(time_warp_seq, p); - if (struct_v >= 3) { - decode(client_ranges, p); - } else { - map<client_t, client_writeable_range_t::byte_range_t> m; - decode(m, p); - for (map<client_t, client_writeable_range_t::byte_range_t>::iterator - q = m.begin(); q != m.end(); ++q) - client_ranges[q->first].range = q->second; - } - - decode(dirstat, p); - decode(rstat, p); - decode(accounted_rstat, p); - - decode(version, p); - decode(file_data_version, p); - decode(xattr_version, p); - if (struct_v >= 2) - decode(backtrace_version, p); - if (struct_v >= 7) - decode(old_pools, p); - if (struct_v >= 8) - decode(max_size_ever, p); - if (struct_v >= 9) { - decode(inline_data, p); - } else { - inline_data.version = CEPH_INLINE_NONE; - } - if (struct_v < 10) - backtrace_version = 0; // force update backtrace - if (struct_v >= 11) - decode(quota, p); - - if (struct_v >= 12) { - decode(stray_prior_path, p); - } - - if (struct_v >= 13) { - decode(last_scrub_version, p); - decode(last_scrub_stamp, p); - } - if (struct_v >= 14) { - decode(btime, p); - decode(change_attr, p); - } else { - btime = utime_t(); - change_attr = 0; - } - - if (struct_v >= 15) { - decode(export_pin, p); - } else { - export_pin = MDS_RANK_NONE; - } - - DECODE_FINISH(p); -} - -void inode_t::dump(Formatter *f) const -{ - f->dump_unsigned("ino", ino); - f->dump_unsigned("rdev", rdev); - f->dump_stream("ctime") << ctime; - f->dump_stream("btime") << btime; - f->dump_unsigned("mode", mode); - f->dump_unsigned("uid", uid); - f->dump_unsigned("gid", gid); - f->dump_unsigned("nlink", nlink); - - f->open_object_section("dir_layout"); - ::dump(dir_layout, f); - f->close_section(); - - f->dump_object("layout", layout); - - f->open_array_section("old_pools"); - for (compact_set<int64_t>::const_iterator i = old_pools.begin(); - i != old_pools.end(); - ++i) - f->dump_int("pool", *i); - f->close_section(); - - f->dump_unsigned("size", size); - f->dump_unsigned("truncate_seq", truncate_seq); - f->dump_unsigned("truncate_size", truncate_size); - f->dump_unsigned("truncate_from", truncate_from); - f->dump_unsigned("truncate_pending", truncate_pending); - f->dump_stream("mtime") << mtime; - f->dump_stream("atime") << atime; - f->dump_unsigned("time_warp_seq", time_warp_seq); - f->dump_unsigned("change_attr", change_attr); - f->dump_int("export_pin", export_pin); - - f->open_array_section("client_ranges"); - for (map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); p != client_ranges.end(); ++p) { - f->open_object_section("client"); - f->dump_unsigned("client", p->first.v); - p->second.dump(f); - f->close_section(); - } - f->close_section(); - - f->open_object_section("dirstat"); - dirstat.dump(f); - f->close_section(); - - f->open_object_section("rstat"); - rstat.dump(f); - f->close_section(); - - f->open_object_section("accounted_rstat"); - accounted_rstat.dump(f); - f->close_section(); - - f->dump_unsigned("version", version); - f->dump_unsigned("file_data_version", file_data_version); - f->dump_unsigned("xattr_version", xattr_version); - f->dump_unsigned("backtrace_version", backtrace_version); - - f->dump_string("stray_prior_path", stray_prior_path); -} - -void inode_t::generate_test_instances(list<inode_t*>& ls) -{ - ls.push_back(new inode_t); - ls.push_back(new inode_t); - ls.back()->ino = 1; - // i am lazy. -} - -int inode_t::compare(const inode_t &other, bool *divergent) const -{ - assert(ino == other.ino); - *divergent = false; - if (version == other.version) { - if (rdev != other.rdev || - ctime != other.ctime || - btime != other.btime || - mode != other.mode || - uid != other.uid || - gid != other.gid || - nlink != other.nlink || - memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || - layout != other.layout || - old_pools != other.old_pools || - size != other.size || - max_size_ever != other.max_size_ever || - truncate_seq != other.truncate_seq || - truncate_size != other.truncate_size || - truncate_from != other.truncate_from || - truncate_pending != other.truncate_pending || - change_attr != other.change_attr || - mtime != other.mtime || - atime != other.atime || - time_warp_seq != other.time_warp_seq || - inline_data != other.inline_data || - client_ranges != other.client_ranges || - !(dirstat == other.dirstat) || - !(rstat == other.rstat) || - !(accounted_rstat == other.accounted_rstat) || - file_data_version != other.file_data_version || - xattr_version != other.xattr_version || - backtrace_version != other.backtrace_version) { - *divergent = true; - } - return 0; - } else if (version > other.version) { - *divergent = !older_is_consistent(other); - return 1; - } else { - assert(version < other.version); - *divergent = !other.older_is_consistent(*this); - return -1; - } -} - -bool inode_t::older_is_consistent(const inode_t &other) const -{ - if (max_size_ever < other.max_size_ever || - truncate_seq < other.truncate_seq || - time_warp_seq < other.time_warp_seq || - inline_data.version < other.inline_data.version || - dirstat.version < other.dirstat.version || - rstat.version < other.rstat.version || - accounted_rstat.version < other.accounted_rstat.version || - file_data_version < other.file_data_version || - xattr_version < other.xattr_version || - backtrace_version < other.backtrace_version) { - return false; - } - return true; -} - -/* - * old_inode_t - */ -void old_inode_t::encode(bufferlist& bl, uint64_t features) const -{ - ENCODE_START(2, 2, bl); - encode(first, bl); - encode(inode, bl, features); - encode(xattrs, bl); - ENCODE_FINISH(bl); -} - -void old_inode_t::decode(bufferlist::iterator& bl) -{ - DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); - decode(first, bl); - decode(inode, bl); - decode(xattrs, bl); - DECODE_FINISH(bl); -} - -void old_inode_t::dump(Formatter *f) const -{ - f->dump_unsigned("first", first); - inode.dump(f); - f->open_object_section("xattrs"); - for (map<string,bufferptr>::const_iterator p = xattrs.begin(); p != xattrs.end(); ++p) { - string v(p->second.c_str(), p->second.length()); - f->dump_string(p->first.c_str(), v); - } - f->close_section(); -} - -void old_inode_t::generate_test_instances(list<old_inode_t*>& ls) -{ - ls.push_back(new old_inode_t); - ls.push_back(new old_inode_t); - ls.back()->first = 2; - list<inode_t*> ils; - inode_t::generate_test_instances(ils); - ls.back()->inode = *ils.back(); - ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); - ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); -} - /* * fnode_t @@ -926,25 +594,27 @@ void mds_table_pending_t::generate_test_instances(list<mds_table_pending_t*>& ls void inode_load_vec_t::encode(bufferlist &bl) const { ENCODE_START(2, 2, bl); - for (int i=0; i<NUM; i++) - encode(vec[i], bl); + for (const auto &i : vec) { + encode(i, bl); + } ENCODE_FINISH(bl); } void inode_load_vec_t::decode(const utime_t &t, bufferlist::iterator &p) { DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); - for (int i=0; i<NUM; i++) - decode(vec[i], t, p); + for (auto &i : vec) { + decode(i, t, p); + } DECODE_FINISH(p); } void inode_load_vec_t::dump(Formatter *f) { f->open_array_section("Decay Counters"); - for (vector<DecayCounter>::const_iterator i = vec.begin(); i != vec.end(); ++i) { + for (const auto &i : vec) { f->open_object_section("Decay Counter"); - i->dump(f); + i.dump(f); f->close_section(); } f->close_section(); @@ -963,9 +633,9 @@ void inode_load_vec_t::generate_test_instances(list<inode_load_vec_t*>& ls) void dirfrag_load_vec_t::dump(Formatter *f) const { f->open_array_section("Decay Counters"); - for (vector<DecayCounter>::const_iterator i = vec.begin(); i != vec.end(); ++i) { + for (const auto &i : vec) { f->open_object_section("Decay Counter"); - i->dump(f); + i.dump(f); f->close_section(); } f->close_section(); diff --git a/src/mds/mdstypes.h b/src/mds/mdstypes.h index 380cea18f42..7ddf2d1f13c 100644 --- a/src/mds/mdstypes.h +++ b/src/mds/mdstypes.h @@ -380,8 +380,8 @@ inline std::ostream& operator<<(std::ostream &out, const vinodeno_t &vino) { */ struct client_writeable_range_t { struct byte_range_t { - uint64_t first, last; // interval client can write to - byte_range_t() : first(0), last(0) {} + uint64_t first = 0, last = 0; // interval client can write to + byte_range_t() {} }; byte_range_t range; @@ -392,7 +392,7 @@ struct client_writeable_range_t { void encode(bufferlist &bl) const; void decode(bufferlist::iterator& bl); void dump(Formatter *f) const; - static void generate_test_instances(list<client_writeable_range_t*>& ls); + static void generate_test_instances(std::list<client_writeable_range_t*>& ls); }; inline void decode(client_writeable_range_t::byte_range_t& range, bufferlist::iterator& bl) { @@ -462,6 +462,7 @@ typedef uint32_t damage_flags_t; /* * inode_t */ +template<template<typename> class Allocator = std::allocator> struct inode_t { /** * *************** @@ -487,7 +488,7 @@ struct inode_t { // file (data access) ceph_dir_layout dir_layout; // [dir only] file_layout_t layout; - compact_set <int64_t> old_pools; + compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; uint64_t size = 0; // on directory, # dentries uint64_t max_size_ever = 0; // max size the file has ever been uint32_t truncate_seq = 0; @@ -496,12 +497,13 @@ struct inode_t { utime_t mtime; // file data modify time. utime_t atime; // file data access time. uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) - inline_data_t inline_data; + inline_data_t inline_data; // FIXME check // change attribute uint64_t change_attr = 0; - std::map<client_t,client_writeable_range_t> client_ranges; // client(s) can write to these ranges + using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; + client_range_map client_ranges; // client(s) can write to these ranges // dirfrag, recursive accountin frag_info_t dirstat; // protected by my filelock @@ -524,7 +526,7 @@ struct inode_t { snapid_t oldest_snap; - string stray_prior_path; //stores path before unlink + std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink inode_t() { @@ -610,7 +612,7 @@ struct inode_t { void encode(bufferlist &bl, uint64_t features) const; void decode(bufferlist::iterator& bl); void dump(Formatter *f) const; - static void generate_test_instances(list<inode_t*>& ls); + static void generate_test_instances(std::list<inode_t*>& ls); /** * Compare this inode_t with another that represent *the same inode* * at different points in time. @@ -627,23 +629,392 @@ struct inode_t { private: bool older_is_consistent(const inode_t &other) const; }; -WRITE_CLASS_ENCODER_FEATURES(inode_t) +// These methods may be moved back to mdstypes.cc when we have pmr +template<template<typename> class Allocator> +void inode_t<Allocator>::encode(bufferlist &bl, uint64_t features) const +{ + ENCODE_START(15, 6, bl); + + encode(ino, bl); + encode(rdev, bl); + encode(ctime, bl); + + encode(mode, bl); + encode(uid, bl); + encode(gid, bl); + + encode(nlink, bl); + { + // removed field + bool anchored = 0; + encode(anchored, bl); + } + + encode(dir_layout, bl); + encode(layout, bl, features); + encode(size, bl); + encode(truncate_seq, bl); + encode(truncate_size, bl); + encode(truncate_from, bl); + encode(truncate_pending, bl); + encode(mtime, bl); + encode(atime, bl); + encode(time_warp_seq, bl); + encode(client_ranges, bl); + + encode(dirstat, bl); + encode(rstat, bl); + encode(accounted_rstat, bl); + + encode(version, bl); + encode(file_data_version, bl); + encode(xattr_version, bl); + encode(backtrace_version, bl); + encode(old_pools, bl); + encode(max_size_ever, bl); + encode(inline_data, bl); + encode(quota, bl); + + encode(stray_prior_path, bl); + + encode(last_scrub_version, bl); + encode(last_scrub_stamp, bl); + + encode(btime, bl); + encode(change_attr, bl); + + encode(export_pin, bl); + + ENCODE_FINISH(bl); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::decode(bufferlist::iterator &p) +{ + DECODE_START_LEGACY_COMPAT_LEN(15, 6, 6, p); + + decode(ino, p); + decode(rdev, p); + decode(ctime, p); + + decode(mode, p); + decode(uid, p); + decode(gid, p); + + decode(nlink, p); + { + bool anchored; + decode(anchored, p); + } + + if (struct_v >= 4) + decode(dir_layout, p); + else + memset(&dir_layout, 0, sizeof(dir_layout)); + decode(layout, p); + decode(size, p); + decode(truncate_seq, p); + decode(truncate_size, p); + decode(truncate_from, p); + if (struct_v >= 5) + decode(truncate_pending, p); + else + truncate_pending = 0; + decode(mtime, p); + decode(atime, p); + decode(time_warp_seq, p); + if (struct_v >= 3) { + decode(client_ranges, p); + } else { + map<client_t, client_writeable_range_t::byte_range_t> m; + decode(m, p); + for (map<client_t, client_writeable_range_t::byte_range_t>::iterator + q = m.begin(); q != m.end(); ++q) + client_ranges[q->first].range = q->second; + } + + decode(dirstat, p); + decode(rstat, p); + decode(accounted_rstat, p); + + decode(version, p); + decode(file_data_version, p); + decode(xattr_version, p); + if (struct_v >= 2) + decode(backtrace_version, p); + if (struct_v >= 7) + decode(old_pools, p); + if (struct_v >= 8) + decode(max_size_ever, p); + if (struct_v >= 9) { + decode(inline_data, p); + } else { + inline_data.version = CEPH_INLINE_NONE; + } + if (struct_v < 10) + backtrace_version = 0; // force update backtrace + if (struct_v >= 11) + decode(quota, p); + + if (struct_v >= 12) { + std::string tmp; + decode(tmp, p); + stray_prior_path = std::string_view(tmp); + } + + if (struct_v >= 13) { + decode(last_scrub_version, p); + decode(last_scrub_stamp, p); + } + if (struct_v >= 14) { + decode(btime, p); + decode(change_attr, p); + } else { + btime = utime_t(); + change_attr = 0; + } + + if (struct_v >= 15) { + decode(export_pin, p); + } else { + export_pin = MDS_RANK_NONE; + } + + DECODE_FINISH(p); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::dump(Formatter *f) const +{ + f->dump_unsigned("ino", ino); + f->dump_unsigned("rdev", rdev); + f->dump_stream("ctime") << ctime; + f->dump_stream("btime") << btime; + f->dump_unsigned("mode", mode); + f->dump_unsigned("uid", uid); + f->dump_unsigned("gid", gid); + f->dump_unsigned("nlink", nlink); + + f->open_object_section("dir_layout"); + ::dump(dir_layout, f); + f->close_section(); + + f->dump_object("layout", layout); + + f->open_array_section("old_pools"); + for (const auto &p : old_pools) { + f->dump_int("pool", p); + } + f->close_section(); + + f->dump_unsigned("size", size); + f->dump_unsigned("truncate_seq", truncate_seq); + f->dump_unsigned("truncate_size", truncate_size); + f->dump_unsigned("truncate_from", truncate_from); + f->dump_unsigned("truncate_pending", truncate_pending); + f->dump_stream("mtime") << mtime; + f->dump_stream("atime") << atime; + f->dump_unsigned("time_warp_seq", time_warp_seq); + f->dump_unsigned("change_attr", change_attr); + f->dump_int("export_pin", export_pin); + + f->open_array_section("client_ranges"); + for (const auto &p : client_ranges) { + f->open_object_section("client"); + f->dump_unsigned("client", p.first.v); + p.second.dump(f); + f->close_section(); + } + f->close_section(); + + f->open_object_section("dirstat"); + dirstat.dump(f); + f->close_section(); + + f->open_object_section("rstat"); + rstat.dump(f); + f->close_section(); + + f->open_object_section("accounted_rstat"); + accounted_rstat.dump(f); + f->close_section(); + + f->dump_unsigned("version", version); + f->dump_unsigned("file_data_version", file_data_version); + f->dump_unsigned("xattr_version", xattr_version); + f->dump_unsigned("backtrace_version", backtrace_version); + + f->dump_string("stray_prior_path", stray_prior_path); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::generate_test_instances(list<inode_t*>& ls) +{ + ls.push_back(new inode_t<Allocator>); + ls.push_back(new inode_t<Allocator>); + ls.back()->ino = 1; + // i am lazy. +} + +template<template<typename> class Allocator> +int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const +{ + assert(ino == other.ino); + *divergent = false; + if (version == other.version) { + if (rdev != other.rdev || + ctime != other.ctime || + btime != other.btime || + mode != other.mode || + uid != other.uid || + gid != other.gid || + nlink != other.nlink || + memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || + layout != other.layout || + old_pools != other.old_pools || + size != other.size || + max_size_ever != other.max_size_ever || + truncate_seq != other.truncate_seq || + truncate_size != other.truncate_size || + truncate_from != other.truncate_from || + truncate_pending != other.truncate_pending || + change_attr != other.change_attr || + mtime != other.mtime || + atime != other.atime || + time_warp_seq != other.time_warp_seq || + inline_data != other.inline_data || + client_ranges != other.client_ranges || + !(dirstat == other.dirstat) || + !(rstat == other.rstat) || + !(accounted_rstat == other.accounted_rstat) || + file_data_version != other.file_data_version || + xattr_version != other.xattr_version || + backtrace_version != other.backtrace_version) { + *divergent = true; + } + return 0; + } else if (version > other.version) { + *divergent = !older_is_consistent(other); + return 1; + } else { + assert(version < other.version); + *divergent = !other.older_is_consistent(*this); + return -1; + } +} + +template<template<typename> class Allocator> +bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const +{ + if (max_size_ever < other.max_size_ever || + truncate_seq < other.truncate_seq || + time_warp_seq < other.time_warp_seq || + inline_data.version < other.inline_data.version || + dirstat.version < other.dirstat.version || + rstat.version < other.rstat.version || + accounted_rstat.version < other.accounted_rstat.version || + file_data_version < other.file_data_version || + xattr_version < other.xattr_version || + backtrace_version < other.backtrace_version) { + return false; + } + return true; +} + +template<template<typename> class Allocator> +inline void encode(const inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) +{ + ENCODE_DUMP_PRE(); + c.encode(bl, features); + ENCODE_DUMP_POST(cl); +} +template<template<typename> class Allocator> +inline void decode(inode_t<Allocator> &c, ::ceph::bufferlist::iterator &p) +{ + c.decode(p); +} + +template<template<typename> class Allocator> +using alloc_string = std::basic_string<char,std::char_traits<char>,Allocator<char>>; + +template<template<typename> class Allocator> +using xattr_map = compact_map<alloc_string<Allocator>, bufferptr, std::less<alloc_string<Allocator>>, Allocator<std::pair<const alloc_string<Allocator>, bufferptr>>>; // FIXME bufferptr not in mempool /* * old_inode_t */ +template<template<typename> class Allocator = std::allocator> struct old_inode_t { snapid_t first; - inode_t inode; - std::map<string,bufferptr> xattrs; + inode_t<Allocator> inode; + xattr_map<Allocator> xattrs; void encode(bufferlist &bl, uint64_t features) const; void decode(bufferlist::iterator& bl); void dump(Formatter *f) const; - static void generate_test_instances(list<old_inode_t*>& ls); + static void generate_test_instances(std::list<old_inode_t*>& ls); }; -WRITE_CLASS_ENCODER_FEATURES(old_inode_t) + +// These methods may be moved back to mdstypes.cc when we have pmr +template<template<typename> class Allocator> +void old_inode_t<Allocator>::encode(bufferlist& bl, uint64_t features) const +{ + ENCODE_START(2, 2, bl); + encode(first, bl); + encode(inode, bl, features); + encode(xattrs, bl); + ENCODE_FINISH(bl); +} + +template<template<typename> class Allocator> +void old_inode_t<Allocator>::decode(bufferlist::iterator& bl) +{ + DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); + decode(first, bl); + decode(inode, bl); + decode(xattrs, bl); + DECODE_FINISH(bl); +} + +template<template<typename> class Allocator> +void old_inode_t<Allocator>::dump(Formatter *f) const +{ + f->dump_unsigned("first", first); + inode.dump(f); + f->open_object_section("xattrs"); + for (const auto &p : xattrs) { + std::string v(p.second.c_str(), p.second.length()); + f->dump_string(p.first.c_str(), v); + } + f->close_section(); +} + +template<template<typename> class Allocator> +void old_inode_t<Allocator>::generate_test_instances(std::list<old_inode_t<Allocator>*>& ls) +{ + ls.push_back(new old_inode_t<Allocator>); + ls.push_back(new old_inode_t<Allocator>); + ls.back()->first = 2; + std::list<inode_t<Allocator>*> ils; + inode_t<Allocator>::generate_test_instances(ils); + ls.back()->inode = *ils.back(); + ls.back()->xattrs["user.foo"] = buffer::copy("asdf", 4); + ls.back()->xattrs["user.unprintable"] = buffer::copy("\000\001\002", 3); +} + +template<template<typename> class Allocator> +inline void encode(const old_inode_t<Allocator> &c, ::ceph::bufferlist &bl, uint64_t features) +{ + ENCODE_DUMP_PRE(); + c.encode(bl, features); + ENCODE_DUMP_POST(cl); +} +template<template<typename> class Allocator> +inline void decode(old_inode_t<Allocator> &c, ::ceph::bufferlist::iterator &p) +{ + c.decode(p); +} /* @@ -1037,15 +1408,13 @@ namespace std { class inode_load_vec_t { static const int NUM = 2; - std::vector < DecayCounter > vec; + std::array<DecayCounter, NUM> vec; public: explicit inode_load_vec_t(const utime_t &now) - : vec(NUM, DecayCounter(now)) + : vec{DecayCounter(now), DecayCounter(now)} {} // for dencoder infrastructure - inode_load_vec_t() : - vec(NUM, DecayCounter()) - {} + inode_load_vec_t() {} DecayCounter &get(int t) { assert(t < NUM); return vec[t]; @@ -1074,24 +1443,30 @@ inline void decode(inode_load_vec_t & c, bufferlist::iterator &p) { class dirfrag_load_vec_t { public: static const int NUM = 5; - std::vector < DecayCounter > vec; + std::array<DecayCounter, NUM> vec; explicit dirfrag_load_vec_t(const utime_t &now) - : vec(NUM, DecayCounter(now)) - { } - // for dencoder infrastructure - dirfrag_load_vec_t() - : vec(NUM, DecayCounter()) + : vec{ + DecayCounter(now), + DecayCounter(now), + DecayCounter(now), + DecayCounter(now), + DecayCounter(now) + } {} + // for dencoder infrastructure + dirfrag_load_vec_t() {} void encode(bufferlist &bl) const { ENCODE_START(2, 2, bl); - for (int i=0; i<NUM; i++) - encode(vec[i], bl); + for (const auto &i : vec) { + encode(i, bl); + } ENCODE_FINISH(bl); } void decode(const utime_t &t, bufferlist::iterator &p) { DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, p); - for (int i=0; i<NUM; i++) - decode(vec[i], t, p); + for (auto &i : vec) { + decode(i, t, p); + } DECODE_FINISH(p); } // for dencoder infrastructure @@ -1108,12 +1483,14 @@ public: return vec[t]; } void adjust(utime_t now, const DecayRate& rate, double d) { - for (int i=0; i<NUM; i++) - vec[i].adjust(now, rate, d); + for (auto &i : vec) { + i.adjust(now, rate, d); + } } void zero(utime_t now) { - for (int i=0; i<NUM; i++) - vec[i].reset(now); + for (auto &i : vec) { + i.reset(now); + } } double meta_load(utime_t now, const DecayRate& rate) { return diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h index 9748345ffc6..d725b199f0a 100644 --- a/src/test/encoding/types.h +++ b/src/test/encoding/types.h @@ -188,8 +188,8 @@ TYPE(sr_t) TYPE(frag_info_t) TYPE(nest_info_t) TYPE(client_writeable_range_t) -TYPE_FEATUREFUL(inode_t) -TYPE_FEATUREFUL(old_inode_t) +TYPE_FEATUREFUL(inode_t<std::allocator>) +TYPE_FEATUREFUL(old_inode_t<std::allocator>) TYPE(fnode_t) TYPE(old_rstat_t) TYPE_FEATUREFUL(session_info_t) diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 0d3e3ace724..f8bf524097a 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -909,7 +909,7 @@ int DataScan::scan_links() int nlink; bool is_dir; link_info_t() : version(0), nlink(0), is_dir(false) {} - link_info_t(inodeno_t di, frag_t df, const string& n, const inode_t i) : + link_info_t(inodeno_t di, frag_t df, const string& n, const CInode::mempool_inode& i) : dirino(di), frag(df), name(n), version(i.version), nlink(i.nlink), is_dir(S_IFDIR & i.mode) {} dirfrag_t dirfrag() const { |