path: root/src/mds/CInode.h
author     Varsha Rao <varao@redhat.com>  2019-07-16 14:51:03 +0200
committer  Varsha Rao <varao@redhat.com>  2019-07-25 18:07:49 +0200
commit     2ef2bf6e8255736821e459b969c6187f6f462780 (patch)
tree       0b8f23fdb790628f34929e764b404b0f79fd2f20 /src/mds/CInode.h
parent     Merge pull request #29117 from tchaikov/wip-mgr-set_health_checks-unicode (diff)
mds: Reorganize class members in CInode header
Fixes: https://tracker.ceph.com/issues/40787
Signed-off-by: Varsha Rao <varao@redhat.com>
Diffstat (limited to 'src/mds/CInode.h')
-rw-r--r--  src/mds/CInode.h  719
1 file changed, 346 insertions, 373 deletions
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index ae641073a9c..75ec7f1acdd 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -12,8 +12,6 @@
*
*/
-
-
#ifndef CEPH_CINODE_H
#define CEPH_CINODE_H
@@ -55,18 +53,11 @@ class Session;
struct ObjectOperation;
class EMetaBlob;
-
-ostream& operator<<(ostream& out, const CInode& in);
-
struct cinode_lock_info_t {
int lock;
int wr_caps;
};
-extern cinode_lock_info_t cinode_lock_info[];
-extern int num_cinode_locks;
-
-
/**
* Base class for CInode, containing the backing store data and
* serialization methods. This exists so that we can read and
@@ -80,14 +71,6 @@ public:
typedef mempool::mds_co::compact_map<snapid_t, mempool_old_inode> mempool_old_inode_map;
typedef xattr_map<mempool::mds_co::pool_allocator> mempool_xattr_map; // FIXME bufferptr not in mempool
- mempool_inode inode; // the inode itself
- mempool::mds_co::string symlink; // symlink dest, if symlink
- mempool_xattr_map xattrs;
- fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
- mempool_old_inode_map old_inodes; // key = last, value.first = first
- snapid_t oldest_snap = CEPH_NOSNAP;
- damage_flags_t damage_flags = 0;
-
InodeStoreBase() {}
/* Helpers */
@@ -110,13 +93,18 @@ public:
/* For use by offline tools */
__u32 hash_dentry_name(std::string_view dn);
frag_t pick_dirfrag(std::string_view dn);
+
+ mempool_inode inode; // the inode itself
+ mempool::mds_co::string symlink; // symlink dest, if symlink
+ mempool_xattr_map xattrs;
+ fragtree_t dirfragtree; // dir frag tree, if any. always consistent with our dirfrag map.
+ mempool_old_inode_map old_inodes; // key = last, value.first = first
+ snapid_t oldest_snap = CEPH_NOSNAP;
+ damage_flags_t damage_flags = 0;
};
class InodeStore : public InodeStoreBase {
public:
- // FIXME bufferlist not part of mempool
- bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't
- // rehydrate it without full MDCache
void encode(bufferlist &bl, uint64_t features) const {
InodeStoreBase::encode(bl, features, &snap_blob);
}
@@ -131,6 +119,10 @@ public:
}
static void generate_test_instances(std::list<InodeStore*>& ls);
+
+ // FIXME bufferlist not part of mempool
+ bufferlist snap_blob; // Encoded copy of SnapRealm, because we can't
+ // rehydrate it without full MDCache
};
WRITE_CLASS_ENCODER_FEATURES(InodeStore)
@@ -151,6 +143,121 @@ WRITE_CLASS_ENCODER_FEATURES(InodeStoreBare)
class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CInode> {
public:
MEMPOOL_CLASS_HELPERS();
+
+ using mempool_cap_map = mempool::mds_co::map<client_t, Capability>;
+ /**
+ * @defgroup Scrubbing and fsck
+ */
+
+ /**
+ * Report the results of validation against a particular inode.
+ * Each member is a pair of bools.
+ * <member>.first represents if validation was performed against the member.
+ * <member>.second represents if the member passed validation.
+ * performed_validation is set to true if the validation was actually
+ * run. It might not be run if, for instance, the inode is marked as dirty.
+ * passed_validation is set to true if everything that was checked
+ * passed its validation.
+ */
+ struct validated_data {
+ template<typename T>struct member_status {
+ bool checked = false;
+ bool passed = false;
+ bool repaired = false;
+ int ondisk_read_retval = 0;
+ T ondisk_value;
+ T memory_value;
+ std::stringstream error_str;
+ };
+
+ struct raw_stats_t {
+ frag_info_t dirstat;
+ nest_info_t rstat;
+ };
+
+ validated_data() {}
+
+ void dump(Formatter *f) const;
+
+ bool all_damage_repaired() const;
+
+ bool performed_validation = false;
+ bool passed_validation = false;
+
+ member_status<inode_backtrace_t> backtrace;
+ member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
+ member_status<raw_stats_t> raw_stats;
+ };
+
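For orientation, a minimal sketch of how a caller could consume a finished validation pass. The helper name report_scrub_result and its call site are illustrative assumptions, not part of this commit; only validated_data, all_damage_repaired() and dump_validation_results() come from this header.

  // Hypothetical helper (sketch only): report validation results that were
  // actually computed and neither passed nor were fully repaired.
  void report_scrub_result(const CInode::validated_data& results, Formatter* f)
  {
    if (!results.performed_validation)
      return;                          // scrub was skipped, nothing to report
    if (results.passed_validation || results.all_damage_repaired())
      return;                          // clean, or every damaged member repaired
    // each member_status carries checked/passed/repaired plus the on-disk
    // and in-memory values that disagreed
    CInode::dump_validation_results(results, f);
  }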
+ // friends
+ friend class Server;
+ friend class Locker;
+ friend class Migrator;
+ friend class MDCache;
+ friend class StrayManager;
+ friend class CDir;
+ friend class CInodeExport;
+
+ class scrub_stamp_info_t {
+ public:
+ scrub_stamp_info_t() {}
+ void reset() {
+ scrub_start_version = last_scrub_version = 0;
+ scrub_start_stamp = last_scrub_stamp = utime_t();
+ }
+ /// version we started our latest scrub (whether in-progress or finished)
+ version_t scrub_start_version = 0;
+ /// time we started our latest scrub (whether in-progress or finished)
+ utime_t scrub_start_stamp;
+ /// version we started our most recent finished scrub
+ version_t last_scrub_version = 0;
+ /// time we started our most recent finished scrub
+ utime_t last_scrub_stamp;
+ };
+
+ class scrub_info_t : public scrub_stamp_info_t {
+ public:
+ scrub_info_t() {}
+
+ CDentry *scrub_parent = nullptr;
+ MDSContext *on_finish = nullptr;
+
+ bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
+ bool scrub_in_progress = false; /// are we currently scrubbing?
+ bool children_scrubbed = false;
+
+ /// my own (temporary) stamps and versions for each dirfrag we have
+ std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool
+
+ ScrubHeaderRef header;
+ };
+
+ /**
+ * Projection methods, used to store inode changes until they have been journaled,
+ * at which point they are popped.
+ * Usage:
+ * project_inode as needed. If you're changing xattrs or sr_t, then pass true
+ * as needed then change the xattrs/snapnode member as needed. (Dirty
+ * exception: project_past_snaprealm_parent allows you to project the
+ * snapnode after doing project_inode (i.e. you don't need to pass
+ * snap=true).
+ *
+ * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
+ * This function will take care of the inode itself, the xattrs, and the snaprealm.
+ */
+
+ class projected_inode {
+ public:
+ static sr_t* const UNDEF_SRNODE;
+
+ projected_inode() = delete;
+ explicit projected_inode(const mempool_inode &in) : inode(in) {}
+
+ mempool_inode inode;
+ std::unique_ptr<mempool_xattr_map> xattrs;
+ sr_t *snapnode = UNDEF_SRNODE;
+ };
+
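As a rough illustration of the project, journal, pop cycle described in the comment above, a hypothetical MDS-internal call site might look like the sketch below. update_mtime_and_xattrs, ls and new_xattrs are placeholder names; project_inode() and pop_and_dirty_projected_inode() are the methods declared further down in this header.

  // Sketch only: project an mtime + xattr change, then (after journaling,
  // elided here) pop the projected values and mark the inode dirty.
  void update_mtime_and_xattrs(CInode* in, LogSegment* ls,
                               const CInode::mempool_xattr_map& new_xattrs)
  {
    // pass xattr=true because the xattr map is being modified
    auto& pi = in->project_inode(true /* xattr */, false /* snap */);
    pi.inode.mtime = ceph_clock_now();
    if (pi.xattrs)                     // assumed populated when xattr=true was requested
      *pi.xattrs = new_xattrs;

    // ... journal the update here ...

    // applies the projected inode, xattrs and snaprealm in one step
    in->pop_and_dirty_projected_inode(ls);
  }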
// -- pins --
static const int PIN_DIRFRAG = -1;
static const int PIN_CAPS = 2; // client caps
@@ -175,34 +282,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
static const int PIN_DIRWAITER = 24;
static const int PIN_SCRUBQUEUE = 25;
- std::string_view pin_name(int p) const override {
- switch (p) {
- case PIN_DIRFRAG: return "dirfrag";
- case PIN_CAPS: return "caps";
- case PIN_IMPORTING: return "importing";
- case PIN_OPENINGDIR: return "openingdir";
- case PIN_REMOTEPARENT: return "remoteparent";
- case PIN_BATCHOPENJOURNAL: return "batchopenjournal";
- case PIN_SCATTERED: return "scattered";
- case PIN_STICKYDIRS: return "stickydirs";
- //case PIN_PURGING: return "purging";
- case PIN_FREEZING: return "freezing";
- case PIN_FROZEN: return "frozen";
- case PIN_IMPORTINGCAPS: return "importingcaps";
- case PIN_EXPORTINGCAPS: return "exportingcaps";
- case PIN_PASTSNAPPARENT: return "pastsnapparent";
- case PIN_OPENINGSNAPPARENTS: return "openingsnapparents";
- case PIN_TRUNCATING: return "truncating";
- case PIN_STRAY: return "stray";
- case PIN_NEEDSNAPFLUSH: return "needsnapflush";
- case PIN_DIRTYRSTAT: return "dirtyrstat";
- case PIN_DIRTYPARENT: return "dirtyparent";
- case PIN_DIRWAITER: return "dirwaiter";
- case PIN_SCRUBQUEUE: return "scrubqueue";
- default: return generic_pin_name(p);
- }
- }
-
// -- dump flags --
static const int DUMP_INODE_STORE_BASE = (1 << 0);
static const int DUMP_MDS_CACHE_OBJECT = (1 << 1);
@@ -254,49 +333,49 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
// misc
static const unsigned EXPORT_NONCE = 1; // nonce given to replicas created by export
- ostream& print_db_line_prefix(ostream& out) override;
-
- public:
- MDCache *mdcache;
-
- SnapRealm *snaprealm = nullptr;
- SnapRealm *containing_realm = nullptr;
- snapid_t first, last;
- mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
+ // ---------------------------
+ CInode() = delete;
+ CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
+ ~CInode() override {
+ close_dirfrags();
+ close_snaprealm();
+ clear_file_locks();
+ ceph_assert(num_projected_xattrs == 0);
+ ceph_assert(num_projected_srnodes == 0);
+ ceph_assert(num_caps_wanted == 0);
+ ceph_assert(num_subtree_roots == 0);
+ ceph_assert(num_exporting_dirs == 0);
+ }
- class scrub_stamp_info_t {
- public:
- /// version we started our latest scrub (whether in-progress or finished)
- version_t scrub_start_version = 0;
- /// time we started our latest scrub (whether in-progress or finished)
- utime_t scrub_start_stamp;
- /// version we started our most recent finished scrub
- version_t last_scrub_version = 0;
- /// time we started our most recent finished scrub
- utime_t last_scrub_stamp;
- scrub_stamp_info_t() {}
- void reset() {
- scrub_start_version = last_scrub_version = 0;
- scrub_start_stamp = last_scrub_stamp = utime_t();
+ std::string_view pin_name(int p) const override {
+ switch (p) {
+ case PIN_DIRFRAG: return "dirfrag";
+ case PIN_CAPS: return "caps";
+ case PIN_IMPORTING: return "importing";
+ case PIN_OPENINGDIR: return "openingdir";
+ case PIN_REMOTEPARENT: return "remoteparent";
+ case PIN_BATCHOPENJOURNAL: return "batchopenjournal";
+ case PIN_SCATTERED: return "scattered";
+ case PIN_STICKYDIRS: return "stickydirs";
+ //case PIN_PURGING: return "purging";
+ case PIN_FREEZING: return "freezing";
+ case PIN_FROZEN: return "frozen";
+ case PIN_IMPORTINGCAPS: return "importingcaps";
+ case PIN_EXPORTINGCAPS: return "exportingcaps";
+ case PIN_PASTSNAPPARENT: return "pastsnapparent";
+ case PIN_OPENINGSNAPPARENTS: return "openingsnapparents";
+ case PIN_TRUNCATING: return "truncating";
+ case PIN_STRAY: return "stray";
+ case PIN_NEEDSNAPFLUSH: return "needsnapflush";
+ case PIN_DIRTYRSTAT: return "dirtyrstat";
+ case PIN_DIRTYPARENT: return "dirtyparent";
+ case PIN_DIRWAITER: return "dirwaiter";
+ case PIN_SCRUBQUEUE: return "scrubqueue";
+ default: return generic_pin_name(p);
}
- };
-
- class scrub_info_t : public scrub_stamp_info_t {
- public:
- CDentry *scrub_parent = nullptr;
- MDSContext *on_finish = nullptr;
-
- bool last_scrub_dirty = false; /// are our stamps dirty with respect to disk state?
- bool scrub_in_progress = false; /// are we currently scrubbing?
- bool children_scrubbed = false;
-
- /// my own (temporary) stamps and versions for each dirfrag we have
- std::map<frag_t, scrub_stamp_info_t> dirfrag_stamps; // XXX not part of mempool
-
- ScrubHeaderRef header;
+ }
- scrub_info_t() {}
- };
+ ostream& print_db_line_prefix(ostream& out) override;
const scrub_info_t *scrub_info() const{
if (!scrub_infop)
@@ -371,17 +450,6 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
scrub_infop->on_finish = c;
}
-private:
- /**
- * Create a scrub_info_t struct for the scrub_infop pointer.
- */
- void scrub_info_create() const;
- /**
- * Delete the scrub_info_t struct if it's not got any useful data
- */
- void scrub_maybe_delete_info();
-public:
-
bool is_multiversion() const {
return snaprealm || // other snaprealms will link to me
inode.is_dir() || // links to me in other snaps
@@ -390,55 +458,12 @@ public:
}
snapid_t get_oldest_snap();
- uint64_t last_journaled = 0; // log offset for the last time i was journaled
- //loff_t last_open_journaled; // log offset for the last journaled EOpen
- utime_t last_dirstat_prop;
-
-
- // list item node for when we have unpropagated rstat data
- elist<CInode*>::item dirty_rstat_item;
-
bool is_dirty_rstat() {
return state_test(STATE_DIRTYRSTAT);
}
void mark_dirty_rstat();
void clear_dirty_rstat();
- //bool hack_accessed = false;
- //utime_t hack_load_stamp;
-
- /**
- * Projection methods, used to store inode changes until they have been journaled,
- * at which point they are popped.
- * Usage:
- * project_inode as needed. If you're changing xattrs or sr_t, then pass true
- * as needed then change the xattrs/snapnode member as needed. (Dirty
- * exception: project_past_snaprealm_parent allows you to project the
- * snapnode after doing project_inode (i.e. you don't need to pass
- * snap=true).
- *
- * Then, journal. Once journaling is done, pop_and_dirty_projected_inode.
- * This function will take care of the inode itself, the xattrs, and the snaprealm.
- */
-
- class projected_inode {
- public:
- static sr_t* const UNDEF_SRNODE;
-
- mempool_inode inode;
- std::unique_ptr<mempool_xattr_map> xattrs;
- sr_t *snapnode = UNDEF_SRNODE;
-
- projected_inode() = delete;
- explicit projected_inode(const mempool_inode &in) : inode(in) {}
- };
-
-private:
- mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty)
- size_t num_projected_xattrs = 0;
- size_t num_projected_srnodes = 0;
-
-public:
CInode::projected_inode &project_inode(bool xattr = false, bool snap = false);
void pop_and_dirty_projected_inode(LogSegment *ls);
@@ -527,10 +552,6 @@ public:
void project_snaprealm_past_parent(SnapRealm *newparent);
void early_pop_projected_snaprealm();
-private:
- void pop_projected_snaprealm(sr_t *next_snaprealm, bool early);
-
-public:
mempool_old_inode& cow_old_inode(snapid_t follows, bool cow_head);
void split_old_inode(snapid_t snap);
mempool_old_inode *pick_old_inode(snapid_t last);
@@ -538,18 +559,6 @@ public:
bool has_snap_data(snapid_t s);
void purge_stale_snap_data(const std::set<snapid_t>& snaps);
- // -- cache infrastructure --
-private:
- mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
-
- //for the purpose of quickly determining whether there's a subtree root or exporting dir
- int num_subtree_roots = 0;
- int num_exporting_dirs = 0;
-
- int stickydir_ref = 0;
- scrub_info_t *scrub_infop = nullptr;
-
-public:
bool has_dirfrags() { return !dirfrags.empty(); }
CDir* get_dirfrag(frag_t fg) {
auto pi = dirfrags.find(fg);
@@ -603,131 +612,10 @@ public:
void get_stickydirs();
void put_stickydirs();
- protected:
- // parent dentries in cache
- CDentry *parent = nullptr; // primary link
- mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
-
- mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
-
- mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
-
- // -- distributed state --
-protected:
- // file capabilities
- using mempool_cap_map = mempool::mds_co::map<client_t, Capability>;
- mempool_cap_map client_caps; // client -> caps
- mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
- int replica_caps_wanted = 0; // [replica] what i've requested from auth
- int num_caps_wanted = 0;
-
-public:
- mempool::mds_co::compact_map<int, mempool::mds_co::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
- mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
-
void add_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
void remove_need_snapflush(CInode *snapin, snapid_t snapid, client_t client);
pair<bool,bool> split_need_snapflush(CInode *cowin, CInode *in);
-protected:
-
- ceph_lock_state_t *fcntl_locks = nullptr;
- ceph_lock_state_t *flock_locks = nullptr;
-
- ceph_lock_state_t *get_fcntl_lock_state() {
- if (!fcntl_locks)
- fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
- return fcntl_locks;
- }
- void clear_fcntl_lock_state() {
- delete fcntl_locks;
- fcntl_locks = NULL;
- }
- ceph_lock_state_t *get_flock_lock_state() {
- if (!flock_locks)
- flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
- return flock_locks;
- }
- void clear_flock_lock_state() {
- delete flock_locks;
- flock_locks = NULL;
- }
- void clear_file_locks() {
- clear_fcntl_lock_state();
- clear_flock_lock_state();
- }
- void _encode_file_locks(bufferlist& bl) const {
- using ceph::encode;
- bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty();
- encode(has_fcntl_locks, bl);
- if (has_fcntl_locks)
- encode(*fcntl_locks, bl);
- bool has_flock_locks = flock_locks && !flock_locks->empty();
- encode(has_flock_locks, bl);
- if (has_flock_locks)
- encode(*flock_locks, bl);
- }
- void _decode_file_locks(bufferlist::const_iterator& p) {
- using ceph::decode;
- bool has_fcntl_locks;
- decode(has_fcntl_locks, p);
- if (has_fcntl_locks)
- decode(*get_fcntl_lock_state(), p);
- else
- clear_fcntl_lock_state();
- bool has_flock_locks;
- decode(has_flock_locks, p);
- if (has_flock_locks)
- decode(*get_flock_lock_state(), p);
- else
- clear_flock_lock_state();
- }
-
- // LogSegment lists i (may) belong to
-public:
- elist<CInode*>::item item_dirty;
- elist<CInode*>::item item_caps;
- elist<CInode*>::item item_open_file;
- elist<CInode*>::item item_dirty_parent;
- elist<CInode*>::item item_dirty_dirfrag_dir;
- elist<CInode*>::item item_dirty_dirfrag_nest;
- elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
- elist<CInode*>::item item_scrub;
-
- // also update RecoveryQueue::RecoveryQueue() if you change this
- elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
- elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
-
-public:
- int auth_pin_freeze_allowance = 0;
-
- inode_load_vec_t pop;
- elist<CInode*>::item item_pop_lru;
-
- // friends
- friend class Server;
- friend class Locker;
- friend class Migrator;
- friend class MDCache;
- friend class StrayManager;
- friend class CDir;
- friend class CInodeExport;
-
- // ---------------------------
- CInode() = delete;
- CInode(MDCache *c, bool auth=true, snapid_t f=2, snapid_t l=CEPH_NOSNAP);
- ~CInode() override {
- close_dirfrags();
- close_snaprealm();
- clear_file_locks();
- ceph_assert(num_projected_xattrs == 0);
- ceph_assert(num_projected_srnodes == 0);
- ceph_assert(num_caps_wanted == 0);
- ceph_assert(num_subtree_roots == 0);
- ceph_assert(num_exporting_dirs == 0);
- }
-
-
// -- accessors --
bool is_root() const { return inode.ino == MDS_INO_ROOT; }
bool is_stray() const { return MDS_INO_IS_STRAY(inode.ino); }
@@ -807,20 +695,11 @@ public:
void fetch(MDSContext *fin);
void _fetched(bufferlist& bl, bufferlist& bl2, Context *fin);
-
void build_backtrace(int64_t pool, inode_backtrace_t& bt);
void store_backtrace(MDSContext *fin, int op_prio=-1);
void _stored_backtrace(int r, version_t v, Context *fin);
void fetch_backtrace(Context *fin, bufferlist *backtrace);
-protected:
- /**
- * Return the pool ID where we currently write backtraces for
- * this inode (in addition to inode.old_pools)
- *
- * @returns a pool ID >=0
- */
- int64_t get_backtrace_pool() const;
-public:
+
void mark_dirty_parent(LogSegment *ls, bool dirty_pool=false);
void clear_dirty_parent();
void verify_diri_backtrace(bufferlist &bl, int err);
@@ -852,10 +731,6 @@ public:
_decode_locks_state(p, is_new);
}
- // -- waiting --
-protected:
- mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
-public:
void add_dir_waiter(frag_t fg, MDSContext *c);
void take_dir_waiting(frag_t fg, MDSContext::vec& ls);
bool is_waiting_for_dir(frag_t fg) {
@@ -886,39 +761,12 @@ public:
}
void decode_import(bufferlist::const_iterator& p, LogSegment *ls);
-
// for giving to clients
int encode_inodestat(bufferlist& bl, Session *session, SnapRealm *realm,
snapid_t snapid=CEPH_NOSNAP, unsigned max_bytes=0,
int getattr_wants=0);
void encode_cap_message(const ref_t<MClientCaps> &m, Capability *cap);
-
- // -- locks --
-public:
- static LockType versionlock_type;
- static LockType authlock_type;
- static LockType linklock_type;
- static LockType dirfragtreelock_type;
- static LockType filelock_type;
- static LockType xattrlock_type;
- static LockType snaplock_type;
- static LockType nestlock_type;
- static LockType flocklock_type;
- static LockType policylock_type;
-
- // FIXME not part of mempool
- LocalLock versionlock;
- SimpleLock authlock;
- SimpleLock linklock;
- ScatterLock dirfragtreelock;
- ScatterLock filelock;
- SimpleLock xattrlock;
- SimpleLock snaplock;
- ScatterLock nestlock;
- SimpleLock flocklock;
- SimpleLock policylock;
-
SimpleLock* get_lock(int type) override {
switch (type) {
case CEPH_LOCK_IFILE: return &filelock;
@@ -957,10 +805,6 @@ public:
void encode_snap(bufferlist& bl);
void decode_snap(bufferlist::const_iterator& p);
- // -- caps -- (new)
- // client caps
- client_t loner_cap = -1, want_loner_cap = -1;
-
client_t get_loner() const { return loner_cap; }
client_t get_wanted_loner() const { return want_loner_cap; }
@@ -1102,9 +946,7 @@ public:
void last_put() override;
void _put() override;
-
// -- hierarchy stuff --
-public:
void set_primary_parent(CDentry *p) {
ceph_assert(parent == 0 ||
g_conf().get_val<bool>("mds_hack_allow_loading_invalid_metadata"));
@@ -1129,7 +971,6 @@ public:
projected_parent.pop_front();
}
-public:
void maybe_export_pin(bool update=false);
void set_export_pin(mds_rank_t rank);
mds_rank_t get_export_pin(bool inherit=true) const;
@@ -1139,51 +980,6 @@ public:
void dump(Formatter *f, int flags = DUMP_DEFAULT) const;
/**
- * @defgroup Scrubbing and fsck
- * @{
- */
-
- /**
- * Report the results of validation against a particular inode.
- * Each member is a pair of bools.
- * <member>.first represents if validation was performed against the member.
- * <member.second represents if the member passed validation.
- * performed_validation is set to true if the validation was actually
- * run. It might not be run if, for instance, the inode is marked as dirty.
- * passed_validation is set to true if everything that was checked
- * passed its validation.
- */
- struct validated_data {
- template<typename T>struct member_status {
- bool checked = false;
- bool passed = false;
- bool repaired = false;
- int ondisk_read_retval = 0;
- T ondisk_value;
- T memory_value;
- std::stringstream error_str;
- };
-
- bool performed_validation = false;
- bool passed_validation = false;
-
- struct raw_stats_t {
- frag_info_t dirstat;
- nest_info_t rstat;
- };
-
- member_status<inode_backtrace_t> backtrace;
- member_status<mempool_inode> inode; // XXX should not be in mempool; wait for pmr
- member_status<raw_stats_t> raw_stats;
-
- validated_data() {}
-
- void dump(Formatter *f) const;
-
- bool all_damage_repaired() const;
- };
-
- /**
* Validate that the on-disk state of an inode matches what
* we expect from our memory state. Currently this checks that:
* 1) The backtrace associated with the file data exists and is correct
@@ -1200,14 +996,191 @@ public:
MDSContext *fin);
static void dump_validation_results(const validated_data& results,
Formatter *f);
+
+ //bool hack_accessed = false;
+ //utime_t hack_load_stamp;
+
+ MDCache *mdcache;
+
+ SnapRealm *snaprealm = nullptr;
+ SnapRealm *containing_realm = nullptr;
+ snapid_t first, last;
+ mempool::mds_co::compact_set<snapid_t> dirty_old_rstats;
+
+ uint64_t last_journaled = 0; // log offset for the last time i was journaled
+ //loff_t last_open_journaled; // log offset for the last journaled EOpen
+ utime_t last_dirstat_prop;
+
+ // list item node for when we have unpropagated rstat data
+ elist<CInode*>::item dirty_rstat_item;
+
+ mempool::mds_co::compact_map<int, mempool::mds_co::set<client_t> > client_snap_caps; // [auth] [snap] dirty metadata we still need from the head
+ mempool::mds_co::compact_map<snapid_t, mempool::mds_co::set<client_t> > client_need_snapflush;
+
+ // LogSegment lists i (may) belong to
+ elist<CInode*>::item item_dirty;
+ elist<CInode*>::item item_caps;
+ elist<CInode*>::item item_open_file;
+ elist<CInode*>::item item_dirty_parent;
+ elist<CInode*>::item item_dirty_dirfrag_dir;
+ elist<CInode*>::item item_dirty_dirfrag_nest;
+ elist<CInode*>::item item_dirty_dirfrag_dirfragtree;
+ elist<CInode*>::item item_scrub;
+
+ // also update RecoveryQueue::RecoveryQueue() if you change this
+ elist<CInode*>::item& item_recover_queue = item_dirty_dirfrag_dir;
+ elist<CInode*>::item& item_recover_queue_front = item_dirty_dirfrag_nest;
+
+ int auth_pin_freeze_allowance = 0;
+
+ inode_load_vec_t pop;
+ elist<CInode*>::item item_pop_lru;
+
+ // -- locks --
+ static LockType versionlock_type;
+ static LockType authlock_type;
+ static LockType linklock_type;
+ static LockType dirfragtreelock_type;
+ static LockType filelock_type;
+ static LockType xattrlock_type;
+ static LockType snaplock_type;
+ static LockType nestlock_type;
+ static LockType flocklock_type;
+ static LockType policylock_type;
+
+ // FIXME not part of mempool
+ LocalLock versionlock;
+ SimpleLock authlock;
+ SimpleLock linklock;
+ ScatterLock dirfragtreelock;
+ ScatterLock filelock;
+ SimpleLock xattrlock;
+ SimpleLock snaplock;
+ ScatterLock nestlock;
+ SimpleLock flocklock;
+ SimpleLock policylock;
+
+ // -- caps -- (new)
+ // client caps
+ client_t loner_cap = -1, want_loner_cap = -1;
+
+protected:
+ ceph_lock_state_t *get_fcntl_lock_state() {
+ if (!fcntl_locks)
+ fcntl_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FCNTL);
+ return fcntl_locks;
+ }
+ void clear_fcntl_lock_state() {
+ delete fcntl_locks;
+ fcntl_locks = NULL;
+ }
+ ceph_lock_state_t *get_flock_lock_state() {
+ if (!flock_locks)
+ flock_locks = new ceph_lock_state_t(g_ceph_context, CEPH_LOCK_FLOCK);
+ return flock_locks;
+ }
+ void clear_flock_lock_state() {
+ delete flock_locks;
+ flock_locks = NULL;
+ }
+ void clear_file_locks() {
+ clear_fcntl_lock_state();
+ clear_flock_lock_state();
+ }
+ void _encode_file_locks(bufferlist& bl) const {
+ using ceph::encode;
+ bool has_fcntl_locks = fcntl_locks && !fcntl_locks->empty();
+ encode(has_fcntl_locks, bl);
+ if (has_fcntl_locks)
+ encode(*fcntl_locks, bl);
+ bool has_flock_locks = flock_locks && !flock_locks->empty();
+ encode(has_flock_locks, bl);
+ if (has_flock_locks)
+ encode(*flock_locks, bl);
+ }
+ void _decode_file_locks(bufferlist::const_iterator& p) {
+ using ceph::decode;
+ bool has_fcntl_locks;
+ decode(has_fcntl_locks, p);
+ if (has_fcntl_locks)
+ decode(*get_fcntl_lock_state(), p);
+ else
+ clear_fcntl_lock_state();
+ bool has_flock_locks;
+ decode(has_flock_locks, p);
+ if (has_flock_locks)
+ decode(*get_flock_lock_state(), p);
+ else
+ clear_flock_lock_state();
+ }
+
+ /**
+ * Return the pool ID where we currently write backtraces for
+ * this inode (in addition to inode.old_pools)
+ *
+ * @returns a pool ID >=0
+ */
+ int64_t get_backtrace_pool() const;
+
+ // parent dentries in cache
+ CDentry *parent = nullptr; // primary link
+ mempool::mds_co::compact_set<CDentry*> remote_parents; // if hard linked
+
+ mempool::mds_co::list<CDentry*> projected_parent; // for in-progress rename, (un)link, etc.
+
+ mds_authority_t inode_auth = CDIR_AUTH_DEFAULT;
+
+ // -- distributed state --
+ // file capabilities
+ mempool_cap_map client_caps; // client -> caps
+ mempool::mds_co::compact_map<int32_t, int32_t> mds_caps_wanted; // [auth] mds -> caps wanted
+ int replica_caps_wanted = 0; // [replica] what i've requested from auth
+ int num_caps_wanted = 0;
+
+ ceph_lock_state_t *fcntl_locks = nullptr;
+ ceph_lock_state_t *flock_locks = nullptr;
+
+ // -- waiting --
+ mempool::mds_co::compact_map<frag_t, MDSContext::vec > waiting_on_dir;
+
private:
+
+ friend class ValidationContinuation;
+
+ /**
+ * Create a scrub_info_t struct for the scrub_infop pointer.
+ */
+ void scrub_info_create() const;
+ /**
+ * Delete the scrub_info_t struct if it's not got any useful data
+ */
+ void scrub_maybe_delete_info();
+
+ void pop_projected_snaprealm(sr_t *next_snaprealm, bool early);
+
bool _validate_disk_state(class ValidationContinuation *c,
int rval, int stage);
- friend class ValidationContinuation;
+
+ mempool::mds_co::list<projected_inode> projected_nodes; // projected values (only defined while dirty)
+ size_t num_projected_xattrs = 0;
+ size_t num_projected_srnodes = 0;
+
+ // -- cache infrastructure --
+ mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
+
+ //for the purpose of quickly determining whether there's a subtree root or exporting dir
+ int num_subtree_roots = 0;
+ int num_exporting_dirs = 0;
+
+ int stickydir_ref = 0;
+ scrub_info_t *scrub_infop = nullptr;
/** @} Scrubbing and fsck */
};
+ostream& operator<<(ostream& out, const CInode& in);
ostream& operator<<(ostream& out, const CInode::scrub_stamp_info_t& si);
+extern cinode_lock_info_t cinode_lock_info[];
+extern int num_cinode_locks;
#undef dout_context
#endif