diff options
-rw-r--r-- | src/TODO | 2 | ||||
-rw-r--r-- | src/client/Client.cc | 29 | ||||
-rw-r--r-- | src/client/Client.h | 20 | ||||
-rw-r--r-- | src/include/ceph_fs.h | 6 | ||||
-rw-r--r-- | src/mds/MDCache.cc | 3 | ||||
-rw-r--r-- | src/mds/Migrator.cc | 3 | ||||
-rw-r--r-- | src/mds/Server.cc | 19 | ||||
-rw-r--r-- | src/mds/snap.cc | 6 | ||||
-rw-r--r-- | src/mds/snap.h | 9 | ||||
-rw-r--r-- | src/messages/MClientFileCaps.h | 7 | ||||
-rw-r--r-- | src/messages/MClientReply.h | 8 | ||||
-rw-r--r-- | src/messages/MClientSnap.h | 2 |
12 files changed, 77 insertions, 37 deletions
@@ -227,7 +227,7 @@ todo / - SnapRealm open_parents, get_snap_set need to recursively open/examine parents over given ranges... / - realm split - adjust parent/child linkages - - make realm split notifications safe from races from multiple mds's +/ - make realm split notifications safe from races from multiple mds's - make sense of snap_highwater... - figure out how to fix up rados logging diff --git a/src/client/Client.cc b/src/client/Client.cc index 5ed633c8de0..2b1abab9798 100644 --- a/src/client/Client.cc +++ b/src/client/Client.cc @@ -1510,7 +1510,8 @@ void Client::_flushed(Inode *in, bool checkafter) * do not block. */ void Client::add_update_cap(Inode *in, int mds, - inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps, + inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater, + vector<snapid_t> &snaps, unsigned issued, unsigned seq, unsigned mseq) { InodeCap *cap = 0; @@ -1532,7 +1533,7 @@ void Client::add_update_cap(Inode *in, int mds, } in->caps[mds] = cap = new InodeCap; } - maybe_update_snaprealm(in->snaprealm, snap_highwater, snaps); + maybe_update_snaprealm(in->snaprealm, snap_created, snap_highwater, snaps); unsigned old_caps = cap->issued; cap->issued |= issued; @@ -1568,9 +1569,10 @@ void Client::remove_all_caps(Inode *in) } } -void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps) +void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, + snapid_t snap_highwater, vector<snapid_t>& snaps) { - if (realm->maybe_update(snap_highwater, snaps)) + if (realm->maybe_update(snap_created, snap_highwater, snaps)) dout(10) << *realm << " now " << snaps << " highwater " << snap_highwater << dendl; } @@ -1582,7 +1584,7 @@ void Client::handle_snap(MClientSnap *m) switch (m->op) { case CEPH_SNAP_OP_UPDATE: - maybe_update_snaprealm(realm, m->snap_highwater, m->snaps); + maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps); break; case CEPH_SNAP_OP_SPLIT: @@ -1597,15 +1599,21 @@ void Client::handle_snap(MClientSnap *m) p++) { if (inode_map.count(*p)) { Inode *in = inode_map[*p]; - dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl; - if (in->snaprealm) + if (in->snaprealm) { + if (in->snaprealm->created > m->snap_created) { + dout(10) << " NOT moving " << *in << " from _newer_ realm " + << *in->snaprealm << dendl; + continue; + } put_snap_realm(in->snaprealm); + } + dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl; in->snaprealm = realm; realm->nref++; } } - // oh.. update it too - maybe_update_snaprealm(realm, m->snap_highwater, m->snaps); + // update it too + maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps); } break; @@ -1645,7 +1653,7 @@ void Client::handle_file_caps(MClientFileCaps *m) if (m->get_op() == CEPH_CAP_OP_IMPORT) { // add/update it add_update_cap(in, mds, - m->get_realm(), m->get_snap_highwater(), m->get_snaps(), + m->get_snap_realm(), m->get_snap_created(), m->get_snap_highwater(), m->get_snaps(), m->get_caps(), m->get_seq(), m->get_mseq()); if (in->exporting_mseq < m->get_mseq()) { @@ -2984,6 +2992,7 @@ int Client::_open(const filepath &path, int flags, mode_t mode, Fh **fhp, int ui int mds = reply->get_source().num(); add_update_cap(in, mds, reply->get_snap_realm(), + reply->get_snap_created(), reply->get_snap_highwater(), reply->get_snaps(), reply->get_file_caps(), diff --git a/src/client/Client.h b/src/client/Client.h index b7f68697253..3390812bccb 100644 --- a/src/client/Client.h +++ b/src/client/Client.h @@ -130,15 +130,17 @@ struct InodeCap; struct SnapRealm { inodeno_t dirino; int nref; - snapid_t snap_highwater; + snapid_t created; + snapid_t highwater; vector<snapid_t> snaps; SnapRealm(inodeno_t i) : - dirino(i), nref(0), snap_highwater(0) { } + dirino(i), nref(0), created(0), highwater(0) { } - bool maybe_update(snapid_t sh, vector<snapid_t> &s) { - if (sh > snap_highwater) { - snap_highwater = sh; + bool maybe_update(snapid_t c, snapid_t sh, vector<snapid_t> &s) { + created = c; + if (sh > highwater) { + highwater = sh; snaps = s; return true; } @@ -147,7 +149,7 @@ struct SnapRealm { }; inline ostream& operator<<(ostream& out, const SnapRealm& r) { - return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " hw=" << r.snap_highwater + return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " c=" << r.created << " hw=" << r.highwater << " snaps=" << r.snaps << ")"; } @@ -787,12 +789,14 @@ protected: // file caps void add_update_cap(Inode *in, int mds, - inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps, + inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater, + vector<snapid_t> &snaps, unsigned issued, unsigned seq, unsigned mseq); void remove_cap(Inode *in, int mds); void remove_all_caps(Inode *in); - void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps); + void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater, + vector<snapid_t>& snaps); void handle_snap(class MClientSnap *m); void handle_file_caps(class MClientFileCaps *m); diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 0dee6eb7e60..6d2fce665dd 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -672,7 +672,7 @@ struct ceph_mds_reply_head { __le32 file_caps_mseq; __le32 mdsmap_epoch; __le64 snap_realm; - __le64 snap_highwater; + __le64 snap_created, snap_highwater; __le32 num_snaps; __le64 snaps[]; } __attribute__ ((packed)); @@ -791,13 +791,13 @@ struct ceph_mds_file_caps { __le32 seq; __le32 caps, wanted; __le64 ino; - __le64 realm; __le64 size, max_size; __le32 migrate_seq; struct ceph_timespec mtime, atime, ctime; __le64 time_warp_seq; + __le64 snap_realm; + __le64 snap_created, snap_highwater; __le32 num_snaps; - __le64 snap_highwater; __le64 snaps[]; } __attribute__ ((packed)); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 0ca59264fb6..3d7c0d1304f 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2798,7 +2798,8 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t& cap->wanted(), cap->get_mseq()); realm->get_snap_vector(reap->get_snaps()); - reap->set_snap_highwater(realm->snap_highwater); + reap->set_snap_created(realm->created); + reap->set_snap_highwater(realm->highwater); mds->messenger->send_message(reap, session->inst); } diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 37ecc7191c3..53e8543e035 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2062,7 +2062,8 @@ void Migrator::finish_import_inode_caps(CInode *in, int from, cap->wanted(), cap->get_mseq()); realm->get_snap_vector(caps->get_snaps()); - caps->set_snap_highwater(realm->snap_highwater); + caps->set_snap_created(realm->created); + caps->set_snap_highwater(realm->highwater); mds->send_message_client(caps, session->inst); } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 2423ae46a2e..c1837c3112e 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -4435,8 +4435,7 @@ void Server::_do_open(MDRequest *mdr, CInode *cur) SnapRealm *realm = cur->find_containing_snaprealm(); realm->get_snap_vector(reply->get_snaps()); - reply->set_snap_highwater(realm->snap_highwater); - reply->set_snap_realm(realm->inode->ino()); + reply->set_snap_info(realm->inode->ino(), realm->created, realm->highwater); dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl; //reply->set_file_data_version(fdv); @@ -4663,9 +4662,21 @@ void Server::handle_client_mksnap(MDRequest *mdr) // lock snap set<SimpleLock*> rdlocks, wrlocks, xlocks; + + // rdlock path for (int i=0; i<(int)trace.size()-1; i++) rdlocks.insert(&trace[i]->lock); + + // rdlock ancestor snaps + CInode *t = diri->get_parent_dn()->get_dir()->get_inode(); + while (t) { + rdlocks.insert(&t->snaplock); + t = t->get_parent_dn()->get_dir()->get_inode(); + } + + // xlock snap xlocks.insert(&dn->inode->snaplock); + if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; @@ -4688,6 +4699,7 @@ void Server::handle_client_mksnap(MDRequest *mdr) if (!diri->snaprealm) { dout(10) << "creating snaprealm on " << *diri << dendl; diri->open_snaprealm(); + diri->snaprealm->created = snapid; // link them up // HACK! parent may be on another mds... @@ -4742,7 +4754,8 @@ void Server::handle_client_mksnap(MDRequest *mdr) MClientSnap *update = new MClientSnap(split_parent ? CEPH_SNAP_OP_SPLIT:CEPH_SNAP_OP_UPDATE, realm->inode->ino()); update->snaps = snaps; - update->snap_highwater = diri->snaprealm->snap_highwater; + update->snap_created = diri->snaprealm->created; + update->snap_highwater = diri->snaprealm->highwater; update->split_parent = split_parent; update->split_inos = split_inos; mds->send_message_client(update, p->first); diff --git a/src/mds/snap.cc b/src/mds/snap.cc index 370b1fbc940..31958397e6d 100644 --- a/src/mds/snap.cc +++ b/src/mds/snap.cc @@ -71,8 +71,8 @@ void SnapRealm::get_snap_set(set<snapid_t> &s, snapid_t first, snapid_t last) if (!s.empty()) { snapid_t t = *s.rbegin(); - if (snap_highwater < t) - snap_highwater = t; + if (highwater < t) + highwater = t; } } @@ -88,7 +88,7 @@ void SnapRealm::get_snap_vector(vector<snapid_t> &v) for (set<snapid_t>::reverse_iterator p = s.rbegin(); p != s.rend(); p++) v[i++] = *p; - dout(10) << "get_snap_vector " << v << " (highwater " << snap_highwater << ")" << dendl; + dout(10) << "get_snap_vector " << v << " (highwater " << highwater << ")" << dendl; } diff --git a/src/mds/snap.h b/src/mds/snap.h index 4050e510102..c5a376049e7 100644 --- a/src/mds/snap.h +++ b/src/mds/snap.h @@ -83,15 +83,18 @@ WRITE_CLASS_ENCODER(snaplink_t) struct SnapRealm { // realm state + snapid_t created; map<snapid_t, SnapInfo> snaps; multimap<snapid_t, snaplink_t> parents, children; // key is "last" (or NOSNAP) void encode(bufferlist& bl) const { + ::encode(created, bl); ::encode(snaps, bl); ::encode(parents, bl); ::encode(children, bl); } void decode(bufferlist::iterator& p) { + ::decode(created, p); ::decode(snaps, p); ::decode(parents, p); ::decode(children, p); @@ -101,7 +104,7 @@ struct SnapRealm { MDCache *mdcache; CInode *inode; - snapid_t snap_highwater; // largest snap this realm has exposed to clients (implicitly or explicitly) + snapid_t highwater; // largest snap this realm has exposed to clients (implicitly or explicitly) // caches? //set<snapid_t> cached_snaps; @@ -110,7 +113,9 @@ struct SnapRealm { xlist<CInode*> inodes_with_caps; // for efficient realm splits map<int, xlist<Capability*> > client_caps; // to identify clients who need snap notifications - SnapRealm(MDCache *c, CInode *in) : mdcache(c), inode(in), snap_highwater(0) {} + SnapRealm(MDCache *c, CInode *in) : + created(0), + mdcache(c), inode(in), highwater(0) {} bool open_parents(MDRequest *mdr); void get_snap_set(set<snapid_t>& s, snapid_t first=0, snapid_t last=CEPH_NOSNAP); diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h index 5e75298743e..1012a15d05e 100644 --- a/src/messages/MClientFileCaps.h +++ b/src/messages/MClientFileCaps.h @@ -41,11 +41,13 @@ class MClientFileCaps : public Message { int get_wanted() { return h.wanted; } capseq_t get_seq() { return h.seq; } capseq_t get_mseq() { return h.migrate_seq; } + + inodeno_t get_snap_realm() { return inodeno_t(h.snap_realm); } + snapid_t get_snap_created() { return h.snap_created; } snapid_t get_snap_highwater() { return h.snap_highwater; } vector<snapid_t> &get_snaps() { return snaps; } inodeno_t get_ino() { return inodeno_t(h.ino); } - inodeno_t get_realm() { return inodeno_t(h.realm); } __u64 get_size() { return h.size; } __u64 get_max_size() { return h.max_size; } utime_t get_ctime() { return utime_t(h.ctime); } @@ -68,6 +70,7 @@ class MClientFileCaps : public Message { void set_mtime(const utime_t &t) { t.encode_timeval(&h.mtime); } void set_atime(const utime_t &t) { t.encode_timeval(&h.atime); } + void set_snap_created(snapid_t c) { h.snap_created = c; } void set_snap_highwater(snapid_t hw) { h.snap_highwater = hw; } MClientFileCaps() {} @@ -84,7 +87,7 @@ class MClientFileCaps : public Message { h.caps = caps; h.wanted = wanted; h.ino = inode.ino; - h.realm = realm; + h.snap_realm = realm; h.size = inode.size; h.max_size = inode.max_size; h.migrate_seq = mseq; diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h index 3f7bf6841ea..4450fcd628c 100644 --- a/src/messages/MClientReply.h +++ b/src/messages/MClientReply.h @@ -165,11 +165,15 @@ class MClientReply : public Message { int get_result() { return (__s32)(__u32)st.result; } inodeno_t get_snap_realm() { return inodeno_t((__u64)st.snap_realm); } + snapid_t get_snap_created() { return st.snap_created; } snapid_t get_snap_highwater() { return st.snap_highwater; } vector<snapid_t> &get_snaps() { return snaps; } - void set_snap_realm(snapid_t hw) { st.snap_realm = hw; } - void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; } + void set_snap_info(inodeno_t r, snapid_t c, snapid_t hw) { + st.snap_realm = r; + st.snap_created = c; + st.snap_highwater = hw; + } unsigned get_file_caps() { return st.file_caps; } unsigned get_file_caps_seq() { return st.file_caps_seq; } diff --git a/src/messages/MClientSnap.h b/src/messages/MClientSnap.h index 6490b56bbaf..bf8f8628e5a 100644 --- a/src/messages/MClientSnap.h +++ b/src/messages/MClientSnap.h @@ -31,7 +31,7 @@ struct MClientSnap : public Message { inodeno_t realm; // new snap state - snapid_t snap_highwater; + snapid_t snap_created, snap_highwater; vector<snapid_t> snaps; // (for split only) |