summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/TODO 2
-rw-r--r-- src/client/Client.cc 29
-rw-r--r-- src/client/Client.h 20
-rw-r--r-- src/include/ceph_fs.h 6
-rw-r--r-- src/mds/MDCache.cc 3
-rw-r--r-- src/mds/Migrator.cc 3
-rw-r--r-- src/mds/Server.cc 19
-rw-r--r-- src/mds/snap.cc 6
-rw-r--r-- src/mds/snap.h 9
-rw-r--r-- src/messages/MClientFileCaps.h 7
-rw-r--r-- src/messages/MClientReply.h 8
-rw-r--r-- src/messages/MClientSnap.h 2
12 files changed, 77 insertions, 37 deletions
diff --git a/src/TODO b/src/TODO
index 3d17d9a54cc..f2fdbf82ffc 100644
--- a/src/TODO
+++ b/src/TODO
@@ -227,7 +227,7 @@ todo
/ - SnapRealm open_parents, get_snap_set need to recursively open/examine parents over given ranges...
/ - realm split
- adjust parent/child linkages
- - make realm split notifications safe from races from multiple mds's
+/ - make realm split notifications safe from races from multiple mds's
- make sense of snap_highwater...
- figure out how to fix up rados logging
diff --git a/src/client/Client.cc b/src/client/Client.cc
index 5ed633c8de0..2b1abab9798 100644
--- a/src/client/Client.cc
+++ b/src/client/Client.cc
@@ -1510,7 +1510,8 @@ void Client::_flushed(Inode *in, bool checkafter)
* do not block.
*/
void Client::add_update_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq)
{
InodeCap *cap = 0;
@@ -1532,7 +1533,7 @@ void Client::add_update_cap(Inode *in, int mds,
}
in->caps[mds] = cap = new InodeCap;
}
- maybe_update_snaprealm(in->snaprealm, snap_highwater, snaps);
+ maybe_update_snaprealm(in->snaprealm, snap_created, snap_highwater, snaps);
unsigned old_caps = cap->issued;
cap->issued |= issued;
@@ -1568,9 +1569,10 @@ void Client::remove_all_caps(Inode *in)
}
}
-void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps)
+void Client::maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created,
+ snapid_t snap_highwater, vector<snapid_t>& snaps)
{
- if (realm->maybe_update(snap_highwater, snaps))
+ if (realm->maybe_update(snap_created, snap_highwater, snaps))
dout(10) << *realm << " now " << snaps << " highwater " << snap_highwater << dendl;
}
@@ -1582,7 +1584,7 @@ void Client::handle_snap(MClientSnap *m)
switch (m->op) {
case CEPH_SNAP_OP_UPDATE:
- maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
+ maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps);
break;
case CEPH_SNAP_OP_SPLIT:
@@ -1597,15 +1599,21 @@ void Client::handle_snap(MClientSnap *m)
p++) {
if (inode_map.count(*p)) {
Inode *in = inode_map[*p];
- dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl;
- if (in->snaprealm)
+ if (in->snaprealm) {
+ if (in->snaprealm->created > m->snap_created) {
+ dout(10) << " NOT moving " << *in << " from _newer_ realm "
+ << *in->snaprealm << dendl;
+ continue;
+ }
put_snap_realm(in->snaprealm);
+ }
+ dout(10) << " moving " << *in << " from old realm " << m->split_parent << dendl;
in->snaprealm = realm;
realm->nref++;
}
}
- // oh.. update it too
- maybe_update_snaprealm(realm, m->snap_highwater, m->snaps);
+ // update it too
+ maybe_update_snaprealm(realm, m->snap_created, m->snap_highwater, m->snaps);
}
break;
@@ -1645,7 +1653,7 @@ void Client::handle_file_caps(MClientFileCaps *m)
if (m->get_op() == CEPH_CAP_OP_IMPORT) {
// add/update it
add_update_cap(in, mds,
- m->get_realm(), m->get_snap_highwater(), m->get_snaps(),
+ m->get_snap_realm(), m->get_snap_created(), m->get_snap_highwater(), m->get_snaps(),
m->get_caps(), m->get_seq(), m->get_mseq());
if (in->exporting_mseq < m->get_mseq()) {
@@ -2984,6 +2992,7 @@ int Client::_open(const filepath &path, int flags, mode_t mode, Fh **fhp, int ui
int mds = reply->get_source().num();
add_update_cap(in, mds,
reply->get_snap_realm(),
+ reply->get_snap_created(),
reply->get_snap_highwater(),
reply->get_snaps(),
reply->get_file_caps(),
diff --git a/src/client/Client.h b/src/client/Client.h
index b7f68697253..3390812bccb 100644
--- a/src/client/Client.h
+++ b/src/client/Client.h
@@ -130,15 +130,17 @@ struct InodeCap;
struct SnapRealm {
inodeno_t dirino;
int nref;
- snapid_t snap_highwater;
+ snapid_t created;
+ snapid_t highwater;
vector<snapid_t> snaps;
SnapRealm(inodeno_t i) :
- dirino(i), nref(0), snap_highwater(0) { }
+ dirino(i), nref(0), created(0), highwater(0) { }
- bool maybe_update(snapid_t sh, vector<snapid_t> &s) {
- if (sh > snap_highwater) {
- snap_highwater = sh;
+ bool maybe_update(snapid_t c, snapid_t sh, vector<snapid_t> &s) {
+ created = c;
+ if (sh > highwater) {
+ highwater = sh;
snaps = s;
return true;
}
@@ -147,7 +149,7 @@ struct SnapRealm {
};
inline ostream& operator<<(ostream& out, const SnapRealm& r) {
- return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " hw=" << r.snap_highwater
+ return out << "snaprealm(" << r.dirino << " nref=" << r.nref << " c=" << r.created << " hw=" << r.highwater
<< " snaps=" << r.snaps << ")";
}
@@ -787,12 +789,14 @@ protected:
// file caps
void add_update_cap(Inode *in, int mds,
- inodeno_t realm, snapid_t snap_highwater, vector<snapid_t> &snaps,
+ inodeno_t realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t> &snaps,
unsigned issued, unsigned seq, unsigned mseq);
void remove_cap(Inode *in, int mds);
void remove_all_caps(Inode *in);
- void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_highwater, vector<snapid_t>& snaps);
+ void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
+ vector<snapid_t>& snaps);
void handle_snap(class MClientSnap *m);
void handle_file_caps(class MClientFileCaps *m);
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 0dee6eb7e60..6d2fce665dd 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -672,7 +672,7 @@ struct ceph_mds_reply_head {
__le32 file_caps_mseq;
__le32 mdsmap_epoch;
__le64 snap_realm;
- __le64 snap_highwater;
+ __le64 snap_created, snap_highwater;
__le32 num_snaps;
__le64 snaps[];
} __attribute__ ((packed));
@@ -791,13 +791,13 @@ struct ceph_mds_file_caps {
__le32 seq;
__le32 caps, wanted;
__le64 ino;
- __le64 realm;
__le64 size, max_size;
__le32 migrate_seq;
struct ceph_timespec mtime, atime, ctime;
__le64 time_warp_seq;
+ __le64 snap_realm;
+ __le64 snap_created, snap_highwater;
__le32 num_snaps;
- __le64 snap_highwater;
__le64 snaps[];
} __attribute__ ((packed));
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 0ca59264fb6..3d7c0d1304f 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -2798,7 +2798,8 @@ void MDCache::rejoin_import_cap(CInode *in, int client, inode_caps_reconnect_t&
cap->wanted(),
cap->get_mseq());
realm->get_snap_vector(reap->get_snaps());
- reap->set_snap_highwater(realm->snap_highwater);
+ reap->set_snap_created(realm->created);
+ reap->set_snap_highwater(realm->highwater);
mds->messenger->send_message(reap, session->inst);
}
diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc
index 37ecc7191c3..53e8543e035 100644
--- a/src/mds/Migrator.cc
+++ b/src/mds/Migrator.cc
@@ -2062,7 +2062,8 @@ void Migrator::finish_import_inode_caps(CInode *in, int from,
cap->wanted(),
cap->get_mseq());
realm->get_snap_vector(caps->get_snaps());
- caps->set_snap_highwater(realm->snap_highwater);
+ caps->set_snap_created(realm->created);
+ caps->set_snap_highwater(realm->highwater);
mds->send_message_client(caps, session->inst);
}
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 2423ae46a2e..c1837c3112e 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -4435,8 +4435,7 @@ void Server::_do_open(MDRequest *mdr, CInode *cur)
SnapRealm *realm = cur->find_containing_snaprealm();
realm->get_snap_vector(reply->get_snaps());
- reply->set_snap_highwater(realm->snap_highwater);
- reply->set_snap_realm(realm->inode->ino());
+ reply->set_snap_info(realm->inode->ino(), realm->created, realm->highwater);
dout(10) << " snaprealm is " << *realm << " snaps=" << reply->get_snaps() << " on " << *realm->inode << dendl;
//reply->set_file_data_version(fdv);
@@ -4663,9 +4662,21 @@ void Server::handle_client_mksnap(MDRequest *mdr)
// lock snap
set<SimpleLock*> rdlocks, wrlocks, xlocks;
+
+ // rdlock path
for (int i=0; i<(int)trace.size()-1; i++)
rdlocks.insert(&trace[i]->lock);
+
+ // rdlock ancestor snaps
+ CInode *t = diri->get_parent_dn()->get_dir()->get_inode();
+ while (t) {
+ rdlocks.insert(&t->snaplock);
+ t = t->get_parent_dn()->get_dir()->get_inode();
+ }
+
+ // xlock snap
xlocks.insert(&dn->inode->snaplock);
+
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
@@ -4688,6 +4699,7 @@ void Server::handle_client_mksnap(MDRequest *mdr)
if (!diri->snaprealm) {
dout(10) << "creating snaprealm on " << *diri << dendl;
diri->open_snaprealm();
+ diri->snaprealm->created = snapid;
// link them up
// HACK! parent may be on another mds...
@@ -4742,7 +4754,8 @@ void Server::handle_client_mksnap(MDRequest *mdr)
MClientSnap *update = new MClientSnap(split_parent ? CEPH_SNAP_OP_SPLIT:CEPH_SNAP_OP_UPDATE,
realm->inode->ino());
update->snaps = snaps;
- update->snap_highwater = diri->snaprealm->snap_highwater;
+ update->snap_created = diri->snaprealm->created;
+ update->snap_highwater = diri->snaprealm->highwater;
update->split_parent = split_parent;
update->split_inos = split_inos;
mds->send_message_client(update, p->first);
diff --git a/src/mds/snap.cc b/src/mds/snap.cc
index 370b1fbc940..31958397e6d 100644
--- a/src/mds/snap.cc
+++ b/src/mds/snap.cc
@@ -71,8 +71,8 @@ void SnapRealm::get_snap_set(set<snapid_t> &s, snapid_t first, snapid_t last)
if (!s.empty()) {
snapid_t t = *s.rbegin();
- if (snap_highwater < t)
- snap_highwater = t;
+ if (highwater < t)
+ highwater = t;
}
}
@@ -88,7 +88,7 @@ void SnapRealm::get_snap_vector(vector<snapid_t> &v)
for (set<snapid_t>::reverse_iterator p = s.rbegin(); p != s.rend(); p++)
v[i++] = *p;
- dout(10) << "get_snap_vector " << v << " (highwater " << snap_highwater << ")" << dendl;
+ dout(10) << "get_snap_vector " << v << " (highwater " << highwater << ")" << dendl;
}
diff --git a/src/mds/snap.h b/src/mds/snap.h
index 4050e510102..c5a376049e7 100644
--- a/src/mds/snap.h
+++ b/src/mds/snap.h
@@ -83,15 +83,18 @@ WRITE_CLASS_ENCODER(snaplink_t)
struct SnapRealm {
// realm state
+ snapid_t created;
map<snapid_t, SnapInfo> snaps;
multimap<snapid_t, snaplink_t> parents, children; // key is "last" (or NOSNAP)
void encode(bufferlist& bl) const {
+ ::encode(created, bl);
::encode(snaps, bl);
::encode(parents, bl);
::encode(children, bl);
}
void decode(bufferlist::iterator& p) {
+ ::decode(created, p);
::decode(snaps, p);
::decode(parents, p);
::decode(children, p);
@@ -101,7 +104,7 @@ struct SnapRealm {
MDCache *mdcache;
CInode *inode;
- snapid_t snap_highwater; // largest snap this realm has exposed to clients (implicitly or explicitly)
+ snapid_t highwater; // largest snap this realm has exposed to clients (implicitly or explicitly)
// caches?
//set<snapid_t> cached_snaps;
@@ -110,7 +113,9 @@ struct SnapRealm {
xlist<CInode*> inodes_with_caps; // for efficient realm splits
map<int, xlist<Capability*> > client_caps; // to identify clients who need snap notifications
- SnapRealm(MDCache *c, CInode *in) : mdcache(c), inode(in), snap_highwater(0) {}
+ SnapRealm(MDCache *c, CInode *in) :
+ created(0),
+ mdcache(c), inode(in), highwater(0) {}
bool open_parents(MDRequest *mdr);
void get_snap_set(set<snapid_t>& s, snapid_t first=0, snapid_t last=CEPH_NOSNAP);
diff --git a/src/messages/MClientFileCaps.h b/src/messages/MClientFileCaps.h
index 5e75298743e..1012a15d05e 100644
--- a/src/messages/MClientFileCaps.h
+++ b/src/messages/MClientFileCaps.h
@@ -41,11 +41,13 @@ class MClientFileCaps : public Message {
int get_wanted() { return h.wanted; }
capseq_t get_seq() { return h.seq; }
capseq_t get_mseq() { return h.migrate_seq; }
+
+ inodeno_t get_snap_realm() { return inodeno_t(h.snap_realm); }
+ snapid_t get_snap_created() { return h.snap_created; }
snapid_t get_snap_highwater() { return h.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
inodeno_t get_ino() { return inodeno_t(h.ino); }
- inodeno_t get_realm() { return inodeno_t(h.realm); }
__u64 get_size() { return h.size; }
__u64 get_max_size() { return h.max_size; }
utime_t get_ctime() { return utime_t(h.ctime); }
@@ -68,6 +70,7 @@ class MClientFileCaps : public Message {
void set_mtime(const utime_t &t) { t.encode_timeval(&h.mtime); }
void set_atime(const utime_t &t) { t.encode_timeval(&h.atime); }
+ void set_snap_created(snapid_t c) { h.snap_created = c; }
void set_snap_highwater(snapid_t hw) { h.snap_highwater = hw; }
MClientFileCaps() {}
@@ -84,7 +87,7 @@ class MClientFileCaps : public Message {
h.caps = caps;
h.wanted = wanted;
h.ino = inode.ino;
- h.realm = realm;
+ h.snap_realm = realm;
h.size = inode.size;
h.max_size = inode.max_size;
h.migrate_seq = mseq;
diff --git a/src/messages/MClientReply.h b/src/messages/MClientReply.h
index 3f7bf6841ea..4450fcd628c 100644
--- a/src/messages/MClientReply.h
+++ b/src/messages/MClientReply.h
@@ -165,11 +165,15 @@ class MClientReply : public Message {
int get_result() { return (__s32)(__u32)st.result; }
inodeno_t get_snap_realm() { return inodeno_t((__u64)st.snap_realm); }
+ snapid_t get_snap_created() { return st.snap_created; }
snapid_t get_snap_highwater() { return st.snap_highwater; }
vector<snapid_t> &get_snaps() { return snaps; }
- void set_snap_realm(snapid_t hw) { st.snap_realm = hw; }
- void set_snap_highwater(snapid_t hw) { st.snap_highwater = hw; }
+ void set_snap_info(inodeno_t r, snapid_t c, snapid_t hw) {
+ st.snap_realm = r;
+ st.snap_created = c;
+ st.snap_highwater = hw;
+ }
unsigned get_file_caps() { return st.file_caps; }
unsigned get_file_caps_seq() { return st.file_caps_seq; }
diff --git a/src/messages/MClientSnap.h b/src/messages/MClientSnap.h
index 6490b56bbaf..bf8f8628e5a 100644
--- a/src/messages/MClientSnap.h
+++ b/src/messages/MClientSnap.h
@@ -31,7 +31,7 @@ struct MClientSnap : public Message {
inodeno_t realm;
// new snap state
- snapid_t snap_highwater;
+ snapid_t snap_created, snap_highwater;
vector<snapid_t> snaps;
// (for split only)