summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMilind Changire <mchangir@redhat.com>2023-09-13 14:00:54 +0200
committerMilind Changire <mchangir@redhat.com>2024-08-28 09:03:25 +0200
commit8970fef4ecc4dc28011bb57c5150fbdc691279d0 (patch)
tree74ac881f43e43f04c050066390909e2dfd037668
parentMerge pull request #59401 from nbalacha/wip-nbalacha-check-mirror-ns (diff)
downloadceph-8970fef4ecc4dc28011bb57c5150fbdc691279d0.tar.xz
ceph-8970fef4ecc4dc28011bb57c5150fbdc691279d0.zip
mds/scrub: move inline data to data pool object
If inline data version is not CEPH_INLINE_NONE then move data to data pool object and set inline data version to CEPH_INLINE_NONE. Fixes: https://tracker.ceph.com/issues/52916 Signed-off-by: Milind Changire <mchangir@redhat.com>
-rw-r--r--src/include/ceph_fs.h1
-rw-r--r--src/mds/CDir.cc7
-rw-r--r--src/mds/CInode.cc3
-rw-r--r--src/mds/CInode.h9
-rw-r--r--src/mds/MDCache.cc161
-rw-r--r--src/mds/MDCache.h2
-rw-r--r--src/mds/ScrubStack.cc15
-rw-r--r--src/mds/ScrubStack.h2
8 files changed, 200 insertions, 0 deletions
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 57eb18b0d3e..627f4a3e85b 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -440,6 +440,7 @@ enum {
CEPH_MDS_OP_QUIESCE_PATH = 0x01508,
CEPH_MDS_OP_QUIESCE_INODE = 0x01509,
CEPH_MDS_OP_LOCK_PATH = 0x0150a,
+ CEPH_MDS_OP_UNINLINE_DATA = 0x0150b
};
#define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index 89e2e5e4be9..f000da7928a 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -3461,16 +3461,23 @@ bool CDir::can_auth_pin(int *err_ret) const
{
int err;
if (!is_auth()) {
+ dout(20) << __func__ << ": error - no auth" << dendl;
err = ERR_NOT_AUTH;
} else if (is_freezing_dir() || is_frozen_dir()) {
+ dout(20) << __func__ << ": error - fragmenting dir ("
+ << (is_freezing_dir() ? "freezing" : "frozen")
+ << ")" << dendl;
err = ERR_FRAGMENTING_DIR;
} else {
auto p = is_freezing_or_frozen_tree();
if (p.first) {
+ dout(20) << __func__ << ": error - exporting tree" << dendl;
err = ERR_EXPORTING_TREE;
} else if (p.second) {
+ dout(20) << __func__ << ": error - exporting tree" << dendl;
err = ERR_EXPORTING_TREE;
} else {
+ dout(20) << __func__ << ": auth!" << dendl;
err = 0;
}
}
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index c2ea2facbd0..2bb493b79fa 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -2987,12 +2987,15 @@ void CInode::clear_ambiguous_auth()
bool CInode::can_auth_pin(int *err_ret) const {
int err;
if (!is_auth()) {
+ dout(20) << __func__ << ": error - no auth" << dendl;
err = ERR_NOT_AUTH;
} else if (is_freezing_inode() || is_frozen_inode() || is_frozen_auth_pin()) {
+ dout(20) << __func__ << ": error - exporting inode" << dendl;
err = ERR_EXPORTING_INODE;
} else {
if (parent)
return parent->can_auth_pin(err_ret);
+ dout(20) << __func__ << ": auth!" << dendl;
err = 0;
}
if (err && err_ret)
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index cf2322998e3..92486d0f343 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -1058,6 +1058,15 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
MDSContext *fin);
static void dump_validation_results(const validated_data& results,
ceph::Formatter *f);
+  /**
+   * Report whether this inode still carries inline file data.
+   *
+   * Only inodes for which both is_normal() and is_file() hold are
+   * considered; for those, the projected inode's inline_data.version is
+   * compared against CEPH_INLINE_NONE.
+   *
+   * @return true if the projected inline data version differs from
+   *         CEPH_INLINE_NONE, false in every other case.
+   */
+  bool has_inline_data() {
+    // Guard clause: anything that is not a normal file cannot qualify.
+    if (!is_normal() || !is_file()) {
+      return false;
+    }
+    const auto& projected = get_projected_inode();
+    return projected->inline_data.version != CEPH_INLINE_NONE;
+  }
//bool hack_accessed = false;
//utime_t hack_load_stamp;
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index e9cb50c6e00..ab5f9c96105 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -9894,6 +9894,9 @@ void MDCache::dispatch_request(const MDRequestRef& mdr)
case CEPH_MDS_OP_RDLOCK_FRAGSSTATS:
rdlock_dirfrags_stats_work(mdr);
break;
+ case CEPH_MDS_OP_UNINLINE_DATA:
+ uninline_data_work(mdr);
+ break;
default:
ceph_abort();
}
@@ -13172,6 +13175,164 @@ void MDCache::enqueue_scrub_work(const MDRequestRef& mdr)
mds->server->respond_to_request(mdr, r);
}
+/**
+ * Log-completion context for the uninline-data internal request.
+ *
+ * It is handed to MDLog::submit_entry() by C_IO_DataUninlined::finish().
+ * When it fires it logs the outcome, applies the request's projected
+ * changes via mdr->apply() and answers the request with the result code
+ * it was given.
+ */
+class C_MDC_DataUninlinedSubmitted : public MDCacheLogContext {
+  MDRequestRef mdr;
+
+public:
+  /**
+   * @param r   the uninline-data request being completed
+   * @param mds rank whose mdcache backs this log context
+   */
+  C_MDC_DataUninlinedSubmitted(MDRequestRef r, MDSRank *mds) :
+    MDCacheLogContext(mds->mdcache), mdr(r) {}
+
+  /**
+   * @param r result of the log submission; 0 on success, non-zero on failure
+   */
+  void finish(int r) override {
+    auto mds = get_mds(); // to keep dout happy
+
+    // The returned inode is not needed in this callback, so the result is
+    // discarded explicitly instead of being stored in an unused local.
+    // NOTE(review): the call itself is kept in case it has side effects on
+    // mdr - confirm, and drop it entirely if it does not.
+    static_cast<void>(mds->server->rdlock_path_pin_ref(mdr, true));
+
+    if (r) {
+      dout(20) << "(uninline_data) log submission failed; r=" << r
+               << " (" << cpp_strerror(r) << ")" << dendl;
+    } else {
+      dout(20) << "(uninline_data) log submission succeeded" << dendl;
+    }
+
+    // The projected changes are applied regardless of r, as before.
+    mdr->apply();
+    mds->server->respond_to_request(mdr, r);
+  }
+};
+
+/**
+ * Objecter completion context for the uninlining write issued by
+ * MDCache::uninline_data_work().
+ *
+ * On failure the request is answered with the error right away. On
+ * success the inode is projected with its inline data freed and its
+ * inline version set to CEPH_INLINE_NONE, the change is journaled in an
+ * EUpdate, and C_MDC_DataUninlinedSubmitted finishes the request once
+ * the log entry has been submitted.
+ */
+struct C_IO_DataUninlined : public MDSIOContext {
+  MDRequestRef mdr;
+
+  /**
+   * @param r   the uninline-data request this I/O belongs to
+   * @param mds rank that owns the request
+   */
+  C_IO_DataUninlined(MDRequestRef r, MDSRank *mds) : MDSIOContext(mds), mdr(r) {}
+
+  void print(std::ostream& os) const override {
+    os << "data uninlined";
+  }
+
+  /**
+   * @param r result of the objecter mutation; non-zero means it failed
+   */
+  void finish(int r) override {
+    auto mds = get_mds(); // to keep dout/derr happy
+    auto in = mds->server->rdlock_path_pin_ref(mdr, true);
+
+    // return faster if operation has failed (non-zero) status
+    if (r) {
+      derr << "(uninline_data) mutation failed: r=" << r
+           << "(" << cpp_strerror(r) << ")" << dendl;
+      mds->server->respond_to_request(mdr, r);
+      return;
+    }
+
+    // Everything below dereferences the inode. Fail with a clear assertion
+    // instead of an undefined null dereference if the lookup came back empty.
+    // NOTE(review): presumably the inode was already pinned by
+    // uninline_data_work(), so this should never fire - confirm.
+    ceph_assert(in != nullptr);
+
+    dout(20) << "(uninline_data) mutation succeeded for " << *in << dendl;
+
+    // journal the inode changes
+    MDLog *mdlog = mds->mdlog;
+
+    dout(20) << "(uninline_data) writing to journal for " << *in << dendl;
+
+    EUpdate *le = new EUpdate(mdlog, "uninline");
+    mdr->ls = mdlog->get_current_segment();
+
+    // Project the inode: drop the inline payload, mark it as not inlined,
+    // and bump ctime / rctime / change_attr.
+    auto pi = in->project_inode(mdr);
+    pi.inode->version = in->pre_dirty();
+    pi.inode->inline_data.free_data();
+    pi.inode->inline_data.version = CEPH_INLINE_NONE;
+    pi.inode->ctime = mdr->get_op_stamp();
+    if (mdr->get_op_stamp() > pi.inode->rstat.rctime) {
+      pi.inode->rstat.rctime = mdr->get_op_stamp();
+    }
+    pi.inode->change_attr++;
+
+    in->mdcache->predirty_journal_parents(mdr, &le->metablob, in, nullptr,
+                                          PREDIRTY_PRIMARY);
+    in->mdcache->journal_dirty_inode(mdr.get(), &le->metablob, in);
+
+    mdr->committing = true;
+
+    string event_str("submit entry: ");
+    event_str += __func__;
+    mdr->mark_event(event_str);
+
+    // The request is answered from the log-completion context.
+    auto fin = new C_MDC_DataUninlinedSubmitted(mdr, mds);
+    mdlog->submit_entry(le, fin);
+  }
+};
+
+/**
+ * Handler for the internal CEPH_MDS_OP_UNINLINE_DATA request.
+ *
+ * Steps, in order:
+ *  1. resolve the target inode via rdlock_path_pin_ref(); return if none
+ *  2. xlock authlock, filelock and versionlock; return if the locks are
+ *     not available yet
+ *  3. answer with 0 if the inode no longer has inline data
+ *  4. issue a create(false) mutation on the inode's data object, without
+ *     a completion context
+ *  5. issue a second mutation that compares the "inline_version" xattr,
+ *     writes the inline payload (if any) at offset 0 and sets
+ *     "inline_version" to CEPH_INLINE_NONE; C_IO_DataUninlined carries
+ *     on when it completes
+ *
+ * @param mdr the internal request being processed
+ */
+void MDCache::uninline_data_work(MDRequestRef mdr)
+{
+  CInode *in = mds->server->rdlock_path_pin_ref(mdr, true);
+  if (in == nullptr) {
+    return;
+  }
+
+  MutationImpl::LockOpVec locks;
+  locks.add_xlock(&in->authlock);
+  locks.add_xlock(&in->filelock);
+  locks.add_xlock(&in->versionlock);
+
+  const bool locked = mds->locker->acquire_locks(mdr, locks);
+  if (!locked) {
+    dout(20) << "(uninline_data) acquire_locks failed; will retry later for " << *in << dendl;
+    return; // lock not available immediately
+  }
+
+  const bool still_inlined = in->has_inline_data();
+  if (!still_inlined) {
+    dout(20) << "(uninline_data) inode doesn't have inline data anymore " << *in << dendl;
+    mds->server->respond_to_request(mdr, 0);
+    return;
+  }
+
+  auto pi = in->get_projected_inode();
+  auto objecter = mds->objecter;
+
+  dout(20) << "(uninline_data) testing inline_data.version for " << *in << dendl;
+  ceph_assert(objecter);
+  ceph_assert(pi->inline_data.version != CEPH_INLINE_NONE);
+
+  // Both mutations address the same object with the same locator.
+  object_t data_oid = InodeStoreBase::get_object_name(in->ino(), frag_t(), "");
+  const auto data_loc = OSDMap::file_to_object_locator(pi->layout);
+
+  // Use a default-constructed snap context when the inode has no snaprealm.
+  SnapContext empty_snapc;
+  SnapRealm *realm = in->find_snaprealm();
+  auto& snap_context = (realm ? realm->get_snap_context() : empty_snapc);
+
+  // First mutation: create(false) on the data object, no completion context.
+  ObjectOperation create_op;
+  create_op.create(false);
+
+  dout(20) << "(uninline_data) dispatching objecter to create \""
+           << mdr->get_filepath() << "\" for " << *in << dendl;
+
+  objecter->mutate(data_oid,
+                   data_loc,
+                   create_op,
+                   snap_context,
+                   ceph::real_clock::now(),
+                   0,
+                   nullptr);
+
+  // Second mutation: xattr comparison, optional data write, xattr update.
+  bufferlist version_bl;
+  // NOTE(review): this passes the inline version as the second argument of
+  // CInode::encode() - confirm it produces the u64 payload that the
+  // CEPH_OSD_CMPXATTR_MODE_U64 comparison below expects.
+  in->encode(version_bl, pi->inline_data.version);
+
+  ObjectOperation uninline_op;
+  uninline_op.cmpxattr("inline_version",
+                       CEPH_OSD_CMPXATTR_OP_GT,
+                       CEPH_OSD_CMPXATTR_MODE_U64,
+                       version_bl);
+
+  if (pi->inline_data.length() > 0) {
+    dout(10) << "(uninline_data) moving inline data for \"" << mdr->get_filepath() << "\" to file for " << *in << dendl;
+    bufferlist payload;
+    pi->inline_data.get_data(payload);
+    uninline_op.write(0, payload, pi->truncate_size, pi->truncate_seq);
+  }
+  uninline_op.setxattr("inline_version", std::to_string(CEPH_INLINE_NONE));
+
+  objecter->mutate(data_oid,
+                   data_loc,
+                   uninline_op,
+                   snap_context,
+                   ceph::real_clock::now(),
+                   0,
+                   new C_IO_DataUninlined(mdr, mds));
+}
+
struct C_MDC_RespondInternalRequest : public MDCacheLogContext {
MDRequestRef mdr;
C_MDC_RespondInternalRequest(MDCache *c, const MDRequestRef& m) :
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 8ddadcd716a..c54d9e0cf28 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -1094,6 +1094,8 @@ private:
void repair_dirfrag_stats(CDir *dir);
void rdlock_dirfrags_stats(CInode *diri, MDSInternalContext *fin);
+ void uninline_data_work(MDRequestRef mdr);
+
// my leader
MDSRank *mds;
diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc
index 28392f53366..45188402a68 100644
--- a/src/mds/ScrubStack.cc
+++ b/src/mds/ScrubStack.cc
@@ -17,6 +17,7 @@
#include "mds/MDSRank.h"
#include "mds/MDCache.h"
#include "mds/MDSContinuation.h"
+#include "osdc/Objecter.h"
#define dout_context g_ceph_context
#define dout_subsys ceph_subsys_mds
@@ -227,6 +228,7 @@ void ScrubStack::kick_off_scrubs()
// it's a regular file, symlink, or hard link
dequeue(in); // we only touch it this once, so remove from stack
+ uninline_data(in, new C_MDSInternalNoop);
scrub_file_inode(in);
} else {
bool added_children = false;
@@ -1197,3 +1199,16 @@ void ScrubStack::handle_mds_failure(mds_rank_t mds)
if (kick)
kick_off_scrubs();
}
+
+/**
+ * Start an internal CEPH_MDS_OP_UNINLINE_DATA request for an inode.
+ *
+ * The request is addressed by inode number, targets CEPH_NOSNAP, has
+ * early replies disabled, and is dispatched immediately through the
+ * inode's MDCache.
+ *
+ * @param in  inode whose inline data should be moved out
+ * @param fin context stored as the request's internal_op_finish
+ */
+void ScrubStack::uninline_data(CInode *in, Context *fin)
+{
+  dout(10) << "(uninline_data) starting data uninlining for " << *in << dendl;
+
+  auto cache = in->mdcache;
+  MDRequestRef req = cache->request_start_internal(CEPH_MDS_OP_UNINLINE_DATA);
+
+  const filepath target(in->ino());
+  req->set_filepath(target);
+  req->snapid = CEPH_NOSNAP;
+  req->no_early_reply = true;
+  req->internal_op_finish = fin;
+
+  cache->dispatch_request(req);
+}
diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h
index 756ebd9cb0e..5e3c810ceca 100644
--- a/src/mds/ScrubStack.h
+++ b/src/mds/ScrubStack.h
@@ -22,6 +22,7 @@
#include "ScrubHeader.h"
#include "common/LogClient.h"
+#include "common/Cond.h"
#include "include/elist.h"
#include "messages/MMDSScrub.h"
#include "messages/MMDSScrubStats.h"
@@ -267,6 +268,7 @@ private:
void handle_scrub(const cref_t<MMDSScrub> &m);
void handle_scrub_stats(const cref_t<MMDSScrubStats> &m);
+ void uninline_data(CInode *in, Context *fin);
State state = STATE_IDLE;
bool clear_stack = false;