diff options
author | Milind Changire <mchangir@redhat.com> | 2023-09-13 14:00:54 +0200 |
---|---|---|
committer | Milind Changire <mchangir@redhat.com> | 2024-08-28 09:03:25 +0200 |
commit | 8970fef4ecc4dc28011bb57c5150fbdc691279d0 (patch) | |
tree | 74ac881f43e43f04c050066390909e2dfd037668 | |
parent | Merge pull request #59401 from nbalacha/wip-nbalacha-check-mirror-ns (diff) | |
download | ceph-8970fef4ecc4dc28011bb57c5150fbdc691279d0.tar.xz ceph-8970fef4ecc4dc28011bb57c5150fbdc691279d0.zip |
mds/scrub: move inline data to data pool object
If an inode's inline data version is not CEPH_INLINE_NONE, move its inline
data to a data pool object and set the inline data version to CEPH_INLINE_NONE.
Fixes: https://tracker.ceph.com/issues/52916
Signed-off-by: Milind Changire <mchangir@redhat.com>
-rw-r--r-- | src/include/ceph_fs.h | 1 | ||||
-rw-r--r-- | src/mds/CDir.cc | 7 | ||||
-rw-r--r-- | src/mds/CInode.cc | 3 | ||||
-rw-r--r-- | src/mds/CInode.h | 9 | ||||
-rw-r--r-- | src/mds/MDCache.cc | 161 | ||||
-rw-r--r-- | src/mds/MDCache.h | 2 | ||||
-rw-r--r-- | src/mds/ScrubStack.cc | 15 | ||||
-rw-r--r-- | src/mds/ScrubStack.h | 2 |
8 files changed, 200 insertions, 0 deletions
diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h
index 57eb18b0d3e..627f4a3e85b 100644
--- a/src/include/ceph_fs.h
+++ b/src/include/ceph_fs.h
@@ -440,6 +440,7 @@ enum {
 	CEPH_MDS_OP_QUIESCE_PATH = 0x01508,
 	CEPH_MDS_OP_QUIESCE_INODE = 0x01509,
 	CEPH_MDS_OP_LOCK_PATH = 0x0150a,
+	CEPH_MDS_OP_UNINLINE_DATA = 0x0150b
 };
 
 #define IS_CEPH_MDS_OP_NEWINODE(op) (op == CEPH_MDS_OP_CREATE || \
diff --git a/src/mds/CDir.cc b/src/mds/CDir.cc
index 89e2e5e4be9..f000da7928a 100644
--- a/src/mds/CDir.cc
+++ b/src/mds/CDir.cc
@@ -3461,16 +3461,23 @@ bool CDir::can_auth_pin(int *err_ret) const
 {
   int err;
   if (!is_auth()) {
+    dout(20) << __func__ << ": error - no auth" << dendl;
     err = ERR_NOT_AUTH;
   } else if (is_freezing_dir() || is_frozen_dir()) {
+    dout(20) << __func__ << ": error - fragmenting dir ("
+             << (is_freezing_dir() ? "freezing" : "frozen")
+             << ")" << dendl;
     err = ERR_FRAGMENTING_DIR;
   } else {
     auto p = is_freezing_or_frozen_tree();
     if (p.first) {
+      dout(20) << __func__ << ": error - exporting tree" << dendl;
       err = ERR_EXPORTING_TREE;
     } else if (p.second) {
+      dout(20) << __func__ << ": error - exporting tree" << dendl;
       err = ERR_EXPORTING_TREE;
     } else {
+      dout(20) << __func__ << ": auth!" << dendl;
       err = 0;
     }
   }
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc
index c2ea2facbd0..2bb493b79fa 100644
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -2987,12 +2987,15 @@ void CInode::clear_ambiguous_auth()
 bool CInode::can_auth_pin(int *err_ret) const {
   int err;
   if (!is_auth()) {
+    dout(20) << __func__ << ": error - no auth" << dendl;
     err = ERR_NOT_AUTH;
   } else if (is_freezing_inode() || is_frozen_inode() || is_frozen_auth_pin()) {
+    dout(20) << __func__ << ": error - exporting inode" << dendl;
     err = ERR_EXPORTING_INODE;
   } else {
     if (parent)
       return parent->can_auth_pin(err_ret);
+    dout(20) << __func__ << ": auth!" << dendl;
     err = 0;
   }
   if (err && err_ret)
diff --git a/src/mds/CInode.h b/src/mds/CInode.h
index cf2322998e3..92486d0f343 100644
--- a/src/mds/CInode.h
+++ b/src/mds/CInode.h
@@ -1058,6 +1058,15 @@ class CInode : public MDSCacheObject, public InodeStoreBase, public Counter<CIno
                            MDSContext *fin);
   static void dump_validation_results(const validated_data& results,
                                       ceph::Formatter *f);
+  bool has_inline_data() {
+    if (is_normal() && is_file()) {
+      auto pin = get_projected_inode();
+      if (pin->inline_data.version != CEPH_INLINE_NONE) {
+        return true;
+      }
+    }
+    return false;
+  }
 
   //bool hack_accessed = false;
   //utime_t hack_load_stamp;
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index e9cb50c6e00..ab5f9c96105 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -9894,6 +9894,9 @@ void MDCache::dispatch_request(const MDRequestRef& mdr)
   case CEPH_MDS_OP_RDLOCK_FRAGSSTATS:
     rdlock_dirfrags_stats_work(mdr);
     break;
+  case CEPH_MDS_OP_UNINLINE_DATA:
+    uninline_data_work(mdr);
+    break;
   default:
     ceph_abort();
   }
@@ -13172,6 +13175,164 @@ void MDCache::enqueue_scrub_work(const MDRequestRef& mdr)
   mds->server->respond_to_request(mdr, r);
 }
 
+class C_MDC_DataUninlinedSubmitted : public MDCacheLogContext {
+  MDRequestRef mdr;
+
+  public:
+  C_MDC_DataUninlinedSubmitted(MDRequestRef r, MDSRank *mds) :
+    MDCacheLogContext(mds->mdcache), mdr(r) {}
+
+  void finish(int r) {
+    auto mds = get_mds(); // to keep dout happy
+    auto in = mds->server->rdlock_path_pin_ref(mdr, true);
+
+    if (r) {
+      dout(20) << "(uninline_data) log submission failed; r=" << r
+               << " (" << cpp_strerror(r) << ")" << dendl;
+    } else {
+      dout(20) << "(uninline_data) log submission succeeded" << dendl;
+    }
+
+    mdr->apply();
+    mds->server->respond_to_request(mdr, r);
+  }
+};
+
+struct C_IO_DataUninlined : public MDSIOContext {
+  MDRequestRef mdr;
+
+  public:
+  C_IO_DataUninlined(MDRequestRef r, MDSRank *mds) : MDSIOContext(mds), mdr(r) {}
+
+  virtual void print(std::ostream& os) const {
+    os << "data uninlined";
+  }
+
+  void finish(int r) override {
+    auto mds = get_mds(); // to keep dout/derr happy
+    auto in = mds->server->rdlock_path_pin_ref(mdr, true);
+
+    // return faster if operation has failed (non-zero) status
+    if (r) {
+      derr << "(uninline_data) mutation failed: r=" << r
+           << "(" << cpp_strerror(r) << ")" << dendl;
+      mds->server->respond_to_request(mdr, r);
+      return;
+    }
+
+    dout(20) << "(uninline_data) mutation succeeded for " << *in << dendl;
+
+    // journal the inode changes
+    MDLog *mdlog = mds->mdlog;
+
+    dout(20) << "(uninline_data) writing to journal for " << *in << dendl;
+
+    EUpdate *le = new EUpdate(mdlog, "uninline");
+    mdr->ls = mdlog->get_current_segment();
+
+    auto pi = in->project_inode(mdr);
+    pi.inode->version = in->pre_dirty();
+    pi.inode->inline_data.free_data();
+    pi.inode->inline_data.version = CEPH_INLINE_NONE;
+    pi.inode->ctime = mdr->get_op_stamp();
+    if (mdr->get_op_stamp() > pi.inode->rstat.rctime) {
+      pi.inode->rstat.rctime = mdr->get_op_stamp();
+    }
+    pi.inode->change_attr++;
+
+    in->mdcache->predirty_journal_parents(mdr, &le->metablob, in, nullptr,
+                                          PREDIRTY_PRIMARY);
+    in->mdcache->journal_dirty_inode(mdr.get(), &le->metablob, in);
+
+    mdr->committing = true;
+
+    string event_str("submit entry: ");
+    event_str += __func__;
+    mdr->mark_event(event_str);
+
+    auto fin = new C_MDC_DataUninlinedSubmitted(mdr, mds);
+    mdlog->submit_entry(le, fin);
+  }
+};
+
+void MDCache::uninline_data_work(MDRequestRef mdr)
+{
+  CInode *in = mds->server->rdlock_path_pin_ref(mdr, true);
+
+  if (!in) {
+    return;
+  }
+
+  MutationImpl::LockOpVec lov;
+  lov.add_xlock(&in->authlock);
+  lov.add_xlock(&in->filelock);
+  lov.add_xlock(&in->versionlock);
+
+  if (!mds->locker->acquire_locks(mdr, lov)) {
+    dout(20) << "(uninline_data) acquire_locks failed; will retry later for " << *in << dendl;
+    return; // lock not available immediately
+  }
+
+  if (!in->has_inline_data()) {
+    dout(20) << "(uninline_data) inode doesn't have inline data anymore " << *in << dendl;
+    mds->server->respond_to_request(mdr, 0);
+    return;
+  }
+
+  auto ino = [&]() { return in->ino(); };
+  auto pi = in->get_projected_inode();
+  auto objecter = mds->objecter;
+
+  dout(20) << "(uninline_data) testing inline_data.version for " << *in << dendl;
+  ceph_assert(objecter);
+  ceph_assert(pi->inline_data.version != CEPH_INLINE_NONE);
+
+  object_t oid = InodeStoreBase::get_object_name(ino(), frag_t(), "");
+  SnapContext snapc;
+  SnapRealm *snaprealm = in->find_snaprealm();
+  auto& snapc_ref = (snaprealm ? snaprealm->get_snap_context() : snapc);
+
+  ObjectOperation create_ops;
+  create_ops.create(false);
+
+  dout(20) << "(uninline_data) dispatching objecter to create \""
+           << mdr->get_filepath() << "\" for " << *in << dendl;
+
+  objecter->mutate(oid,
+                   OSDMap::file_to_object_locator(pi->layout),
+                   create_ops,
+                   snapc_ref,
+                   ceph::real_clock::now(),
+                   0,
+                   nullptr);
+
+  bufferlist inline_version_bl;
+
+  in->encode(inline_version_bl, pi->inline_data.version);
+
+  ObjectOperation uninline_ops;
+  uninline_ops.cmpxattr("inline_version",
+                        CEPH_OSD_CMPXATTR_OP_GT,
+                        CEPH_OSD_CMPXATTR_MODE_U64,
+                        inline_version_bl);
+
+  if (pi->inline_data.length() > 0) {
+    dout(10) << "(uninline_data) moving inline data for \"" << mdr->get_filepath() << "\" to file for " << *in << dendl;
+    bufferlist inline_data;
+    pi->inline_data.get_data(inline_data);
+    uninline_ops.write(0, inline_data, pi->truncate_size, pi->truncate_seq);
+  }
+  uninline_ops.setxattr("inline_version", std::to_string(CEPH_INLINE_NONE));
+
+  objecter->mutate(oid,
+                   OSDMap::file_to_object_locator(pi->layout),
+                   uninline_ops,
+                   snapc_ref,
+                   ceph::real_clock::now(),
+                   0,
+                   new C_IO_DataUninlined(mdr, mds));
+}
+
 struct C_MDC_RespondInternalRequest : public MDCacheLogContext {
   MDRequestRef mdr;
   C_MDC_RespondInternalRequest(MDCache *c, const MDRequestRef& m) :
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 8ddadcd716a..c54d9e0cf28 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -1094,6 +1094,8 @@ private:
   void repair_dirfrag_stats(CDir *dir);
   void rdlock_dirfrags_stats(CInode *diri, MDSInternalContext *fin);
 
+  void uninline_data_work(MDRequestRef mdr);
+
   // my leader
   MDSRank *mds;
 
diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc
index 28392f53366..45188402a68 100644
--- a/src/mds/ScrubStack.cc
+++ b/src/mds/ScrubStack.cc
@@ -17,6 +17,7 @@
 #include "mds/MDSRank.h"
 #include "mds/MDCache.h"
 #include "mds/MDSContinuation.h"
+#include "osdc/Objecter.h"
 
 #define dout_context g_ceph_context
 #define dout_subsys ceph_subsys_mds
@@ -227,6 +228,7 @@ void ScrubStack::kick_off_scrubs()
       // it's a regular file, symlink, or hard link
       dequeue(in); // we only touch it this once, so remove from stack
 
+      uninline_data(in, new C_MDSInternalNoop);
       scrub_file_inode(in);
     } else {
       bool added_children = false;
@@ -1197,3 +1199,16 @@ void ScrubStack::handle_mds_failure(mds_rank_t mds)
   if (kick)
     kick_off_scrubs();
 }
+
+void ScrubStack::uninline_data(CInode *in, Context *fin)
+{
+  dout(10) << "(uninline_data) starting data uninlining for " << *in << dendl;
+
+  MDRequestRef mdr = in->mdcache->request_start_internal(CEPH_MDS_OP_UNINLINE_DATA);
+  mdr->set_filepath(filepath(in->ino()));
+  mdr->snapid = CEPH_NOSNAP;
+  mdr->no_early_reply = true;
+  mdr->internal_op_finish = fin;
+
+  in->mdcache->dispatch_request(mdr);
+}
diff --git a/src/mds/ScrubStack.h b/src/mds/ScrubStack.h
index 756ebd9cb0e..5e3c810ceca 100644
--- a/src/mds/ScrubStack.h
+++ b/src/mds/ScrubStack.h
@@ -22,6 +22,7 @@
 #include "ScrubHeader.h"
 
 #include "common/LogClient.h"
+#include "common/Cond.h"
 #include "include/elist.h"
 #include "messages/MMDSScrub.h"
 #include "messages/MMDSScrubStats.h"
@@ -267,6 +268,7 @@ private:
 
   void handle_scrub(const cref_t<MMDSScrub> &m);
   void handle_scrub_stats(const cref_t<MMDSScrubStats> &m);
+  void uninline_data(CInode *in, Context *fin);
 
   State state = STATE_IDLE;
   bool clear_stack = false;