diff options
author | Daniel Gryniewicz <dang@redhat.com> | 2023-08-30 20:09:16 +0200 |
---|---|---|
committer | Daniel Gryniewicz <dang@fprintf.net> | 2024-07-30 14:03:35 +0200 |
commit | 4f321056dec216a2578364468d64b8c6023c0496 (patch) | |
tree | 31bdcff2970c36309b33528a3438bf6469a8e030 /src/rgw/driver | |
parent | Merge pull request #58748 from pereman2/replayer-merge-files (diff) | |
download | ceph-4f321056dec216a2578364468d64b8c6023c0496.tar.xz ceph-4f321056dec216a2578364468d64b8c6023c0496.zip |
RGW - Add Multipart and Versioned to POSIXDriver
This is a rework of the POSIXDriver. It refactors out the actual posix
parts into a set of classes that provide access to underlying
directory/file/symlink, etc. These primatives are used to build up full
support for Bucket, Object, Multipart, and VersionedObjects.
Signed-off-by: Daniel Gryniewicz <dang@redhat.com>
Diffstat (limited to 'src/rgw/driver')
-rw-r--r-- | src/rgw/driver/posix/bucket_cache.h | 92 | ||||
-rw-r--r-- | src/rgw/driver/posix/rgw_sal_posix.cc | 3177 | ||||
-rw-r--r-- | src/rgw/driver/posix/rgw_sal_posix.h | 448 |
3 files changed, 2639 insertions, 1078 deletions
diff --git a/src/rgw/driver/posix/bucket_cache.h b/src/rgw/driver/posix/bucket_cache.h index 3cbca7c58de..cbfe619b994 100644 --- a/src/rgw/driver/posix/bucket_cache.h +++ b/src/rgw/driver/posix/bucket_cache.h @@ -29,6 +29,7 @@ #include "fmt/format.h" +#define dout_subsys ceph_subsys_rgw namespace file::listing { namespace bi = boost::intrusive; @@ -281,7 +282,7 @@ public: typedef std::tuple<BucketCacheEntry<D, B>*, uint32_t> GetBucketResult; - GetBucketResult get_bucket(const std::string& name, uint32_t flags) + GetBucketResult get_bucket(const DoutPrefixProvider* dpp, const std::string& name, uint32_t flags) { /* this fn returns a bucket locked appropriately, having atomically * found or inserted the required BucketCacheEntry in_avl*/ @@ -309,10 +310,11 @@ public: } else { /* BucketCacheEntry not in cache */ if (! (flags & BucketCache<D, B>::FLAG_CREATE)) { - /* the caller does not want to instantiate a new cache + /* the caller does not want to instantiate a new cache * entry (i.e., only wants to notify on an existing one) */ - return result; - } + lat.lock->unlock(); + return result; + } /* we need to create it */ b = static_cast<BucketCacheEntry<D, B>*>( lru.insert(&fac, cohort::lru::Edge::MRU, iflags)); @@ -399,7 +401,7 @@ public: int rc __attribute__((unused)) = 0; GetBucketResult gbr = - get_bucket(sal_bucket->get_name(), + get_bucket(dpp, sal_bucket->get_name(), BucketCache<D, B>::FLAG_LOCK | BucketCache<D, B>::FLAG_CREATE); auto [b /* BucketCacheEntry */, flags] = gbr; if (b /* XXX again, can this fail? */) { @@ -450,6 +452,10 @@ public: } else { /* position at start of index */ auto rc = cursor.get(key, data, MDB_FIRST); + if (rc == MDB_NOTFOUND) { + /* no initial key */ + return 0; + } if (rc == MDB_SUCCESS) { proc_result(); } @@ -472,12 +478,12 @@ public: using namespace LMDBSafe; int rc{0}; - GetBucketResult gbr = get_bucket(bname, BucketCache<D, B>::FLAG_LOCK); + GetBucketResult gbr = get_bucket(nullptr, bname, BucketCache<D, B>::FLAG_LOCK); auto [b /* BucketCacheEntry */, flags] = gbr; if (b) { unique_lock ulk{b->mtx, std::adopt_lock}; if ((b->name != bname) || - (b != opaque) || + (opaque && (b != opaque)) || (! (b->flags & BucketCacheEntry<D, B>::FLAG_FILLED))) { /* do nothing */ return 0; @@ -544,6 +550,78 @@ public: return rc; } /* notify */ + int add_entry(const DoutPrefixProvider* dpp, std::string bname, rgw_bucket_dir_entry bde) { + using namespace LMDBSafe; + + GetBucketResult gbr = get_bucket(dpp, bname, BucketCache<D, B>::FLAG_LOCK); + auto [b /* BucketCacheEntry */, flags] = gbr; + if (b) { + unique_lock ulk{b->mtx, std::adopt_lock}; + ulk.unlock(); + + auto txn = b->env->getRWTransaction(); + auto concat_k = concat_key(bde.key); + std::string ser_data; + zpp::bits::out out(ser_data); + struct timespec ts { + ceph::real_clock::to_timespec(bde.meta.mtime) + }; + auto errc = + out(bde.key.name, bde.key.instance, /* XXX bde.key.ns, */ + bde.ver.pool, bde.ver.epoch, bde.exists, bde.meta.category, + bde.meta.size, ts.tv_sec, ts.tv_nsec, bde.meta.owner, + bde.meta.owner_display_name, bde.meta.accounted_size, + bde.meta.storage_class, bde.meta.appendable, bde.meta.etag); + if (errc.code != std::errc{0}) { + abort(); + } + txn->put(b->dbi, concat_k, ser_data); + + txn->commit(); + lru.unref(b, cohort::lru::FLAG_NONE); + } /* b */ + + return 0; + } /* add_entry */ + + int remove_entry(const DoutPrefixProvider* dpp, std::string bname, cls_rgw_obj_key key) { + using namespace LMDBSafe; + + GetBucketResult gbr = get_bucket(dpp, bname, BucketCache<D, B>::FLAG_LOCK); + auto [b /* BucketCacheEntry */, flags] = gbr; + if (b) { + unique_lock ulk{b->mtx, std::adopt_lock}; + ulk.unlock(); + + auto txn = b->env->getRWTransaction(); + auto concat_k = concat_key(key); + txn->del(b->dbi, concat_k); + txn->commit(); + + lru.unref(b, cohort::lru::FLAG_NONE); + } /* b */ + + return 0; + } /* remove_entry */ + + int invalidate_bucket(const DoutPrefixProvider* dpp, std::string bname) { + using namespace LMDBSafe; + + GetBucketResult gbr = get_bucket(dpp, bname, BucketCache<D, B>::FLAG_LOCK); + auto [b /* BucketCacheEntry */, flags] = gbr; + if (b) { + unique_lock ulk{b->mtx, std::adopt_lock}; + + auto txn = b->env->getRWTransaction(); + mdb_drop(*txn, b->dbi, 0); + txn->commit(); + b->flags &= ~BucketCacheEntry<D, B>::FLAG_FILLED; + + ulk.unlock(); + } /* b */ + + return 0; + } /* invalidate_bucket */ }; /* BucketCache */ } // namespace file::listing diff --git a/src/rgw/driver/posix/rgw_sal_posix.cc b/src/rgw/driver/posix/rgw_sal_posix.cc index 145a5789653..8ffe4a0d0ca 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.cc +++ b/src/rgw/driver/posix/rgw_sal_posix.cc @@ -19,7 +19,6 @@ #include <sys/xattr.h> #include <unistd.h> #include "rgw_multi.h" -#include "rgw_acl_s3.h" #include "include/scope_guard.h" #define dout_subsys ceph_subsys_rgw @@ -27,37 +26,149 @@ namespace rgw { namespace sal { -const int64_t READ_SIZE = 8 * 1024; +const int64_t READ_SIZE = 128 * 1024; const std::string ATTR_PREFIX = "user.X-RGW-"; #define RGW_POSIX_ATTR_BUCKET_INFO "POSIX-Bucket-Info" #define RGW_POSIX_ATTR_MPUPLOAD "POSIX-Multipart-Upload" #define RGW_POSIX_ATTR_OWNER "POSIX-Owner" +#define RGW_POSIX_ATTR_OBJECT_TYPE "POSIX-Object-Type" const std::string mp_ns = "multipart"; const std::string MP_OBJ_PART_PFX = "part-"; const std::string MP_OBJ_HEAD_NAME = MP_OBJ_PART_PFX + "00000"; +struct POSIXOwner { + rgw_user user; + std::string display_name; + + POSIXOwner() {} + + POSIXOwner(const rgw_user& _u, const std::string& _n) : + user(_u), + display_name(_n) + {} + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + encode(user, bl); + encode(display_name, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &bl) { + DECODE_START(1, bl); + decode(user, bl); + decode(display_name, bl); + DECODE_FINISH(bl); + } + friend inline std::ostream &operator<<(std::ostream &out, + const POSIXOwner &o) { + out << o.user << ":" << o.display_name; + return out; + } +}; +WRITE_CLASS_ENCODER(POSIXOwner); + +std::string get_key_fname(rgw_obj_key& key, bool use_version) +{ + std::string oid; + if (use_version) { + oid = key.get_oid(); + } else { + oid = key.get_index_key_name(); + } + std::string fname = url_encode(oid, true); + + if (!key.get_ns().empty()) { + /* Namespaced objects are hidden */ + fname.insert(0, 1, '.'); + } + + return fname; +} + +static inline std::string gen_rand_instance_name() +{ + enum { OBJ_INSTANCE_LEN = 32 }; + char buf[OBJ_INSTANCE_LEN + 1]; + +#if 0 + gen_rand_alphanumeric_no_underscore(driver->ctx(), buf, OBJ_INSTANCE_LEN); +#else + static uint64_t last_id = UINT64_MAX; + snprintf(buf, OBJ_INSTANCE_LEN, "%lx", last_id); + last_id--; +#endif + + return buf; +} + +static inline std::string bucket_fname(std::string name, std::optional<std::string>& ns) +{ + std::string bname; + + if (ns) + bname = "." + *ns + "_" + url_encode(name, true); + else + bname = url_encode(name, true); + + return bname; +} + +static inline bool get_attr(Attrs& attrs, const char* name, bufferlist& bl) +{ + auto iter = attrs.find(name); + if (iter == attrs.end()) { + return false; + } + + bl = iter->second; + return true; +} + +template <typename F> +static bool decode_attr(Attrs &attrs, const char *name, F &f) { + bufferlist bl; + if (!get_attr(attrs, name, bl)) { + return false; + } + try { + auto bufit = bl.cbegin(); + decode(f, bufit); + } catch (buffer::error &err) { + return false; + } + + return true; +} + static inline rgw_obj_key decode_obj_key(const char* fname) { std::string dname, oname, ns; dname = url_decode(fname); - rgw_obj_key::parse_index_key(dname, &oname, &ns); - rgw_obj_key key(oname, std::string(), ns); + rgw_obj_key key; + rgw_obj_key::parse_raw_oid(dname, &key); return key; } -static inline ceph::real_time from_statx_timestamp(const struct statx_timestamp& xts) +static inline rgw_obj_key decode_obj_key(const std::string& fname) { - struct timespec ts{xts.tv_sec, xts.tv_nsec}; - return ceph::real_clock::from_timespec(ts); + return decode_obj_key(fname.c_str()); } -static inline void bucket_statx_save(struct statx& stx, RGWBucketEnt& ent, ceph::real_time& mtime) +int decode_owner(Attrs& attrs, POSIXOwner& owner) { - mtime = ceph::real_clock::from_time_t(stx.stx_mtime.tv_sec); - ent.creation_time = ceph::real_clock::from_time_t(stx.stx_btime.tv_sec); - // TODO Calculate size of bucket (or save it somewhere?) - //ent.size = stx.stx_size; - //ent.size_rounded = stx.stx_blocks * 512; + bufferlist bl; + if (!decode_attr(attrs, RGW_POSIX_ATTR_OWNER, owner)) { + return -EINVAL; + } + + return 0; +} + +static inline ceph::real_time from_statx_timestamp(const struct statx_timestamp& xts) +{ + struct timespec ts{xts.tv_sec, xts.tv_nsec}; + return ceph::real_clock::from_timespec(ts); } static inline int copy_dir_fd(int old_fd) @@ -152,6 +263,23 @@ static int write_x_attr(const DoutPrefixProvider* dpp, optional_yield y, int fd, return 0; } +static int remove_x_attr(const DoutPrefixProvider *dpp, optional_yield y, + int fd, const std::string &key, + const std::string &display) +{ + int ret; + std::string attrname{ATTR_PREFIX + key}; + + ret = fremovexattr(fd, attrname.c_str()); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not remove attribute " << attrname << " for " << display << ": " << cpp_strerror(ret) << dendl; + return -ret; + } + + return 0; +} + static int delete_directory(int parent_fd, const char* dname, bool delete_children, const DoutPrefixProvider* dpp) { @@ -235,6 +363,1496 @@ static int delete_directory(int parent_fd, const char* dname, bool delete_childr return 0; } +int FSEnt::stat(const DoutPrefixProvider* dpp, bool force) +{ + if (force) { + stat_done = false; + } + + if (stat_done) { + return 0; + } + + int ret = statx(parent->get_fd(), fname.c_str(), AT_SYMLINK_NOFOLLOW, + STATX_ALL, &stx); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not stat " << get_name() << ": " + << cpp_strerror(ret) << dendl; + exist = false; + return -ret; + } + + exist = true; + stat_done = true; + return 0; +} + +int FSEnt::write_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs, Attrs* extra_attrs) +{ + int ret = open(dpp); + if (ret < 0) { + return ret; + } + + /* Set the type */ + bufferlist type_bl; + ObjectType type{get_type()}; + type.encode(type_bl); + attrs[RGW_POSIX_ATTR_OBJECT_TYPE] = type_bl; + + if (extra_attrs) { + for (auto &it : *extra_attrs) { + ret = write_x_attr(dpp, y, fd, it.first, it.second, get_name()); + if (ret < 0) { + return ret; + } + } + } + + for (auto& it : attrs) { + ret = write_x_attr(dpp, y, fd, it.first, it.second, get_name()); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +int FSEnt::read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs) +{ + int ret = open(dpp); + if (ret < 0) { + return ret; + } + + return get_x_attrs(y, dpp, get_fd(), attrs, get_name()); +} + +int FSEnt::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, fill_cache_cb_t& cb) +{ + rgw_bucket_dir_entry bde{}; + + rgw_obj_key key = decode_obj_key(get_name()); + if (parent->get_type() == ObjectType::MULTIPART) { + key.ns = mp_ns; + } + key.get_index_key(&bde.key); + bde.ver.pool = 1; + bde.ver.epoch = 1; + + switch (parent->get_type().type) { + case ObjectType::VERSIONED: + bde.flags = rgw_bucket_dir_entry::FLAG_VER; + bde.exists = true; + if (!key.have_instance()) { + bde.flags |= rgw_bucket_dir_entry::FLAG_CURRENT; + } + break; + case ObjectType::MULTIPART: + case ObjectType::DIRECTORY: + bde.exists = true; + break; + case ObjectType::UNKNOWN: + case ObjectType::FILE: + case ObjectType::SYMLINK: + return -EINVAL; + } + + Attrs attrs; + int ret = open(dpp); + if (ret < 0) + return ret; + + ret = get_x_attrs(y, dpp, get_fd(), attrs, get_name()); + if (ret < 0) + return ret; + + POSIXOwner o; + ret = decode_owner(attrs, o); + if (ret < 0) { + bde.meta.owner = "unknown"; + bde.meta.owner_display_name = "unknown"; + } else { + bde.meta.owner = o.user.to_str(); + bde.meta.owner_display_name = o.display_name; + } + bde.meta.category = RGWObjCategory::Main; + bde.meta.size = stx.stx_size; + bde.meta.accounted_size = stx.stx_size; + bde.meta.mtime = from_statx_timestamp(stx.stx_mtime); + bde.meta.storage_class = RGW_STORAGE_CLASS_STANDARD; + bde.meta.appendable = true; + bufferlist etag_bl; + if (rgw::sal::get_attr(attrs, RGW_ATTR_ETAG, etag_bl)) { + bde.meta.etag = etag_bl.to_str(); + } + + return cb(dpp, bde); +} + +int File::create(const DoutPrefixProvider *dpp, bool* existed, bool temp_file) +{ + int flags, ret; + std::string path; + if(temp_file) { + flags = O_TMPFILE | O_RDWR; + path = "."; + } else { + flags = O_CREAT | O_RDWR; + path = get_name(); + } + + ret = openat(parent->get_fd(), path.c_str(), flags | O_NOFOLLOW, S_IRWXU); + if (ret < 0) { + ret = errno; + if (ret == EEXIST) { + return 0; + } + ldpp_dout(dpp, 0) << "ERROR: could not open object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + fd = ret; + + return 0; +} + +int File::open(const DoutPrefixProvider* dpp) +{ + if (fd >= 0) { + return 0; + } + + int ret = openat(parent->get_fd(), fname.c_str(), O_RDWR, S_IRWXU); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not open object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + fd = ret; + + return 0; +} + +int File::close() +{ + if (fd < 0) { + return 0; + } + + int ret = ::fsync(fd); + if(ret < 0) { + return ret; + } + + ret = ::close(fd); + if(ret < 0) { + return ret; + } + fd = -1; + + return 0; +} + + +int File::stat(const DoutPrefixProvider* dpp, bool force) +{ + int ret = FSEnt::stat(dpp, force); + if (ret < 0) { + return ret; + } + + if (!S_ISREG(stx.stx_mode)) { + /* Not a file */ + ldpp_dout(dpp, 0) << "ERROR: " << get_name() << " is not a file" << dendl; + return -EINVAL; + } + + return 0; +} + +int File::write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, + optional_yield y) +{ + int64_t left = bl.length(); + char* curp = bl.c_str(); + ssize_t ret; + + ret = fchmod(fd, S_IRUSR|S_IWUSR); + if(ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not change permissions on object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return ret; + } + + + ret = lseek(fd, ofs, SEEK_SET); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not seek object " << get_name() << " to " + << ofs << " :" << cpp_strerror(ret) << dendl; + return -ret; + } + + while (left > 0) { + ret = ::write(fd, curp, left); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not write object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + curp += ret; + left -= ret; + } + + return 0; +} + +int File::read(int64_t ofs, int64_t left, bufferlist& bl, + const DoutPrefixProvider* dpp, optional_yield y) +{ + int64_t len = std::min(left, READ_SIZE); + ssize_t ret; + + ret = lseek(fd, ofs, SEEK_SET); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not seek object " << get_name() << " to " + << ofs << " :" << cpp_strerror(ret) << dendl; + return -ret; + } + + char read_buf[READ_SIZE]; + ret = ::read(fd, read_buf, len); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not read object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + bl.append(read_buf, ret); + + return ret; +} + +int File::copy(const DoutPrefixProvider *dpp, optional_yield y, + Directory* dst_dir, const std::string& dst_name) +{ + off64_t scount = 0, dcount = 0; + + int ret = stat(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not stat source file " << get_name() + << dendl; + return ret; + } + + ret = open(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not open source file " << get_name() + << dendl; + return ret; + } + + // Delete the target + { + std::unique_ptr<FSEnt> del; + ret = dst_dir->get_ent(dpp, y, dst_name, std::string(), del); + if (ret >= 0) { + ret = del->remove(dpp, y, /*delete_children=*/true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not remove dest " << dst_name + << dendl; + return ret; + } + } + } + + std::unique_ptr<File> dest = clone(); + dest->parent = dst_dir; + dest->fname = dst_name; + + ret = dest->create(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not create dest file " + << dest->get_name() << dendl; + return ret; + } + ret = dest->open(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not open dest file " + << dest->get_name() << dendl; + return ret; + } + + ret = copy_file_range(fd, &scount, dest->get_fd(), &dcount, get_size(), 0); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not copy object " << dest->get_name() + << ": " << cpp_strerror(ret) << dendl; + return -ret; + } + + return 0; +} + +int File::remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) +{ + if (!exists()) { + return 0; + } + + int ret = unlinkat(parent->get_fd(), fname.c_str(), 0); + if (ret < 0) { + ret = errno; + if (errno != ENOENT) { + ldpp_dout(dpp, 0) << "ERROR: could not remove object " << get_name() + << ": " << cpp_strerror(ret) << dendl; + return -ret; + } + } + + return 0; +} + +int File::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y, std::string temp_fname) +{ + if (fd < 0) { + return 0; + } + + char temp_file_path[PATH_MAX]; + // Only works on Linux - Non-portable + snprintf(temp_file_path, PATH_MAX, "/proc/self/fd/%d", fd); + + int ret = linkat(AT_FDCWD, temp_file_path, parent->get_fd(), temp_fname.c_str(), AT_SYMLINK_FOLLOW); + if(ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: linkat for temp file could not finish: " + << cpp_strerror(ret) << dendl; + return -ret; + } + + ret = renameat(parent->get_fd(), temp_fname.c_str(), parent->get_fd(), get_name().c_str()); + if(ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: renameat for object could not finish: " + << cpp_strerror(ret) << dendl; + return -ret; + } + + return 0; +} + +bool Directory::file_exists(std::string& name) +{ + struct statx nstx; + int ret = statx(fd, name.c_str(), AT_SYMLINK_NOFOLLOW, STATX_ALL, &nstx); + + return (ret >= 0); +} + +int Directory::create(const DoutPrefixProvider* dpp, bool* existed, bool temp_file) +{ + if (temp_file) { + ldpp_dout(dpp, 0) << "ERROR: cannot create directory with temp_file " << get_name() << dendl; + return -EINVAL; + } + + int ret = mkdirat(parent->get_fd(), fname.c_str(), S_IRWXU); + if (ret < 0) { + ret = errno; + if (ret != EEXIST) { + if (dpp) + ldpp_dout(dpp, 0) << "ERROR: could not create bucket " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } else if (existed != nullptr) { + *existed = true; + } + } + + return 0; +} + +int Directory::open(const DoutPrefixProvider* dpp) +{ + if (fd >= 0) { + return 0; + } + + int pfd{AT_FDCWD}; + if (parent) + pfd = parent->get_fd(); + + int ret = openat(pfd, fname.c_str(), O_RDONLY | O_DIRECTORY | O_NOFOLLOW); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not open dir " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + fd = ret; + + return 0; +} + +int Directory::close() +{ + if (fd < 0) { + return 0; + } + + ::close(fd); + fd = -1; + + return 0; +} + +int Directory::stat(const DoutPrefixProvider* dpp, bool force) +{ + int ret = FSEnt::stat(dpp, force); + if (ret < 0) { + return ret; + } + + if (!S_ISDIR(stx.stx_mode)) { + /* Not a directory */ + ldpp_dout(dpp, 0) << "ERROR: " << get_name() << " is not a directory" << dendl; + return -EINVAL; + } + + return 0; +} + +int Directory::remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) +{ + return delete_directory(parent->get_fd(), fname.c_str(), delete_children, dpp); +} + +int Directory::write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, + optional_yield y) +{ + return -EINVAL; +} + +int Directory::read(int64_t ofs, int64_t left, bufferlist &bl, + const DoutPrefixProvider *dpp, optional_yield y) +{ + return -EINVAL; +} + +int Directory::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y, + std::string temp_fname) +{ + return -EINVAL; +} + +template <typename F> +int Directory::for_each(const DoutPrefixProvider* dpp, const F& func) +{ + DIR* dir; + struct dirent* entry; + int ret; + + ret = open(dpp); + if (ret < 0) { + return ret; + } + + dir = fdopendir(fd); + if (dir == NULL) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not open dir " << get_name() << " for listing: " + << cpp_strerror(ret) << dendl; + return -ret; + } + + rewinddir(dir); + + ret = 0; + while ((entry = readdir(dir)) != NULL) { + std::string_view vname(entry->d_name); + + if (vname == "." || vname == "..") + continue; + + int r = func(entry->d_name); + if (r < 0) { + ret = r; + break; + } + } + + if (ret == -EAGAIN) { + /* Limit reached */ + ret = 0; + } + return ret; +} + +int Directory::rename(const DoutPrefixProvider* dpp, optional_yield y, Directory* dst_dir, std::string dst_name) +{ + int flags = 0; + int ret; + std::string src_name = fname; + int parent_fd = parent->get_fd(); + + if (dst_dir->file_exists(dst_name)) { + flags = RENAME_EXCHANGE; + } + // swap + ret = renameat2(parent_fd, src_name.c_str(), dst_dir->get_fd(), dst_name.c_str(), flags); + if(ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: renameat2 for shadow object could not finish: " + << cpp_strerror(ret) << dendl; + return -ret; + } + + /* Parent of this dir is now dest dir */ + parent = dst_dir; + /* Name has changed */ + fname = dst_name; + + // Delete old one (could be file or directory) + struct statx stx; + ret = statx(parent_fd, src_name.c_str(), AT_SYMLINK_NOFOLLOW, + STATX_ALL, &stx); + if (ret < 0) { + ret = errno; + if (ret == ENOENT) { + return 0; + } + ldpp_dout(dpp, 0) << "ERROR: could not stat object " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + if (S_ISREG(stx.stx_mode)) { + ret = unlinkat(parent_fd, src_name.c_str(), 0); + } else if (S_ISDIR(stx.stx_mode)) { + ret = delete_directory(parent_fd, src_name.c_str(), true, dpp); + } + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not remove old file " << get_name() + << ": " << cpp_strerror(ret) << dendl; + return -ret; + } + + return 0; +} + +int Directory::copy(const DoutPrefixProvider *dpp, optional_yield y, + Directory* dst_dir, const std::string& dst_name) +{ + int ret; + + // Delete the target + { + std::unique_ptr<FSEnt> del; + ret = dst_dir->get_ent(dpp, y, dst_name, std::string(), del); + if (ret >= 0) { + ret = del->remove(dpp, y, /*delete_children=*/true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not remove dest " << dst_name + << dendl; + return ret; + } + } + } + + ret = dst_dir->open(dpp); + std::unique_ptr<Directory> dest = clone_dir(); + dest->parent = dst_dir; + dest->fname = dst_name; + + ret = dest->create(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not create dest " << dest->get_name() << dendl; + return ret; + } + + Attrs attrs; + ret = read_attrs(dpp, y, attrs); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not read attrs from " << get_name() << dendl; + return ret; + } + ret = dest->write_attrs(dpp, y, attrs, nullptr); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not write attrs to " << dest->get_name() << dendl; + return ret; + } + + ret = for_each(dpp, [this, &dest, &dpp, &y](const char* name) { + std::unique_ptr<FSEnt> sobj; + + if (name[0] == '.') { + /* Skip dotfiles */ + return 0; + } + + int r = this->get_ent(dpp, y, name, std::string(), sobj); + if (r < 0) + return r; + return sobj->copy(dpp, y, dest.get(), name); + }); + + return ret; +} + +int Directory::get_ent(const DoutPrefixProvider *dpp, optional_yield y, const std::string &name, const std::string& instance, std::unique_ptr<FSEnt>& ent) +{ + struct statx nstx; + std::unique_ptr<FSEnt> nent; + + int ret = open(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not open directory " << name << dendl; + return ret; + } + + ret = statx(get_fd(), name.c_str(), + AT_SYMLINK_NOFOLLOW, STATX_ALL, &nstx); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << " in dir " + << get_name() << " : " << cpp_strerror(ret) << dendl; + return -ret; + } + if (S_ISREG(nstx.stx_mode)) { + nent = std::make_unique<File>(name, this, nstx, ctx); + } else if (S_ISDIR(nstx.stx_mode)) { + ObjectType type{ObjectType::MULTIPART}; + int tmpfd; + Attrs attrs; + + tmpfd = openat(get_fd(), name.c_str(), O_RDONLY | O_DIRECTORY | O_NOFOLLOW); + if (tmpfd > 0) { + ret = get_x_attrs(y, dpp, tmpfd, attrs, name); + if (ret >= 0) { + decode_attr(attrs, RGW_POSIX_ATTR_OBJECT_TYPE, type); + } + } + switch (type.type) { + case ObjectType::VERSIONED: + nent = std::make_unique<VersionedDirectory>(name, this, instance, nstx, ctx); + break; + case ObjectType::MULTIPART: + nent = std::make_unique<MPDirectory>(name, this, nstx, ctx); + break; + case ObjectType::DIRECTORY: + nent = std::make_unique<Directory>(name, this, nstx, ctx); + break; + default: + ldpp_dout(dpp, 0) << "ERROR: invalid type " << type << dendl; + return -EINVAL; + } + } else if (S_ISLNK(nstx.stx_mode)) { + nent = std::make_unique<Symlink>(name, this, nstx, ctx); + } else { + return -EINVAL; + } + + ent.swap(nent); + return 0; +} + +int Directory::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, + fill_cache_cb_t &cb) +{ + int ret = for_each(dpp, [this, &cb, &dpp, &y](const char *name) { + std::unique_ptr<FSEnt> ent; + + if (name[0] == '.') { + /* Skip dotfiles */ + return 0; + } + + int ret = get_ent(dpp, y, name, std::string(), ent); + if (ret < 0) + return ret; + + ent->stat(dpp); // Stat the object to get the type + + ret = ent->fill_cache(dpp, y, cb); + if (ret < 0) + return ret; + return 0; + }); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not list directory " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return ret; + } + + return 0; +} + +int Symlink::create(const DoutPrefixProvider* dpp, bool* existed, bool temp_file) +{ + if (temp_file) { + ldpp_dout(dpp, 0) << "ERROR: cannot create symlink with temp_file " << get_name() << dendl; + return -EINVAL; + } + + int ret = symlinkat(target->get_name().c_str(), parent->get_fd(), fname.c_str()); + if (ret < 0) { + ret = errno; + if (ret == EEXIST && existed != nullptr) { + *existed = true; + } + ldpp_dout(dpp, 0) << "ERROR: could not create bucket " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + return 0; +} + +int Symlink::fill_target(const DoutPrefixProvider *dpp, Directory* parent, std::string sname, std::string tname, std::unique_ptr<FSEnt>& ent, CephContext* _ctx) +{ + int ret; + + if (!tname.empty()) { + ret = parent->get_ent(dpp, null_yield, tname, std::string(), ent); + if (ret < 0) { + ent = std::make_unique<File>(tname, parent, _ctx); + } + return 0; + } + + char link[PATH_MAX]; + memset(link, 0, sizeof(link)); + ret = readlinkat(parent->get_fd(), sname.c_str(), link, sizeof(link)); + if (ret < 0) { + ret = errno; + return -ret; + } + ret = parent->get_ent(dpp, null_yield, link, std::string(), ent); + if (ret < 0) { + ent = std::make_unique<File>(link, parent, _ctx); + } + return 0; +} + +int Symlink::stat(const DoutPrefixProvider* dpp, bool force) +{ + int ret = FSEnt::stat(dpp, force); + if (ret < 0) { + return ret; + } + + if (!S_ISLNK(stx.stx_mode)) { + /* Not a symlink */ + ldpp_dout(dpp, 0) << "ERROR: " << get_name() << " is not a symlink" << dendl; + return -EINVAL; + } + + struct statx sstx; + ret = statx(parent->get_fd(), fname.c_str(), 0, STATX_BASIC_STATS, &sstx); + if (ret >= 0) { + stx.stx_size = sstx.stx_size; + } + + exist = true; + return fill_target(dpp, parent, get_name(), std::string(), target, ctx); +} + +int Symlink::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, fill_cache_cb_t& cb) +{ + rgw_bucket_dir_entry bde{}; + int ret; + + rgw_obj_key key = decode_obj_key(get_name()); + key.get_index_key(&bde.key); + bde.ver.pool = 1; + bde.ver.epoch = 1; + + bde.flags = rgw_bucket_dir_entry::FLAG_VER; + bde.exists = true; + bde.flags |= rgw_bucket_dir_entry::FLAG_CURRENT; + + if (!target) { + ret = stat(dpp, /*force=*/false); + if (ret < 0) + return ret; + } + + Attrs attrs; + ret = target->read_attrs(dpp, y, attrs); + if (ret < 0) + return ret; + + POSIXOwner o; + ret = decode_owner(attrs, o); + if (ret < 0) { + bde.meta.owner = "unknown"; + bde.meta.owner_display_name = "unknown"; + } else { + bde.meta.owner = o.user.to_str(); + bde.meta.owner_display_name = o.display_name; + } + bde.meta.category = RGWObjCategory::Main; + bde.meta.size = stx.stx_size; + bde.meta.accounted_size = stx.stx_size; + bde.meta.mtime = from_statx_timestamp(stx.stx_mtime); + bde.meta.storage_class = RGW_STORAGE_CLASS_STANDARD; + bde.meta.appendable = true; + bufferlist etag_bl; + if (rgw::sal::get_attr(attrs, RGW_ATTR_ETAG, etag_bl)) { + bde.meta.etag = etag_bl.to_str(); + } + + return cb(dpp, bde); +} + +int Symlink::read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs) +{ + if (target) + return target->read_attrs(dpp, y, attrs); + + return FSEnt::read_attrs(dpp, y, attrs); +} + +int Symlink::copy(const DoutPrefixProvider *dpp, optional_yield y, + Directory* dst_dir, const std::string& dst_name) +{ + int ret = stat(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not stat source file " << get_name() + << dendl; + return ret; + } + rgw_obj_key skey = decode_obj_key(target->get_name()); + rgw_obj_key dkey = decode_obj_key(dst_name); + dkey.instance = skey.instance; + std::string tgtname = get_key_fname(dkey, /*use_version=*/true); + + ret = symlinkat(tgtname.c_str(), dst_dir->get_fd(), dst_name.c_str()); + + return 0; +} + +int MPDirectory::create(const DoutPrefixProvider* dpp, bool* existed, bool temp_file) +{ + std::string path; + + if(temp_file) { + tmpname = path = "._tmpname_" + + std::to_string(ceph::util::generate_random_number<uint64_t>()); + } else { + path = get_name(); + } + + int ret = mkdirat(parent->get_fd(), path.c_str(), S_IRWXU); + if (ret < 0) { + ret = errno; + if (ret != EEXIST) { + if (dpp) + ldpp_dout(dpp, 0) << "ERROR: could not create bucket " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } else if (existed != nullptr) { + *existed = true; + } + } + + return 0; +} + +int MPDirectory::read(int64_t ofs, int64_t left, bufferlist &bl, + const DoutPrefixProvider *dpp, optional_yield y) +{ + std::string pname; + for (auto part : parts) { + if (ofs < part.second) { + pname = part.first; + break; + } + + ofs -= part.second; + } + + if (pname.empty()) { + // ofs is past the end + return 0; + } + + if (!cur_read_part || cur_read_part->get_name() != pname) { + cur_read_part = std::make_unique<File>(pname, this, ctx); + } + int ret = cur_read_part->open(dpp); + if (ret < 0) { + return ret; + } + + return cur_read_part->read(ofs, left, bl, dpp, y); +} + +int MPDirectory::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y, + std::string temp_fname) +{ + if (tmpname.empty()) { + return 0; + } + + /* Temporarily change name to tmpname, so we can reuse rename() */ + std::string savename = fname; + fname = tmpname; + tmpname.clear(); + + return rename(dpp, y, parent, savename); +} + +int MPDirectory::remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) +{ + return Directory::remove(dpp, y, /*delete_children=*/true); +} + +int MPDirectory::stat(const DoutPrefixProvider* dpp, bool force) +{ + int ret = Directory::stat(dpp, force); + if (ret < 0) { + return ret; + } + + uint64_t total_size{0}; + for_each(dpp, [this, &total_size, &dpp](const char *name) { + int ret; + struct statx stx; + std::string sname = name; + + if (sname.rfind(MP_OBJ_PART_PFX, 0) != 0) { + /* Skip non-parts */ + return 0; + } + + ret = statx(fd, name, AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + if (!S_ISREG(stx.stx_mode)) { + /* Skip non-files */ + return 0; + } + + parts[name] = stx.stx_size; + total_size += stx.stx_size; + return 0; + }); + + stx.stx_size = total_size; + + return 0; +} + + +std::unique_ptr<File> MPDirectory::get_part_file(int partnum) +{ + std::string partname = MP_OBJ_PART_PFX + fmt::format("{:0>5}", partnum); + rgw_obj_key part_key(partname); + + return std::make_unique<File>(partname, this, ctx); +} + +int MPDirectory::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, + fill_cache_cb_t &cb) +{ + int ret = FSEnt::fill_cache(dpp, y, cb); + if (ret < 0) + return ret; + + return Directory::fill_cache(dpp, y, cb); +} + +int VersionedDirectory::open(const DoutPrefixProvider* dpp) +{ + if (fd > 0) { + return 0; + } + int ret = Directory::open(dpp); + if (ret < 0) { + return 0; + } + + if (!instance_id.empty()) { + rgw_obj_key key = decode_obj_key(get_name()); + key.instance = instance_id; + get_ent(dpp, null_yield, get_key_fname(key, /*use_version=*/true), std::string(), cur_version); + } + + if (!cur_version) { + /* Can't open File, probably doesn't exist yet */ + return 0; + } + + return cur_version->open(dpp); +} + +int VersionedDirectory::create(const DoutPrefixProvider* dpp, bool* existed, bool temp_file) +{ + int ret = mkdirat(parent->get_fd(), fname.c_str(), S_IRWXU); + if (ret < 0) { + ret = errno; + if (ret != EEXIST) { + if (dpp) + ldpp_dout(dpp, 0) << "ERROR: could not create versioned directory " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + } + + ret = open(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not open versioned directory " << get_name() + << dendl; + return ret; + } + + /* Need type attribute written */ + Attrs attrs; + ret = write_attrs(dpp, null_yield, attrs, nullptr); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not write attrs for versioned directory " << get_name() + << dendl; + return ret; + } + + if (temp_file) { + /* Want to create an actual versioned object */ + rgw_obj_key key = decode_obj_key(get_name()); + key.instance = instance_id; + std::unique_ptr<FSEnt> file = + std::make_unique<File>(get_key_fname(key, /*use_version=*/true), this, ctx); + ret = add_file(dpp, std::move(file), existed, temp_file); + if (ret < 0) { + return ret; + } + } + + return 0; +} + +std::string VersionedDirectory::get_new_instance() +{ + return gen_rand_instance_name(); +} + +int VersionedDirectory::add_file(const DoutPrefixProvider* dpp, std::unique_ptr<FSEnt>&& file, bool* existed, bool temp_file) +{ + int ret = open(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not open versioned directory " << get_name() + << dendl; + return ret; + } + + ret = file->create(dpp, existed, temp_file); + if (ret < 0) { + return ret; + } + + if (!temp_file) { + return set_cur_version_ent(dpp, file.get()); + } + + cur_version = std::move(file); + return 0; +} + +int VersionedDirectory::set_cur_version_ent(const DoutPrefixProvider* dpp, FSEnt* file) +{ + /* Delete current version symlink */ + std::unique_ptr<FSEnt> del; + int ret = get_ent(dpp, null_yield, get_name(), std::string(), del); + if (ret >= 0) { + ret = del->remove(dpp, null_yield, /*delete_children=*/true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not remove cur_version " << get_name() + << dendl; + return ret; + } + } + + /* Create new current version symlink */ + std::unique_ptr<Symlink> sl = + std::make_unique<Symlink>(get_name(), this, file->get_name(), ctx); + ret = sl->create(dpp, /*existed=*/nullptr, /*temp_file=*/false); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not create cur_version symlink " + << get_name() << dendl; + return ret; + } + + return 0; +} + +int VersionedDirectory::stat(const DoutPrefixProvider* dpp, bool force) +{ + int ret = Directory::stat(dpp, force); + if (ret < 0) { + return ret; + } + + ret = open(dpp); + if (ret < 0) + return ret; + + if (cur_version) { + /* Already have a File for the current version, use it */ + ret = cur_version->stat(dpp); + if (ret < 0) + return ret; + stx.stx_size = cur_version->get_stx().stx_size; + + return 0; + } + + /* Try to read the symlink */ + std::unique_ptr<Symlink> sl = std::make_unique<Symlink>(get_name(), this, ctx); + ret = sl->stat(dpp); + if (ret < 0) { + if (ret == -ENOENT) + return 0; + return ret; + } + + if (!sl->exists()) { + stx.stx_size = 0; + return 0; + } + + cur_version = sl->get_target()->clone_base(); + ret = cur_version->open(dpp); + if (ret < 0) { + /* If target doesn't exist, it's a delete marker */ + cur_version.reset(); + stx.stx_size = 0; + return 0; + } + ret = cur_version->stat(dpp); + if (ret < 0) + return ret; + stx.stx_size = cur_version->get_stx().stx_size; + + return 0; +} + +int VersionedDirectory::read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs) +{ + if (!cur_version) + return FSEnt::read_attrs(dpp, y, attrs); + + int ret = cur_version->read_attrs(dpp, y, attrs); + if (ret < 0) { + return ret; + } + + /* Override type, it should be VERSIONED */ + bufferlist type_bl; + ObjectType type{get_type()}; + type.encode(type_bl); + attrs[RGW_POSIX_ATTR_OBJECT_TYPE] = type_bl; + + return 0; +} + +int VersionedDirectory::write_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs, Attrs* extra_attrs) +{ + if (cur_version) { + int ret = cur_version->write_attrs(dpp, y, attrs, extra_attrs); + if (ret < 0) + return ret; + } + + return FSEnt::write_attrs(dpp, y, attrs, extra_attrs); +} + +int VersionedDirectory::write(int64_t ofs, bufferlist &bl, + const DoutPrefixProvider *dpp, optional_yield y) +{ + if (!cur_version) + return 0; + return cur_version->write(ofs, bl, dpp, y); +} + +int VersionedDirectory::read(int64_t ofs, int64_t left, bufferlist &bl, + const DoutPrefixProvider *dpp, optional_yield y) +{ + if (!cur_version) + return 0; + return cur_version->read(ofs, left, bl, dpp, y); +} + +int VersionedDirectory::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y, + std::string temp_fname) +{ + if (!cur_version) + return -EINVAL; + int ret = cur_version->link_temp_file(dpp, y, temp_fname); + if (ret < 0) + return ret; + + return set_cur_version_ent(dpp, cur_version.get()); +} + +int VersionedDirectory::copy(const DoutPrefixProvider *dpp, optional_yield y, + Directory* dst_dir, const std::string& dst_name) +{ + int ret; + rgw_obj_key dest_key = decode_obj_key(dst_name); + std::string basename = get_key_fname(dest_key, /*use_version=*/false); + + // Delete the target + { + std::unique_ptr<FSEnt> del; + ret = dst_dir->get_ent(dpp, y, basename, std::string(), del); + if (ret >= 0) { + ret = del->remove(dpp, y, /*delete_children=*/true); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not remove dest " << basename + << dendl; + return ret; + } + } + } + + ret = dst_dir->open(dpp); + std::unique_ptr<VersionedDirectory> dest = clone(); + dest->parent = dst_dir; + dest->fname = basename; + + ret = dest->create(dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not create dest " << dest->get_name() << dendl; + return ret; + } + + Attrs attrs; + ret = read_attrs(dpp, y, attrs); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not read attrs from " << get_name() << dendl; + return ret; + } + ret = dest->write_attrs(dpp, y, attrs, nullptr); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not write attrs to " << dest->get_name() << dendl; + return ret; + } + + std::string tgtname; + ret = for_each(dpp, [this, &dest, &dest_key, &tgtname, &dpp, &y](const char* name) { + std::unique_ptr<FSEnt> sobj; + + if (name[0] == '.') { + /* Skip dotfiles */ + return 0; + } + rgw_obj_key key = decode_obj_key(name); + if (!dest_key.instance.empty() && dest_key.instance != key.instance) { + /* Were asked to copy a single version, and this is not it */ + return 0; + } + + int r = this->get_ent(dpp, y, name, std::string(), sobj); + if (r < 0) + return r; + key.name = dest_key.name; + tgtname = get_key_fname(key, /*use_version=*/true); + return sobj->copy(dpp, y, dest.get(), tgtname); + }); + + if (!dest_key.instance.empty()) { + /* We didn't copy the symlink, make a new one */ + std::unique_ptr<Symlink> sl = std::make_unique<Symlink>(basename, dest.get(), tgtname, ctx); + ret = sl->create(dpp, /*existed=*/nullptr, /*temp_file=*/false); + } + + return ret; +} + +int VersionedDirectory::remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) +{ + std::string tgtname; + bool newlink = false; + + int ret = open(dpp); + if (ret < 0) + return ret; + + if (instance_id.empty()) { + /* Check if directory is empty */ + ret = for_each(dpp, [](const char *n) { + return -ENOENT; + }); + + if (ret == 0) { + /* We're empty, nuke us */ + return Directory::remove(dpp, y, /*delete_children=*/true); + } + + /* Add a delete marker */ + rgw_obj_key key = decode_obj_key(get_name()); + key.instance = gen_rand_instance_name(); + tgtname = get_key_fname(key, /*use_version=*/true); + newlink = true; + ret = remove_symlink(dpp, y); + if (ret < 0) { + return ret; + } + } else { + /* Delete specific version */ + rgw_obj_key key = decode_obj_key(get_name()); + key.instance = instance_id; + std::string name = get_key_fname(key, /*use_version=*/true); + + std::unique_ptr<FSEnt> f; + ret = get_ent(dpp, y, name, std::string(), f); + if (ret == 0) { + ret = f->stat(dpp); + if (ret < 0) + return ret; + ret = f->remove(dpp, y, /*delete_children=*/true); + if (ret < 0) + return ret; + } else if (ret == -ENOENT) { + /* See if we're removing a delete marker */ + std::unique_ptr<Symlink> sl = + std::make_unique<Symlink>(get_name(), this, ctx); + ret = sl->stat(dpp); + if (ret == 0) { + if (name != sl->get_target()->get_name()) { + /* Symlink didn't match, don't change anything */ + return 0; + } + } + /* FALLTHROUGH */ + } else { + return ret; + } + + /* Possibly move symlink */ + ret = remove_symlink(dpp, y, name); + if (ret < 0) { + if (ret == -ENOKEY) { + return 0; + } + return ret; + } + newlink = true; + /* Create new current version symlink */ + ret = for_each(dpp, [&tgtname](const char *n) { + if (n[0] == '.') { + /* Skip dotfiles */ + return 0; + } + + tgtname = n; + return 0; + }); + + if (tgtname.empty()) { + /* We're empty, nuke us */ + exist = false; + return Directory::remove(dpp, y, /*delete_children=*/true); + } + } + + if (newlink) { + exist = true; + std::unique_ptr<Symlink> sl = + std::make_unique<Symlink>(get_name(), this, tgtname, ctx); + return sl->create(dpp, /*existed=*/nullptr, /*temp_file=*/false); + } + return 0; +} + +int VersionedDirectory::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, + fill_cache_cb_t &cb) +{ + int ret = for_each(dpp, [this, &cb, &dpp, &y](const char *name) { + std::unique_ptr<FSEnt> ent; + + if (name[0] == '.') { + /* Skip dotfiles */ + return 0; + } + + int ret = get_ent(dpp, y, name, std::string(), ent); + if (ret < 0) + return ret; + + ent->stat(dpp); // Stat the object to get the type + + ret = ent->fill_cache(dpp, y, cb); + if (ret < 0) + return ret; + return 0; + }); + + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not list directory " << get_name() << ": " + << cpp_strerror(ret) << dendl; + return ret; + } + + return 0; +} + +std::string VersionedDirectory::get_cur_version() +{ + if (!cur_version) + return ""; + + rgw_obj_key key = decode_obj_key(cur_version->get_name()); + + return key.instance; +} + +int VersionedDirectory::remove_symlink(const DoutPrefixProvider *dpp, optional_yield y, std::string match) +{ + int ret; + + std::unique_ptr<Symlink> sl = + std::make_unique<Symlink>(get_name(), this, ctx); + ret = sl->stat(dpp); + if (ret < 0) { + /* Doesn't exist, nothing to do */ + if (ret == -ENOENT) + return 0; + return ret; + } + + if (!match.empty()) { + if (match != sl->get_target()->get_name()) + return -ENOKEY; + } + + ret = sl->remove(dpp, y, /*delete_children=*/false); + if (ret < 0) { + return ret; + } + + return 0; +} + int POSIXDriver::initialize(CephContext *cct, const DoutPrefixProvider *dpp) { FilterDriver::initialize(cct, dpp); @@ -253,30 +1871,23 @@ int POSIXDriver::initialize(CephContext *cct, const DoutPrefixProvider *dpp) g_conf().get_val<int64_t>("rgw_posix_cache_partitions"), g_conf().get_val<int64_t>("rgw_posix_cache_lmdb_count"))); - root_fd = openat(-1, base_path.c_str(), O_RDONLY | O_DIRECTORY | O_NOFOLLOW); - if (root_fd == -1) { - int err = errno; - if (err == ENOTDIR) { + root_dir = std::make_unique<Directory>(base_path, nullptr, ctx()); + int ret = root_dir->open(dpp); + if (ret < 0) { + if (ret == -ENOTDIR) { ldpp_dout(dpp, 0) << " ERROR: base path (" << base_path << "): was not a directory." << dendl; - return -err; - } else if (err == ENOENT) { - err = mkdir(base_path.c_str(), S_IRWXU); - if (err < 0) { - err = errno; + return ret; + } else if (ret == -ENOENT) { + ret = root_dir->create(dpp); + if (ret < 0) { ldpp_dout(dpp, 0) << " ERROR: could not create base path (" - << base_path << "): " << cpp_strerror(err) << dendl; - return -err; + << base_path << "): " << cpp_strerror(-ret) << dendl; + return ret; } - root_fd = ::open(base_path.c_str(), O_RDONLY | O_DIRECTORY | O_NOFOLLOW); } } - if (root_fd == -1) { - int err = errno; - ldpp_dout(dpp, 0) << " ERROR: could not open base path (" - << base_path << "): " << cpp_strerror(err) << dendl; - return -err; - } + ldpp_dout(dpp, 20) << "root_fd: " << root_dir->get_fd() << dendl; ldpp_dout(dpp, 20) << "SUCCESS" << dendl; return 0; @@ -338,14 +1949,14 @@ std::unique_ptr<Object> POSIXDriver::get_object(const rgw_obj_key& k) int POSIXDriver::load_bucket(const DoutPrefixProvider* dpp, const rgw_bucket& b, std::unique_ptr<Bucket>* bucket, optional_yield y) { - *bucket = std::make_unique<POSIXBucket>(this, root_fd, b); + *bucket = std::make_unique<POSIXBucket>(this, root_dir.get(), b); return (*bucket)->load_bucket(dpp, y); } std::unique_ptr<Bucket> POSIXDriver::get_bucket(const RGWBucketInfo& i) { /* Don't need to fetch the bucket info, use the provided one */ - return std::make_unique<POSIXBucket>(this, root_fd, i); + return std::make_unique<POSIXBucket>(this, root_dir.get(), i); } std::string POSIXDriver::zone_unique_trans_id(const uint64_t unique_num) @@ -420,18 +2031,6 @@ std::unique_ptr<Notification> POSIXDriver::get_notification( _user_id, _user_tenant, _req_id, y); } -int POSIXDriver::close() -{ - if (root_fd < 0) { - return 0; - } - - ::close(root_fd); - root_fd = -1; - - return 0; -} - // TODO: marker and other params int POSIXDriver::list_buckets(const DoutPrefixProvider* dpp, const rgw_owner& owner, const std::string& tenant, const std::string& marker, @@ -542,7 +2141,7 @@ int POSIXBucket::create(const DoutPrefixProvider* dpp, info.quota = *params.quota; } - int ret = set_attrs(attrs); + int ret = set_attrs(params.attrs); if (ret < 0) { return ret; } @@ -587,34 +2186,9 @@ std::unique_ptr<Object> POSIXBucket::get_object(const rgw_obj_key& k) return std::make_unique<POSIXObject>(driver, k, this); } -int POSIXObject::fill_bde(const DoutPrefixProvider *dpp, optional_yield y, rgw_bucket_dir_entry& bde) +int POSIXObject::fill_cache(const DoutPrefixProvider *dpp, optional_yield y, fill_cache_cb_t& cb) { - std::unique_ptr<User> owner; - (void)get_owner(dpp, y, &owner); - - get_key().get_index_key(&bde.key); - bde.ver.pool = 1; - bde.ver.epoch = 1; - bde.exists = true; - bde.meta.category = RGWObjCategory::Main; - bde.meta.size = get_size(); - bde.meta.mtime = get_mtime(); - if (owner) { - bde.meta.owner = owner->get_id().to_str(); - bde.meta.owner_display_name = owner->get_display_name(); - } else { - bde.meta.owner = "unknown"; - bde.meta.owner_display_name = "unknown"; - } - bde.meta.accounted_size = get_size(); - bde.meta.storage_class = RGW_STORAGE_CLASS_STANDARD; - bde.meta.appendable = true; - bufferlist etag_bl; - if (get_attr(RGW_ATTR_ETAG, etag_bl)) { - bde.meta.etag = etag_bl.to_str(); - } - - return 0; + return ent->fill_cache(dpp, y, cb); } int POSIXDriver::mint_listing_entry(const std::string &bname, @@ -629,7 +2203,7 @@ int POSIXDriver::mint_listing_entry(const std::string &bname, if (ret < 0) return ret; - obj = b->get_object(decode_obj_key(bde.key.name.c_str())); + obj = b->get_object(decode_obj_key(bde.key.name)); pobj = static_cast<POSIXObject *>(obj.get()); if (!pobj->check_exists(nullptr)) { @@ -641,90 +2215,60 @@ int POSIXDriver::mint_listing_entry(const std::string &bname, if (ret < 0) return ret; - ret = pobj->fill_bde(nullptr, null_yield, bde); - if (ret < 0) - return ret; + ret = pobj->fill_cache(nullptr, null_yield, + [&bde](const DoutPrefixProvider *dpp, rgw_bucket_dir_entry &nbde) -> int { + bde = nbde; + return 0; + }); - return 0; + return ret; } int POSIXBucket::fill_cache(const DoutPrefixProvider* dpp, optional_yield y, - fill_cache_cb_t cb) + fill_cache_cb_t& cb) { - int ret = for_each(dpp, [this, &cb, &dpp, &y](const char* name) { - int ret; - std::unique_ptr<Object> obj; - POSIXObject* pobj; - - if (name[0] == '.') { - /* Skip dotfiles */ - return 0; - } - - obj = get_object(decode_obj_key(name)); - pobj = static_cast<POSIXObject*>(obj.get()); - - if (!pobj->check_exists(dpp)) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - - ret = pobj->get_obj_attrs(y, dpp); - if (ret < 0) - return ret; - - rgw_bucket_dir_entry bde{}; - ret = pobj->fill_bde(dpp, y, bde); - if (ret < 0) - return ret; - - cb(dpp, bde); - - return 0; - }); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not list bucket " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return ret; - } - - return 0; + return dir->fill_cache(dpp, y, cb); } -// TODO marker and other params int POSIXBucket::list(const DoutPrefixProvider* dpp, ListParams& params, int max, ListResults& results, optional_yield y) { int count{0}; bool in_prefix{false}; // Names in the cache are in OID format + rgw_obj_key marker_key(params.marker); + params.marker = marker_key.get_oid(); { - rgw_obj_key key(params.marker); - params.marker = key.get_oid(); - key.set(params.prefix); - params.prefix = key.get_oid(); - } - // Names are url_encoded, so encode prefix and delimiter - // Names seem to not be url_encoded in cache - //params.prefix = url_encode(params.prefix); - //params.delim = url_encode(params.delim); + rgw_obj_key key(params.prefix); + params.prefix = key.name; + } if (max <= 0) { return 0; } + //params.list_versions int ret = driver->get_bucket_cache()->list_bucket( dpp, y, this, params.marker.name, [&](const rgw_bucket_dir_entry& bde) -> bool { std::string ns; // bde.key can be encoded with the namespace. Decode it here - if (!params.marker.empty() && params.marker == bde.key.name) { + rgw_obj_key bde_key{bde.key}; + if (!params.list_versions && !bde.is_visible()) { + return true; + } + if (params.list_versions && versioned() && bde_key.instance.empty()) { + return true; + } + if (bde_key.ns != params.ns) { + // Namespace must match + return true; + } + if (!marker_key.empty() && marker_key == bde_key.name) { // Skip marker return true; } if (!params.prefix.empty()) { // We have a prefix, only match - if (!bde.key.name.starts_with(params.prefix)) { + if (!bde_key.name.starts_with(params.prefix)) { // Prefix doesn't match; skip if (in_prefix) { return false; @@ -743,7 +2287,7 @@ int POSIXBucket::list(const DoutPrefixProvider* dpp, ListParams& params, } return true; } - auto delim_pos = bde.key.name.find(params.delim, params.prefix.size()); + auto delim_pos = bde_key.name.find(params.delim, params.prefix.size()); if (delim_pos == std::string_view::npos) { // Straight prefix match results.next_marker.set(bde.key); @@ -755,10 +2299,8 @@ int POSIXBucket::list(const DoutPrefixProvider* dpp, ListParams& params, } return true; } - std::string prefix_key = - bde.key.name.substr(0, delim_pos + params.delim.length()); - rgw_obj_key::parse_raw_oid(prefix_key, &results.next_marker); - // Use results.next_marker.name for prefix_key, since it's been decoded + results.next_marker = + bde_key.name.substr(0, delim_pos + params.delim.length()); if (!results.common_prefixes.contains(results.next_marker.name)) { results.common_prefixes[results.next_marker.name] = true; count++; // Count will be checked when we exit prefix @@ -776,7 +2318,7 @@ int POSIXBucket::list(const DoutPrefixProvider* dpp, ListParams& params, } if (!params.delim.empty()) { // Delimiter, but no prefix - auto delim_pos = bde.key.name.find(params.delim) ; + auto delim_pos = bde_key.name.find(params.delim) ; if (delim_pos == std::string_view::npos) { // Delimiter doesn't match, insert results.next_marker.set(bde.key); @@ -789,8 +2331,8 @@ int POSIXBucket::list(const DoutPrefixProvider* dpp, ListParams& params, return true; } std::string prefix_key = - bde.key.name.substr(0, delim_pos + params.delim.length()); - if (!params.marker.empty() && params.marker == prefix_key) { + bde_key.name.substr(0, delim_pos + params.delim.length()); + if (!marker_key.empty() && marker_key == prefix_key) { // Skip marker return true; } @@ -847,8 +2389,14 @@ int POSIXBucket::remove(const DoutPrefixProvider* dpp, bool delete_children, optional_yield y) { - return delete_directory(parent_fd, get_fname().c_str(), - delete_children, dpp); + int ret = dir->remove(dpp, y, delete_children); + if (ret < 0) { + return ret; + } + + driver->get_bucket_cache()->invalidate_bucket(dpp, get_name()); + + return ret; } int POSIXBucket::remove_bypass_gc(int concurrent_max, @@ -867,34 +2415,32 @@ int POSIXBucket::load_bucket(const DoutPrefixProvider* dpp, optional_yield y) /* Skip dotfiles */ return -ERR_INVALID_OBJECT_NAME; } - ret = stat(dpp); + ret = dir->stat(dpp); if (ret < 0) { return ret; } - mtime = ceph::real_clock::from_time_t(stx.stx_mtime.tv_sec); - info.creation_time = ceph::real_clock::from_time_t(stx.stx_btime.tv_sec); + mtime = ceph::real_clock::from_time_t(dir->get_stx().stx_mtime.tv_sec); + info.creation_time = ceph::real_clock::from_time_t(dir->get_stx().stx_btime.tv_sec); - ret = open(dpp); + ret = dir->open(dpp); if (ret < 0) { return ret; } - get_x_attrs(y, dpp, dir_fd, attrs, get_name()); - auto iter = attrs.find(RGW_POSIX_ATTR_BUCKET_INFO); - if (iter != attrs.end()) { - // Proper bucket with saved info - try { - auto bufit = iter->second.cbegin(); - decode(info, bufit); - } catch (buffer::error &err) { - ldout(driver->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode " RGW_POSIX_ATTR_BUCKET_INFO " attr" << dendl; - return -EINVAL; - } - // info isn't stored in attrs - attrs.erase(RGW_POSIX_ATTR_BUCKET_INFO); - } else { + ret = dir->read_attrs(dpp, y, attrs); + if (ret < 0) { + return ret; + } + + RGWBucketInfo bak_info = info;; + ret = decode_attr(attrs, RGW_POSIX_ATTR_BUCKET_INFO, info); + if (ret < 0) { // TODO dang: fake info up (UID to owner conversion?) + info = bak_info; + } else { + // Don't leave info visible in attributes + attrs.erase(RGW_POSIX_ATTR_BUCKET_INFO); } return 0; @@ -924,14 +2470,22 @@ int POSIXBucket::read_stats(const DoutPrefixProvider *dpp, auto& main = stats[RGWObjCategory::Main]; // TODO: bucket stats shouldn't have to list all objects - return for_each(dpp, [this, dpp, &main] (const char* name) { + return dir->for_each(dpp, [this, dpp, &main] (const char* name) { if (name[0] == '.') { /* Skip dotfiles */ return 0; } - struct statx lstx; - int ret = statx(dir_fd, name, AT_SYMLINK_NOFOLLOW, STATX_ALL, &lstx); + std::unique_ptr<FSEnt> dent; + int ret = dir->get_ent(dpp, null_yield, name, std::string(), dent); + if (ret < 0) { + ret = errno; + ldpp_dout(dpp, 0) << "ERROR: could not get ent for object " << name << ": " + << cpp_strerror(ret) << dendl; + return -ret; + } + + ret = dent->stat(dpp); if (ret < 0) { ret = errno; ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << ": " @@ -939,6 +2493,8 @@ int POSIXBucket::read_stats(const DoutPrefixProvider *dpp, return -ret; } + struct statx& lstx = dent->get_stx(); + if (S_ISREG(lstx.stx_mode) || S_ISDIR(lstx.stx_mode)) { main.num_objects++; main.size += lstx.stx_size; @@ -948,6 +2504,7 @@ int POSIXBucket::read_stats(const DoutPrefixProvider *dpp, return 0; }); + return 0; } int POSIXBucket::read_stats_async(const DoutPrefixProvider *dpp, @@ -982,7 +2539,7 @@ int POSIXBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::r struct timespec ts[2]; ts[0].tv_nsec = UTIME_OMIT; ts[1] = ceph::real_clock::to_timespec(mtime); - int ret = utimensat(parent_fd, get_fname().c_str(), ts, AT_SYMLINK_NOFOLLOW); + int ret = utimensat(dir->get_parent()->get_fd(), get_fname().c_str(), ts, AT_SYMLINK_NOFOLLOW); if (ret < 0) { ret = errno; ldpp_dout(dpp, 0) << "ERROR: could not set mtime on bucket " << get_name() << ": " @@ -995,57 +2552,26 @@ int POSIXBucket::put_info(const DoutPrefixProvider* dpp, bool exclusive, ceph::r int POSIXBucket::write_attrs(const DoutPrefixProvider* dpp, optional_yield y) { - int ret = open(dpp); + int ret = dir->open(dpp); if (ret < 0) { return ret; } - // Bucket info is stored as an attribute, but on in attrs[] + // Bucket info is stored as an attribute, but not in attrs[] bufferlist bl; encode(info, bl); - ret = write_x_attr(dpp, y, dir_fd, RGW_POSIX_ATTR_BUCKET_INFO, bl, get_name()); - if (ret < 0) { - return ret; - } + Attrs extra_attrs; + extra_attrs[RGW_POSIX_ATTR_BUCKET_INFO] = bl; - for (auto& it : attrs) { - ret = write_x_attr(dpp, y, dir_fd, it.first, it.second, get_name()); - if (ret < 0) { - return ret; - } - } - return 0; + return dir->write_attrs(dpp, y, attrs, &extra_attrs); } int POSIXBucket::check_empty(const DoutPrefixProvider* dpp, optional_yield y) { - DIR* dir; - struct dirent* entry; - int ret; - - ret = open(dpp); - if (ret < 0) { - return ret; - } - - dir = fdopendir(dir_fd); - if (dir == NULL) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not open bucket " << get_name() << " for listing: " - << cpp_strerror(ret) << dendl; - return -ret; - } - - errno = 0; - while ((entry = readdir(dir)) != NULL) { - if (entry->d_name[0] != '.') { - return -ENOTEMPTY; - } - if (entry->d_name[1] == '.' || entry->d_name[1] == '\0') { - continue; - } - } - return 0; + return dir->for_each(dpp, [](const char* name) { + /* for_each filters out "." and "..", so reaching here is not empty */ + return -ENOTEMPTY; + }); } int POSIXBucket::check_quota(const DoutPrefixProvider *dpp, RGWQuota& quota, uint64_t obj_size, @@ -1058,13 +2584,12 @@ int POSIXBucket::try_refresh_info(const DoutPrefixProvider* dpp, ceph::real_time { *pmtime = mtime; - int ret = open(dpp); + int ret = dir->open(dpp); if (ret < 0) { return ret; } - get_x_attrs(y, dpp, dir_fd, attrs, get_name()); - return 0; + return dir->read_attrs(dpp, y, attrs); } int POSIXBucket::read_usage(const DoutPrefixProvider *dpp, uint64_t start_epoch, @@ -1122,297 +2647,85 @@ int POSIXBucket::list_multiparts(const DoutPrefixProvider *dpp, std::map<std::string, bool> *common_prefixes, bool *is_truncated, optional_yield y) { - //std::vector<std::unique_ptr<MultipartUpload>> nup; - //int ret; -// - //ret = next->list_multiparts(dpp, prefix, marker, delim, max_uploads, nup, - //common_prefixes, is_truncated); - //if (ret < 0) - //return ret; -// - //for (auto& ent : nup) { - //uploads.emplace_back(std::make_unique<POSIXMultipartUpload>(std::move(ent), this, driver)); - //} - - return 0; -} - -int POSIXBucket::abort_multiparts(const DoutPrefixProvider* dpp, CephContext* cct, optional_yield y) -{ - return 0; -} - -int POSIXBucket::create(const DoutPrefixProvider* dpp, optional_yield y, bool* existed) -{ - int ret = mkdirat(parent_fd, get_fname().c_str(), S_IRWXU); - if (ret < 0) { - ret = errno; - if (ret != EEXIST) { - if (dpp) - ldpp_dout(dpp, 0) << "ERROR: could not create bucket " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } else if (existed != nullptr) { - *existed = true; - } - return -ret; - } - - return write_attrs(dpp, y); -} - -std::string POSIXBucket::get_fname() -{ - std::string name; - - if (ns) - name = "." + *ns + "_" + url_encode(get_name(), true); - else - name = url_encode(get_name(), true); - - return name; -} - -int POSIXBucket::get_shadow_bucket(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& ns, - const std::string& tenant, const std::string& name, - bool create, std::unique_ptr<POSIXBucket>* shadow) -{ - std::optional<std::string> ons{std::nullopt}; + int count = 0; int ret; - POSIXBucket* bp; - rgw_bucket b; - - b.tenant = tenant; - b.name = name; - - if (!ns.empty()) { - ons = ns; - } - - open(dpp); - - bp = new POSIXBucket(driver, dir_fd, b, ons); - ret = bp->load_bucket(dpp, y); - if (ret == -ENOENT && create) { - /* Create it if it doesn't exist */ - ret = bp->create(dpp, y, nullptr); - } - if (ret < 0) { - delete bp; - return ret; - } - shadow->reset(bp); - return 0; -} + ret = dir->for_each(dpp, [this, dpp, y, &count, &max_uploads, &is_truncated, &uploads] (const char* name) { + std::string_view d_name = name; + static std::string mp_pre{"." + mp_ns + "_"}; + if (!d_name.starts_with(mp_pre)) { + /* Skip non-uploads */ + return 0; + } -template <typename F> -int POSIXBucket::for_each(const DoutPrefixProvider* dpp, const F& func) -{ - DIR* dir; - struct dirent* entry; - int ret; + if (count >= max_uploads) { + if (is_truncated) { + *is_truncated = true; + } - ret = open(dpp); - if (ret < 0) { - return ret; - } + return -EAGAIN; + } - dir = fdopendir(dir_fd); - if (dir == NULL) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not open bucket " << get_name() << " for listing: " - << cpp_strerror(ret) << dendl; - return -ret; - } + d_name.remove_prefix(mp_pre.size()); - rewinddir(dir); + ACLOwner owner; + std::unique_ptr<MultipartUpload> upload = + std::make_unique<POSIXMultipartUpload>( + driver, this, std::string(d_name), std::nullopt, owner, + real_clock::now()); + rgw_placement_rule* rule{nullptr}; + int ret = upload->get_info(dpp, y, &rule, nullptr); + if (ret < 0) + return 0; + uploads.emplace(uploads.end(), std::move(upload)); + count++; - while ((entry = readdir(dir)) != NULL) { - int r = func(entry->d_name); - if (r < 0) { - ret = r; - } - } + return 0; + }); - if (ret == -EAGAIN) { - /* Limit reached */ - ret = 0; - } return ret; } -int POSIXBucket::open(const DoutPrefixProvider* dpp) +int POSIXBucket::abort_multiparts(const DoutPrefixProvider* dpp, CephContext* cct, optional_yield y) { - if (dir_fd >= 0) { - return 0; - } - - int ret = openat(parent_fd, get_fname().c_str(), - O_RDONLY | O_DIRECTORY | O_NOFOLLOW); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not open bucket " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - - dir_fd = ret; - return 0; } -// This is for renaming a shadow bucket to a MP object. It won't work work for a normal bucket -int POSIXBucket::rename(const DoutPrefixProvider* dpp, optional_yield y, Object* target_obj) +int POSIXBucket::create(const DoutPrefixProvider* dpp, optional_yield y, bool* existed) { - POSIXObject *to = static_cast<POSIXObject*>(target_obj); - POSIXBucket *tb = static_cast<POSIXBucket*>(target_obj->get_bucket()); - std::string src_fname = get_fname(); - std::string dst_fname = to->get_fname(); - int flags = 0; - - if (to->check_exists(dpp)) { - flags = RENAME_EXCHANGE; - } - // swap - int ret = renameat2(tb->get_dir_fd(dpp), src_fname.c_str(), tb->get_dir_fd(dpp), dst_fname.c_str(), flags); - if(ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: renameat2 for shadow object could not finish: " - << cpp_strerror(ret) << dendl; - return -ret; - } - - // Update saved bucket info - info.bucket.name = to->get_name(); - bufferlist bl; - encode(info, bl); - ret = write_x_attr(dpp, y, dir_fd, RGW_POSIX_ATTR_BUCKET_INFO, bl, get_name()); + int ret = dir->create(dpp, existed); if (ret < 0) { return ret; } - // Delete old one (could be file or directory) - struct statx stx; - ret = statx(parent_fd, src_fname.c_str(), AT_SYMLINK_NOFOLLOW, - STATX_ALL, &stx); - if (ret < 0) { - ret = errno; - if (ret == ENOENT) { - return 0; - } - ldpp_dout(dpp, 0) << "ERROR: could not stat object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - - if (S_ISREG(stx.stx_mode)) { - ret = unlinkat(parent_fd, src_fname.c_str(), 0); - } else if (S_ISDIR(stx.stx_mode)) { - ret = delete_directory(parent_fd, src_fname.c_str(), true, dpp); - } - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not remove old file " << get_name() - << ": " << cpp_strerror(ret) << dendl; - return -ret; - } - - return 0; -} - -int POSIXBucket::close() -{ - if (dir_fd < 0) { - return 0; - } - - ::close(dir_fd); - dir_fd = -1; - - return 0; + return write_attrs(dpp, y); } -int POSIXBucket::stat(const DoutPrefixProvider* dpp) +std::string POSIXBucket::get_fname() { - if (stat_done) { - return 0; - } - - int ret = statx(parent_fd, get_fname().c_str(), AT_SYMLINK_NOFOLLOW, - STATX_ALL, &stx); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not stat bucket " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - if (!S_ISDIR(stx.stx_mode)) { - /* Not a bucket */ - return -EINVAL; - } - - stat_done = true; - return 0; + return bucket_fname(get_name(), ns); } -/* This is a shadow bucket. Copy it into a new shadow bucket in the destination - * bucket */ -int POSIXBucket::copy(const DoutPrefixProvider *dpp, optional_yield y, - POSIXBucket* db, POSIXObject* dest) +int POSIXBucket::rename(const DoutPrefixProvider* dpp, optional_yield y, Object* target_obj) { - std::unique_ptr<POSIXBucket> dsb; - - // Delete the target, in case it's not a multipart - int ret = dest->delete_object(dpp, y, rgw::sal::FLAG_LOG_OP); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not remove dest object " - << dest->get_name() << dendl; - return ret; - } - - ret = db->get_shadow_bucket(dpp, y, std::string(), std::string(), dest->get_fname(), true, &dsb); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not create shadow bucket " << dest->get_name() - << " in bucket " << db->get_name() << dendl; - return ret; - } + int ret; + Directory* dst_dir = dir->get_parent(); - ret = for_each(dpp, [this, &dsb, &dpp, &y](const char *name) { - int ret; - std::unique_ptr<Object> sobj; - POSIXObject* sop; - std::unique_ptr<Object> dobj; - POSIXObject* dop; + info.bucket.name = target_obj->get_key().get_oid(); + ns.reset(); - if (name[0] == '.') { - /* Skip dotfiles */ - return 0; - } - - sobj = this->get_object(decode_obj_key(name)); - sop = static_cast<POSIXObject*>(sobj.get()); - if (!sop->check_exists(dpp)) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - ret = sop->open(dpp, true); + if (!target_obj->get_instance().empty()) { + /* This is a versioned object. Need to handle versioneddirectory */ + POSIXObject *to = static_cast<POSIXObject *>(target_obj); + ret = to->open(dpp, true, false); if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not open source object " << get_name() - << dendl; + ldpp_dout(dpp, 0) << "ERROR: could not open target obj " << to->get_name() << dendl; return ret; } + dst_dir = static_cast<Directory *>(to->get_fsent()); + } - dobj = dsb->get_object(decode_obj_key(name)); - dop = static_cast<POSIXObject*>(dobj.get()); - - return sop->copy(dpp, y, this, dsb.get(), dop); - }); - - return ret; + return dir->rename(dpp, y, dst_dir, get_fname()); } int POSIXObject::delete_object(const DoutPrefixProvider* dpp, @@ -1434,46 +2747,17 @@ int POSIXObject::delete_object(const DoutPrefixProvider* dpp, return ret; } - if (!b->versioned()) { - if (shadow) { - ret = shadow->remove(dpp, true, y); - if (ret < 0) { - return ret; - } - shadow.reset(nullptr); - } - - int ret = unlinkat(b->get_dir_fd(dpp), get_fname().c_str(), 0); - if (ret < 0) { - ret = errno; - if (errno != ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: could not remove object " << get_name() - << ": " << cpp_strerror(ret) << dendl; - return -ret; - } - } - return 0; - } + ret = ent->remove(dpp, y, /*delete_children=*/false); - // Versioned directory. Need to remove all objects matching - b->for_each(dpp, [this, &dpp, &b](const char* name) { - int ret; - std::string_view vname(name); - - if (vname.find(get_fname().c_str()) != std::string_view::npos) { - ret = unlinkat(b->get_dir_fd(dpp), name, 0); - if (ret < 0) { - ret = errno; - if (errno != ENOENT) { - ldpp_dout(dpp, 0) << "ERROR: could not remove object " << name - << ": " << cpp_strerror(ret) << dendl; - return -ret; - } - } - } - return 0; - }); + cls_rgw_obj_key key; + get_key().get_index_key(&key); + driver->get_bucket_cache()->remove_entry(dpp, b->get_name(), key); + if (!key.instance.empty() && !ent->exists()) { + /* Remove the non-versiond key as well */ + key.instance.clear(); + driver->get_bucket_cache()->remove_entry(dpp, b->get_name(), key); + } return 0; } @@ -1516,6 +2800,7 @@ int POSIXObject::copy_object(const ACLOwner& owner, << dendl; return -EINVAL; } + bool has_instance = !get_key().instance.empty(); // Source must exist, and we need to know if it's a shadow obj if (!check_exists(dpp)) { @@ -1525,11 +2810,84 @@ int POSIXObject::copy_object(const ACLOwner& owner, return -ret; } - if (shadow) { - return shadow->copy(dpp, y, db, dobj); - } else { - return copy(dpp, y, sb, db, dobj); + if (!get_key().instance.empty() && !has_instance) { + /* For copy, no instance meance copy all instances. Clear intance id if it + * was passed in clear. */ + get_key().instance.clear(); + } + + if (state.obj != dobj->state.obj) { + /* An actual copy, copy the data */ + ret = copy(dpp, y, sb, db, dobj); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to copy object " << get_key() + << dendl; + return ret; + } + } + dobj->make_ent(ent->get_type()); + + /* Set up attributes for destination */ + Attrs src_attrs = state.attrset; + /* Come attrs are never copied */ + src_attrs.erase(RGW_ATTR_DELETE_AT); + src_attrs.erase(RGW_ATTR_OBJECT_RETENTION); + src_attrs.erase(RGW_ATTR_OBJECT_LEGAL_HOLD); + /* Some attrs, if they exist, always come from the call */ + src_attrs[RGW_ATTR_ACL] = attrs[RGW_ATTR_ACL]; + bufferlist rt; + if (get_attr(RGW_ATTR_OBJECT_RETENTION, rt)) { + src_attrs[RGW_ATTR_OBJECT_RETENTION] = rt; + } + bufferlist lh; + if (get_attr(RGW_ATTR_OBJECT_LEGAL_HOLD, lh)) { + src_attrs[RGW_ATTR_OBJECT_LEGAL_HOLD] = lh; + } + + bufferlist tt; + switch (attrs_mod) { + case ATTRSMOD_REPLACE: + /* Keep tags if not set */ + if (!attrs[RGW_ATTR_ETAG].length()) { + attrs[RGW_ATTR_ETAG] = src_attrs[RGW_ATTR_ETAG]; + } + if (!attrs[RGW_ATTR_TAIL_TAG].length() && + rgw::sal::get_attr(src_attrs, RGW_ATTR_TAIL_TAG, tt)) { + attrs[RGW_ATTR_TAIL_TAG] = tt; + } + break; + + case ATTRSMOD_MERGE: + for (auto it = src_attrs.begin(); it != src_attrs.end(); ++it) { + if (attrs.find(it->first) == attrs.end()) { + attrs[it->first] = it->second; + } + } + break; + case ATTRSMOD_NONE: + attrs = src_attrs; + ret = 0; + break; + } + + /* Some attrs always come from the source */ + bufferlist com; + if (rgw::sal::get_attr(src_attrs, RGW_ATTR_COMPRESSION, com)) { + attrs[RGW_ATTR_COMPRESSION] = com; + } + bufferlist mpu; + if (rgw::sal::get_attr(src_attrs, RGW_POSIX_ATTR_MPUPLOAD, mpu)) { + attrs[RGW_POSIX_ATTR_MPUPLOAD] = mpu; } + bufferlist ownerbl; + if (rgw::sal::get_attr(src_attrs, RGW_POSIX_ATTR_OWNER, ownerbl)) { + attrs[RGW_POSIX_ATTR_OWNER] = ownerbl; + } + bufferlist pot; + if (rgw::sal::get_attr(src_attrs, RGW_POSIX_ATTR_OBJECT_TYPE, pot)) { + attrs[RGW_POSIX_ATTR_OBJECT_TYPE] = pot; + } + return dobj->set_obj_attrs(dpp, &attrs, nullptr, y, rgw::sal::FLAG_LOG_OP); } int POSIXObject::load_obj_state(const DoutPrefixProvider* dpp, optional_yield y, bool follow_olh) @@ -1547,40 +2905,51 @@ int POSIXObject::set_obj_attrs(const DoutPrefixProvider* dpp, Attrs* setattrs, { if (delattrs) { for (auto& it : *delattrs) { + if (it.first == RGW_POSIX_ATTR_OBJECT_TYPE) { + // Don't delete type + continue; + } state.attrset.erase(it.first); } } if (setattrs) { for (auto& it : *setattrs) { + if (it.first == RGW_POSIX_ATTR_OBJECT_TYPE) { + // Don't overwrite type + continue; + } state.attrset[it.first] = it.second; } } - for (auto& it : state.attrset) { - int ret = write_attr(dpp, y, it.first, it.second); - if (ret < 0) { - return ret; - } - } + write_attrs(dpp, y); return 0; } int POSIXObject::get_obj_attrs(optional_yield y, const DoutPrefixProvider* dpp, rgw_obj* target_obj) { + //int fd; + int ret = open(dpp, false); if (ret < 0) { return ret; } - return get_x_attrs(y, dpp, obj_fd, state.attrset, get_name()); + ret = ent->read_attrs(dpp, y, state.attrset); + if (ret == 0) + state.has_attrs = true; + else + state.has_attrs = false; + + return ret; } int POSIXObject::modify_obj_attrs(const char* attr_name, bufferlist& attr_val, optional_yield y, const DoutPrefixProvider* dpp) { state.attrset[attr_name] = attr_val; - return write_attr(dpp, y, attr_name, attr_val); + return write_attrs(dpp, y); } int POSIXObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* attr_name, @@ -1588,12 +2957,12 @@ int POSIXObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* att { state.attrset.erase(attr_name); - int ret = open(dpp, true); + int ret = open(dpp); if (ret < 0) { return ret; } - ret = fremovexattr(obj_fd, attr_name); + ret = remove_x_attr(dpp, y, ent->get_fd(), attr_name, get_name()); if (ret < 0) { ret = errno; ldpp_dout(dpp, 0) << "ERROR: could not remover attribute " << attr_name << " for " << get_name() << ": " << cpp_strerror(ret) << dendl; @@ -1605,20 +2974,16 @@ int POSIXObject::delete_obj_attrs(const DoutPrefixProvider* dpp, const char* att bool POSIXObject::is_expired() { - bufferlist bl; - if (get_attr(RGW_ATTR_DELETE_AT, bl)) { - utime_t delete_at; - try { - auto bufit = bl.cbegin(); - decode(delete_at, bufit); - } catch (buffer::error& err) { - ldout(driver->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl; - return false; - } + utime_t delete_at; + if (!decode_attr(state.attrset, RGW_ATTR_DELETE_AT, delete_at)) { + ldout(driver->ctx(), 0) + << "ERROR: " << __func__ + << ": failed to decode " RGW_ATTR_DELETE_AT " attr" << dendl; + return false; + } - if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { - return true; - } + if (delete_at <= ceph_clock_now() && !delete_at.is_zero()) { + return true; } return false; @@ -1626,11 +2991,7 @@ bool POSIXObject::is_expired() void POSIXObject::gen_rand_obj_instance_name() { - enum { OBJ_INSTANCE_LEN = 32 }; - char buf[OBJ_INSTANCE_LEN + 1]; - - gen_rand_alphanumeric_no_underscore(driver->ctx(), buf, OBJ_INSTANCE_LEN); - state.obj.key.set_instance(buf); + state.obj.key.set_instance(gen_rand_instance_name()); } std::unique_ptr<MPSerializer> POSIXObject::get_serializer(const DoutPrefixProvider *dpp, const std::string& lock_name) @@ -1644,7 +3005,12 @@ int MPPOSIXSerializer::try_lock(const DoutPrefixProvider *dpp, utime_t dur, opti return -ENOENT; } - return 0; + POSIXBucket* b = static_cast<POSIXBucket*>(obj->get_bucket()); + if (b->get_dir()->get_type() == ObjectType::MULTIPART && b->get_dir_fd(dpp) > 0) { + return 0; + } + + return -ENOENT; } int POSIXObject::transition(Bucket* bucket, @@ -1718,7 +3084,7 @@ int POSIXObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y int uid = 0; int gid = 0; - int ret = fchownat(b->get_dir_fd(dpp), get_fname().c_str(), uid, gid, AT_SYMLINK_NOFOLLOW); + int ret = fchownat(b->get_dir_fd(dpp), get_fname(/*use_version=*/true).c_str(), uid, gid, AT_SYMLINK_NOFOLLOW); if (ret < 0) { ret = errno; ldpp_dout(dpp, 0) << "ERROR: could not remove object " << get_name() << ": " @@ -1729,103 +3095,101 @@ int POSIXObject::chown(User& new_user, const DoutPrefixProvider* dpp, optional_y return 0; } +int POSIXObject::get_cur_version(const DoutPrefixProvider* dpp, rgw_obj_key& key) +{ + return 0; +} + +int POSIXObject::set_cur_version(const DoutPrefixProvider *dpp) +{ + VersionedDirectory* vdir = static_cast<VersionedDirectory*>(ent.get()); + std::unique_ptr<FSEnt> child; + int ret = vdir->get_ent(dpp, null_yield, get_fname(true), std::string(), child); + if (ret < 0) + return ret; + + ret = vdir->set_cur_version_ent(dpp, child.get()); + return ret; +} + int POSIXObject::stat(const DoutPrefixProvider* dpp) { - if (stat_done) { - return 0; - } + int ret; - state.exists = false; - POSIXBucket *b = static_cast<POSIXBucket*>(get_bucket()); - if (!b) { - ldpp_dout(dpp, 0) << "ERROR: could not get bucket for " << get_name() << dendl; - return -EINVAL; + if (!ent) { + ret = static_cast<POSIXBucket *>(bucket)->get_dir()->get_ent( + dpp, null_yield, get_fname(/*use_version=*/false), state.obj.key.instance, ent); + if (ret < 0) { + state.exists = false; + return ret; + } } - int ret = statx(b->get_dir_fd(dpp), get_fname().c_str(), AT_SYMLINK_NOFOLLOW, - STATX_ALL, &stx); + ret = ent->stat(dpp); if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not stat object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; + state.exists = false; + return ret; } - if (S_ISREG(stx.stx_mode)) { - /* Normal object */ - state.accounted_size = state.size = stx.stx_size; - state.mtime = from_statx_timestamp(stx.stx_mtime); - } else if (S_ISDIR(stx.stx_mode)) { - /* multipart object */ - /* Get the shadow bucket */ - POSIXBucket* pb = static_cast<POSIXBucket*>(bucket); - ret = pb->get_shadow_bucket(dpp, null_yield, std::string(), - std::string(), get_fname(), false, &shadow); - if (ret < 0) { - return ret; - } - state.mtime = from_statx_timestamp(stx.stx_mtime); - /* Add up size of parts */ - uint64_t total_size{0}; - int fd = shadow->get_dir_fd(dpp); - shadow->for_each(dpp, [this, &total_size, fd, &dpp](const char* name) { - int ret; - struct statx stx; - std::string sname = name; + if (state.obj.key.instance.empty()) { + state.obj.key.instance = ent->get_cur_version(); + } - if (sname.rfind(MP_OBJ_PART_PFX, 0) != 0) { - /* Skip non-parts */ - return 0; - } + state.exists = ent->exists(); + if (!state.exists) { + return 0; + } - ret = statx(fd, name, AT_SYMLINK_NOFOLLOW, STATX_ALL, &stx); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not stat object " << name << ": " << cpp_strerror(ret) << dendl; - return -ret; - } + state.accounted_size = state.size = ent->get_stx().stx_size; + state.mtime = from_statx_timestamp(ent->get_stx().stx_mtime); - if (!S_ISREG(stx.stx_mode)) { - /* Skip non-files */ - return 0; - } + return 0; +} - parts[name] = stx.stx_size; - total_size += stx.stx_size; - return 0; - }); - state.accounted_size = state.size = total_size; - } else { - /* Not an object */ - return -EINVAL; - } +int POSIXObject::make_ent(ObjectType type) +{ + if (ent) + return 0; - stat_done = true; - state.exists = true; + switch (type.type) { + case ObjectType::UNKNOWN: + return -EINVAL; + case ObjectType::FILE: + ent = std::make_unique<File>( + get_fname(/*use_version=*/true), static_cast<POSIXBucket *>(bucket)->get_dir(), driver->ctx()); + break; + case ObjectType::DIRECTORY: + ent = std::make_unique<Directory>( + get_fname(/*use_version=*/true), static_cast<POSIXBucket *>(bucket)->get_dir(), driver->ctx()); + break; + case ObjectType::SYMLINK: + ent = std::make_unique<Symlink>( + get_fname(/*use_version=*/true), static_cast<POSIXBucket *>(bucket)->get_dir(), driver->ctx()); + break; + case ObjectType::MULTIPART: + ent = std::make_unique<MPDirectory>( + get_fname(/*use_version=*/true), static_cast<POSIXBucket *>(bucket)->get_dir(), driver->ctx()); + break; + case ObjectType::VERSIONED: + ent = std::make_unique<VersionedDirectory>( + get_fname(/*use_version=*/false), static_cast<POSIXBucket *>(bucket)->get_dir(), get_instance(), driver->ctx()); + break; + } return 0; } int POSIXObject::get_owner(const DoutPrefixProvider *dpp, optional_yield y, std::unique_ptr<User> *owner) { - bufferlist bl; - rgw_user u; - if (!get_attr(RGW_POSIX_ATTR_OWNER, bl)) { + POSIXOwner o; + int ret = decode_owner(get_attrs(), o); + if (ret < 0) { ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": No " RGW_POSIX_ATTR_OWNER " attr" << dendl; - return -EINVAL; - } - - try { - auto bufit = bl.cbegin(); - decode(u, bufit); - } catch (buffer::error &err) { - ldpp_dout(dpp, 0) << "ERROR: " << __func__ - << ": failed to decode " RGW_POSIX_ATTR_OWNER " attr" << dendl; - return -EINVAL; + return ret; } - *owner = driver->get_user(u); + *owner = driver->get_user(o.user); (*owner)->load_user(dpp, y); return 0; } @@ -1842,109 +3206,63 @@ std::unique_ptr<Object::DeleteOp> POSIXObject::get_delete_op() int POSIXObject::open(const DoutPrefixProvider* dpp, bool create, bool temp_file) { - if (obj_fd >= 0) { - return 0; - } + int ret{0}; - stat(dpp); - - if (shadow) { - obj_fd = shadow->get_dir_fd(dpp); - return obj_fd; - } - - POSIXBucket *b = static_cast<POSIXBucket*>(get_bucket()); - if (!b) { - ldpp_dout(dpp, 0) << "ERROR: could not get bucket for " << get_name() << dendl; - return -EINVAL; - } - - int ret, flags; - std::string path; - - if(temp_file) { - flags = O_TMPFILE | O_RDWR; - path = "."; - } else { - flags = O_RDWR | O_NOFOLLOW; - if (create) - flags |= O_CREAT; - path = get_fname(); + if (!ent) { + ret = stat(dpp); + if (ret < 0) { + if (!create) { + return ret; + } + if (versioned()) { + ret = make_ent(ObjectType::VERSIONED); + } else { + ret = make_ent(ObjectType::FILE); + } + } } - ret = openat(b->get_dir_fd(dpp), path.c_str(), flags, S_IRWXU); if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not open object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; + return ret; } - obj_fd = ret; + if (create) { + ret = ent->create(dpp, nullptr, temp_file); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: could not create " << ent->get_name() << dendl; + return ret; + } + } - return 0; + return ent->open(dpp); } -int POSIXObject::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y, uint32_t flags) +int POSIXObject::link_temp_file(const DoutPrefixProvider *dpp, optional_yield y) { - if (obj_fd < 0) { - return 0; - } - - char temp_file_path[PATH_MAX]; - // Only works on Linux - Non-portable - snprintf(temp_file_path, PATH_MAX, "/proc/self/fd/%d", obj_fd); - - POSIXBucket *b = static_cast<POSIXBucket*>(get_bucket()); - - if (!b) { - ldpp_dout(dpp, 0) << "ERROR: could not get bucket for " << get_name() << dendl; - return -EINVAL; - } - - int ret = linkat(AT_FDCWD, temp_file_path, b->get_dir_fd(dpp), get_temp_fname().c_str(), AT_SYMLINK_FOLLOW); - if(ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: linkat for temp file could not finish: " - << cpp_strerror(ret) << dendl; - return -ret; - } - - // Delete the target, in case it's a multipart - ret = delete_object(dpp, y, flags); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not remove dest object " - << get_name() << dendl; + std::string temp_fname = gen_temp_fname(); + int ret = ent->link_temp_file(dpp, y, temp_fname); + if (ret < 0) return ret; + + POSIXBucket *b = static_cast<POSIXBucket *>(get_bucket()); + if (!b) { + ldpp_dout(dpp, 0) << "ERROR: could not get bucket for " << get_name() + << dendl; + return -EINVAL; } - ret = renameat(b->get_dir_fd(dpp), get_temp_fname().c_str(), b->get_dir_fd(dpp), get_fname().c_str()); - if(ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: renameat for object could not finish: " - << cpp_strerror(ret) << dendl; - return -ret; - } - + fill_cache( nullptr, null_yield, + [&](const DoutPrefixProvider *dpp, rgw_bucket_dir_entry &bde) -> int { + driver->get_bucket_cache()->add_entry(dpp, b->get_name(), bde); + return 0; + }); return 0; } int POSIXObject::close() { - if (obj_fd < 0) { - return 0; - } - - int ret = ::fsync(obj_fd); - if(ret < 0) { - return ret; - } - - ret = ::close(obj_fd); - if(ret < 0) { - return ret; - } - obj_fd = -1; + if (ent) + return ent->close(); return 0; } @@ -1952,115 +3270,20 @@ int POSIXObject::close() int POSIXObject::read(int64_t ofs, int64_t left, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) { - if (!shadow) { - // Normal file, just read it - int64_t len = std::min(left + 1, READ_SIZE); - ssize_t ret; - - ret = lseek(obj_fd, ofs, SEEK_SET); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not seek object " << get_name() << " to " - << ofs << " :" << cpp_strerror(ret) << dendl; - return -ret; - } - - char read_buf[READ_SIZE]; - ret = ::read(obj_fd, read_buf, len); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not read object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - - bl.append(read_buf, ret); - - return ret; - } - - // It's a multipart object, find the correct file, open it, and read it - std::string pname; - for (auto part : parts) { - if (ofs < part.second) { - pname = part.first; - break; - } - - ofs -= part.second; - } - - if (pname.empty()) { - // ofs is past the end - return 0; - } - - POSIXObject* shadow_obj; - std::unique_ptr<rgw::sal::Object> obj = shadow->get_object(rgw_obj_key(pname)); - shadow_obj = static_cast<POSIXObject*>(obj.get()); - int ret = shadow_obj->open(dpp, false); - if (ret < 0) { - return ret; - } - - return shadow_obj->read(ofs, left, bl, dpp, y); + if (!ent) + return -ENOENT; + return ent->read(ofs, left, bl, dpp, y); } int POSIXObject::write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) { - if (shadow) { - // Can't write to a MP file - return -EINVAL; - } - - int64_t left = bl.length(); - char* curp = bl.c_str(); - ssize_t ret; - - ret = fchmod(obj_fd, S_IRUSR|S_IWUSR); - if(ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not change permissions on object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return ret; - } - - - ret = lseek(obj_fd, ofs, SEEK_SET); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not seek object " << get_name() << " to " - << ofs << " :" << cpp_strerror(ret) << dendl; - return -ret; - } - - while (left > 0) { - ret = ::write(obj_fd, curp, left); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not write object " << get_name() << ": " - << cpp_strerror(ret) << dendl; - return -ret; - } - - curp += ret; - left -= ret; - } - - return 0; + return ent->write(ofs, bl, dpp, y); } -int POSIXObject::write_attr(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, bufferlist& value) +int POSIXObject::write_attrs(const DoutPrefixProvider* dpp, optional_yield y) { - int ret; - std::string attrname; - - ret = open(dpp, true); - if (ret < 0) { - return ret; - } - - return write_x_attr(dpp, y, obj_fd, key, value, get_name()); + return ent->write_attrs(dpp, y, state.attrset, nullptr); } int POSIXObject::POSIXReadOp::prepare(optional_yield y, const DoutPrefixProvider* dpp) @@ -2163,82 +3386,12 @@ int POSIXObject::generate_attrs(const DoutPrefixProvider* dpp, optional_yield y) { int ret; - /* Generate an ETAG */ - if (shadow) { - ret = generate_mp_etag(dpp, y); - } else { - ret = generate_etag(dpp, y); - } - + ret = generate_etag(dpp, y); return ret; } int POSIXObject::generate_mp_etag(const DoutPrefixProvider* dpp, optional_yield y) { - int32_t count = 0; - char etag_buf[CEPH_CRYPTO_MD5_DIGESTSIZE]; - char final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 16]; - std::string etag; - bufferlist etag_bl; - MD5 hash; - // Allow use of MD5 digest in FIPS mode for non-cryptographic purposes - hash.SetFlags(EVP_MD_CTX_FLAG_NON_FIPS_ALLOW); - int ret; - rgw::sal::Bucket::ListParams params; - rgw::sal::Bucket::ListResults results; - - do { - static constexpr auto MAX_LIST_OBJS = 100u; - ret = shadow->list(dpp, params, MAX_LIST_OBJS, results, y); - if (ret < 0) { - return ret; - } - for (rgw_bucket_dir_entry& ent : results.objs) { - std::unique_ptr<rgw::sal::Object> obj; - POSIXObject* shadow_obj; - - if (MP_OBJ_PART_PFX.compare(0, std::string::npos, ent.key.name, - MP_OBJ_PART_PFX.size() != 0)) { - // Skip non-parts - continue; - } - - obj = shadow->get_object(rgw_obj_key(ent.key)); - shadow_obj = static_cast<POSIXObject*>(obj.get()); - ret = shadow_obj->get_obj_attrs(y, dpp); - if (ret < 0) { - return ret; - } - bufferlist etag_bl; - if (!shadow_obj->get_attr(RGW_ATTR_ETAG, etag_bl)) { - // Generate part's etag - ret = shadow_obj->generate_etag(dpp, y); - if (ret < 0) - return ret; - } - if (!shadow_obj->get_attr(RGW_ATTR_ETAG, etag_bl)) { - // Can't get etag. - return -EINVAL; - } - hex_to_buf(etag_bl.c_str(), etag_buf, CEPH_CRYPTO_MD5_DIGESTSIZE); - hash.Update((const unsigned char *)etag_buf, sizeof(etag_buf)); - count++; - } - } while (results.is_truncated); - - hash.Final((unsigned char *)etag_buf); - - buf_to_hex((unsigned char *)etag_buf, sizeof(etag_buf), final_etag_str); - snprintf(&final_etag_str[CEPH_CRYPTO_MD5_DIGESTSIZE * 2], - sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, - "-%" PRId32, count); - etag = final_etag_str; - ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; - - etag_bl.append(etag); - (void)write_attr(dpp, y, RGW_ATTR_ETAG, etag_bl); - get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); - return 0; } @@ -2274,36 +3427,25 @@ int POSIXObject::generate_etag(const DoutPrefixProvider* dpp, optional_yield y) hash.Final(m); buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5); etag_bl.append(calc_md5, sizeof(calc_md5)); - (void)write_attr(dpp, y, RGW_ATTR_ETAG, etag_bl); get_attrs().emplace(std::move(RGW_ATTR_ETAG), std::move(etag_bl)); - - return 0; + return write_attrs(dpp, y); } -const std::string POSIXObject::get_fname() +const std::string POSIXObject::get_fname(bool use_version) { - std::string fname = url_encode(get_obj().get_oid(), true); - - if (!get_obj().key.get_ns().empty()) { - /* Namespaced objects are hidden */ - fname.insert(0, 1, '.'); - } - - return fname; + return get_key_fname(state.obj.key, use_version); } -void POSIXObject::gen_temp_fname() +std::string POSIXObject::gen_temp_fname() { + std::string temp_fname; enum { RAND_SUFFIX_SIZE = 8 }; char buf[RAND_SUFFIX_SIZE + 1]; gen_rand_alphanumeric_no_underscore(driver->ctx(), buf, RAND_SUFFIX_SIZE); - temp_fname = "." + get_fname() + "."; + temp_fname = "." + get_fname(/*use_version=*/true) + "."; temp_fname.append(buf); -} -const std::string POSIXObject::get_temp_fname() -{ return temp_fname; } @@ -2333,7 +3475,7 @@ int POSIXObject::POSIXReadOp::iterate(const DoutPrefixProvider* dpp, int64_t ofs /* Read some */ int ret = cb->handle_data(bl, 0, len); if (ret < 0) { - ldpp_dout(dpp, 0) << " ERROR: callback failed on " << source->get_name() << dendl; + ldpp_dout(dpp, 0) << " ERROR: callback failed on " << source->get_name() << ": " << ret << dendl; return ret; } @@ -2369,53 +3511,11 @@ int POSIXObject::POSIXDeleteOp::delete_obj(const DoutPrefixProvider* dpp, int POSIXObject::copy(const DoutPrefixProvider *dpp, optional_yield y, POSIXBucket *sb, POSIXBucket *db, POSIXObject *dobj) { - off64_t scount = 0, dcount = 0; - - int ret = open(dpp, false); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not open source object " << get_name() - << dendl; - return ret; - } - - // Delete the target, in case it's a multipart - ret = dobj->delete_object(dpp, y, rgw::sal::FLAG_LOG_OP); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not remove dest object " - << dobj->get_name() << dendl; - return ret; - } - - ret = dobj->open(dpp, true); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not open dest object " - << dobj->get_name() << dendl; - return ret; - } - - ret = copy_file_range(obj_fd, &scount, dobj->get_fd(), &dcount, stx.stx_size, 0); - if (ret < 0) { - ret = errno; - ldpp_dout(dpp, 0) << "ERROR: could not copy object " << dobj->get_name() - << ": " << cpp_strerror(ret) << dendl; - return -ret; - } + rgw_obj_key dst_key = dobj->get_key(); + if (!get_key().instance.empty()) + dst_key.instance = get_key().instance; - ret = get_obj_attrs(y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not get attrs for source object " - << get_name() << dendl; - return ret; - } - - ret = dobj->set_obj_attrs(dpp, &get_attrs(), NULL, y, rgw::sal::FLAG_LOG_OP); - if (ret < 0) { - ldpp_dout(dpp, 0) << "ERROR: could not write attrs to dest object " - << dobj->get_name() << dendl; - return ret; - } - - return 0; + return ent->copy(dpp, y, db->get_dir(), get_key_fname(dst_key, /*use_version=*/true)); } void POSIXMPObj::init_gen(POSIXDriver* driver, const std::string& _oid, ACLOwner& _owner) @@ -2432,55 +3532,57 @@ void POSIXMPObj::init_gen(POSIXDriver* driver, const std::string& _oid, ACLOwner int POSIXMultipartPart::load(const DoutPrefixProvider* dpp, optional_yield y, POSIXDriver* driver, rgw_obj_key& key) { - if (shadow) { + if (part_file) { /* Already loaded */ return 0; } - shadow = std::make_unique<POSIXObject>(driver, key, upload->get_shadow()); + part_file = std::make_unique<File>(get_key_fname(key, false), upload->get_shadow()->get_dir(), driver->ctx()); - // Stat the shadow object to get things like size - int ret = shadow->load_obj_state(dpp, y); + // Stat the part_file object to get things like size + int ret = part_file->stat(dpp, y); if (ret < 0) { return ret; } - ret = shadow->get_obj_attrs(y, dpp); + Attrs attrs; + ret = part_file->read_attrs(dpp, y, attrs); if (ret < 0) { return ret; } - auto ait = shadow->get_attrs().find(RGW_POSIX_ATTR_MPUPLOAD); - if (ait == shadow->get_attrs().end()) { - ldout(driver->ctx(), 0) << "ERROR: " << __func__ << ": Not a part: " << key << dendl; - return -EINVAL; - } - - try { - auto bit = ait->second.cbegin(); - decode(info, bit); - } catch (buffer::error& err) { - ldout(driver->ctx(), 0) << "ERROR: " << __func__ << ": failed to decode part info: " << key << dendl; - return -EINVAL; + ret = decode_attr(attrs, RGW_POSIX_ATTR_MPUPLOAD, info); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: " << __func__ << ": failed to decode part info: " << key << dendl; + return ret; } return 0; } -int POSIXMultipartUpload::load(bool create) +int POSIXMultipartUpload::load(const DoutPrefixProvider *dpp, bool create) { + int ret = 0; if (!shadow) { POSIXBucket* pb = static_cast<POSIXBucket*>(bucket); - return pb->get_shadow_bucket(nullptr, null_yield, mp_ns, - std::string(), get_meta(), create, &shadow); + std::optional<std::string> ns{mp_ns}; + + std::unique_ptr<Directory> mpdir = std::make_unique<MPDirectory>(bucket_fname(get_meta(), ns), pb->get_dir(), driver->ctx()); + + shadow = std::make_unique<POSIXBucket>(driver, std::move(mpdir), rgw_bucket(std::string(), get_meta()), mp_ns); + + ret = shadow->load_bucket(dpp, null_yield); + if (ret == -ENOENT && create) { + ret = shadow->create(dpp, null_yield, nullptr); + } } - return 0; + return ret; } std::unique_ptr<rgw::sal::Object> POSIXMultipartUpload::get_meta_obj() { - load(); + load(nullptr); if (!shadow) { // This upload doesn't exist, but the API doesn't check this until it calls // on the *serializer*. So make a fake object in the parent bucket that @@ -2497,9 +3599,9 @@ int POSIXMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, int ret; /* Create the shadow bucket */ - ret = load(true); + ret = load(dpp, true); if (ret < 0) { - ldpp_dout(dpp, 0) << " ERROR: could not get shadow bucket for mp upload " + ldpp_dout(dpp, 0) << " ERROR: could not get shadow dir for mp upload " << get_key() << dendl; return ret; } @@ -2509,8 +3611,14 @@ int POSIXMultipartUpload::init(const DoutPrefixProvider *dpp, optional_yield y, meta_obj = get_meta_obj(); + ret = static_cast<POSIXObject*>(meta_obj.get())->open(dpp, true); + if (ret < 0) { + return ret; + } + mp_obj.upload_info.cksum_type = cksum_type; mp_obj.upload_info.dest_placement = dest_placement; + mp_obj.owner = owner; bufferlist bl; encode(mp_obj, bl); @@ -2528,7 +3636,7 @@ int POSIXMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext int ret; int last_num = 0; - ret = load(); + ret = load(dpp); if (ret < 0) { return ret; } @@ -2538,6 +3646,8 @@ int POSIXMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext params.prefix = MP_OBJ_PART_PFX; params.marker = MP_OBJ_PART_PFX + fmt::format("{:0>5}", marker); + params.marker.ns = mp_ns; + params.ns = mp_ns; ret = shadow->list(dpp, params, num_parts + 1, results, y); if (ret < 0) { @@ -2548,6 +3658,8 @@ int POSIXMultipartUpload::list_parts(const DoutPrefixProvider *dpp, CephContext POSIXMultipartPart* ppart = static_cast<POSIXMultipartPart*>(part.get()); rgw_obj_key key(ent.key); + // Parts are namespaced in the bucket listing + key.ns.clear(); ret = ppart->load(dpp, y, driver, key); if (ret == 0) { /* Skip anything that's not a part */ @@ -2571,8 +3683,10 @@ int POSIXMultipartUpload::abort(const DoutPrefixProvider *dpp, CephContext *cct, { int ret; - ret = load(); + ret = load(dpp); if (ret < 0) { + if (ret == -ENOENT) + ret = ERR_NO_SUCH_UPLOAD; return ret; } @@ -2609,6 +3723,8 @@ int POSIXMultipartUpload::complete(const DoutPrefixProvider *dpp, auto etags_iter = part_etags.begin(); rgw::sal::Attrs& attrs = target_obj->get_attrs(); + ofs = accounted_size = 0; + do { ret = list_parts(dpp, cct, max_parts, marker, &marker, &truncated, y); if (ret == -ENOENT) { @@ -2700,7 +3816,6 @@ int POSIXMultipartUpload::complete(const DoutPrefixProvider *dpp, sizeof(final_etag_str) - CEPH_CRYPTO_MD5_DIGESTSIZE * 2, "-%lld", (long long)part_etags.size()); etag = final_etag_str; - ldpp_dout(dpp, 10) << "calculated etag: " << etag << dendl; etag_bl.append(etag); @@ -2719,7 +3834,22 @@ int POSIXMultipartUpload::complete(const DoutPrefixProvider *dpp, } // Rename to target_obj - return shadow->rename(dpp, y, target_obj); + ret = shadow->rename(dpp, y, target_obj); + if (ret < 0) { + ldpp_dout(dpp, 0) << "ERROR: failed to rename to final name " << target_obj->get_name() + << ": " << cpp_strerror(ret) << dendl; + return ret; + } + + POSIXObject *to = static_cast<POSIXObject*>(target_obj); + POSIXBucket *sb = static_cast<POSIXBucket*>(target_obj->get_bucket()); + if (sb->versioned()) { + ret = to->set_cur_version(dpp); + if (ret < 0) { + return ret; + } + } + return 0; } int POSIXMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield y, @@ -2744,24 +3874,22 @@ int POSIXMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield } if (rule) { - if (mp_obj.oid.empty()) { + if (mp_obj.upload_info.dest_placement.name.empty()) { if (!meta_obj) { meta_obj = get_meta_obj(); - ret = meta_obj->get_obj_attrs(y, dpp); - if (ret < 0) { - ldpp_dout(dpp, 0) << " ERROR: could not get meta object for mp upload " - << get_key() << dendl; - return ret; - } } - bufferlist bl; - if (!meta_obj->get_attr(RGW_POSIX_ATTR_MPUPLOAD, bl)) { + ret = meta_obj->get_obj_attrs(y, dpp); + if (ret < 0) { + ldpp_dout(dpp, 0) << " ERROR: could not get meta object for mp upload " + << get_key() << dendl; + return ret; + } + ret = decode_attr(meta_obj->get_attrs(), RGW_POSIX_ATTR_MPUPLOAD, mp_obj); + if (ret < 0) { ldpp_dout(dpp, 0) << " ERROR: could not get meta object attrs for mp upload " << get_key() << dendl; return ret; } - auto biter = bl.cbegin(); - decode(mp_obj, biter); } *rule = &mp_obj.upload_info.dest_placement; } @@ -2769,6 +3897,15 @@ int POSIXMultipartUpload::get_info(const DoutPrefixProvider *dpp, optional_yield return 0; } +std::string POSIXMultipartUpload::get_fname() +{ + std::string name; + + name = "." + mp_ns + "_" + url_encode(get_meta(), true); + + return name; +} + std::unique_ptr<Writer> POSIXMultipartUpload::get_writer( const DoutPrefixProvider *dpp, optional_yield y, @@ -2781,20 +3918,26 @@ std::unique_ptr<Writer> POSIXMultipartUpload::get_writer( std::string fname = MP_OBJ_PART_PFX + fmt::format("{:0>5}", part_num); rgw_obj_key part_key(fname); - load(); + load(dpp); - return std::make_unique<POSIXMultipartWriter>(dpp, y, shadow->clone(), part_key, driver, - owner, ptail_placement_rule, part_num); + return std::make_unique<POSIXMultipartWriter>(dpp, y, shadow.get(), part_key, + driver, owner, + ptail_placement_rule, part_num); } int POSIXMultipartWriter::prepare(optional_yield y) { - return obj->open(dpp, true); + int ret = part_file->create(dpp, /*existed=*/nullptr, /*tempfile=*/false); + if (ret < 0) { + return ret; + } + + return part_file->open(dpp); } int POSIXMultipartWriter::process(bufferlist&& data, uint64_t offset) { - return obj->write(offset, data, dpp, null_yield); + return part_file->write(offset, data, dpp, null_yield); } int POSIXMultipartWriter::complete( @@ -2816,12 +3959,17 @@ int POSIXMultipartWriter::complete( if (if_match) { if (strcmp(if_match, "*") == 0) { // test the object is existing - if (!obj->check_exists(dpp)) { + if (!part_file->exists()) { return -ERR_PRECONDITION_FAILED; } } else { + Attrs attrs; bufferlist bl; - if (!obj->get_attr(RGW_ATTR_ETAG, bl)) { + ret = part_file->read_attrs(rctx.dpp, rctx.y, attrs); + if (ret < 0) { + return -ERR_PRECONDITION_FAILED; + } + if (!get_attr(attrs, RGW_ATTR_ETAG, bl)) { return -ERR_PRECONDITION_FAILED; } if (strncmp(if_match, bl.c_str(), bl.length()) != 0) { @@ -2839,15 +3987,13 @@ int POSIXMultipartWriter::complete( encode(info, bl); attrs[RGW_POSIX_ATTR_MPUPLOAD] = bl; - for (auto& attr : attrs) { - ret = obj->write_attr(rctx.dpp, rctx.y, attr.first, attr.second); - if (ret < 0) { - ldpp_dout(rctx.dpp, 20) << "ERROR: failed writing attr " << attr.first << dendl; - return ret; - } + ret = part_file->write_attrs(rctx.dpp, rctx.y, attrs, /*extra_attrs=*/nullptr); + if (ret < 0) { + ldpp_dout(rctx.dpp, 20) << "ERROR: failed writing attrs for " << part_file->get_name() << dendl; + return ret; } - ret = obj->close(); + ret = part_file->close(); if (ret < 0) { ldpp_dout(rctx.dpp, 20) << "ERROR: failed closing file" << dendl; return ret; @@ -2858,15 +4004,24 @@ int POSIXMultipartWriter::complete( int POSIXAtomicWriter::prepare(optional_yield y) { - obj.get_obj_attrs(y, dpp); - obj.close(); - obj.gen_temp_fname(); - return obj.open(dpp, true, true); + int ret; + + if (obj->versioned()) { + ret = obj->make_ent(ObjectType::VERSIONED); + } else { + ret = obj->make_ent(ObjectType::FILE); + } + if (ret < 0) { + return ret; + } + obj->get_obj_attrs(y, dpp); + obj->close(); + return obj->open(dpp, true, true); } int POSIXAtomicWriter::process(bufferlist&& data, uint64_t offset) { - return obj.write(offset, data, dpp, null_yield); + return obj->write(offset, data, dpp, null_yield); } int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, @@ -2885,12 +4040,12 @@ int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, if (if_match) { if (strcmp(if_match, "*") == 0) { // test the object is existing - if (!obj.check_exists(dpp)) { + if (!obj->check_exists(dpp)) { return -ERR_PRECONDITION_FAILED; } } else { bufferlist bl; - if (!obj.get_attr(RGW_ATTR_ETAG, bl)) { + if (!get_attr(obj->get_attrs(), RGW_ATTR_ETAG, bl)) { return -ERR_PRECONDITION_FAILED; } if (strncmp(if_match, bl.c_str(), bl.length()) != 0) { @@ -2901,12 +4056,12 @@ int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, if (if_nomatch) { if (strcmp(if_nomatch, "*") == 0) { // test the object is not existing - if (obj.check_exists(dpp)) { + if (obj->check_exists(dpp)) { return -ERR_PRECONDITION_FAILED; } } else { bufferlist bl; - if (!obj.get_attr(RGW_ATTR_ETAG, bl)) { + if (!get_attr(obj->get_attrs(), RGW_ATTR_ETAG, bl)) { return -ERR_PRECONDITION_FAILED; } if (strncmp(if_nomatch, bl.c_str(), bl.length()) == 0) { @@ -2915,25 +4070,37 @@ int POSIXAtomicWriter::complete(size_t accounted_size, const std::string& etag, } } - bufferlist bl; - encode(owner, bl); - attrs[RGW_POSIX_ATTR_OWNER] = bl; + bufferlist owner_bl; + std::unique_ptr<User> user; + user = driver->get_user(std::get<rgw_user>(owner.id)); + user->load_user(rctx.dpp, rctx.y); + POSIXOwner po{std::get<rgw_user>(owner.id), user->get_display_name()}; + encode(po, owner_bl); + attrs[RGW_POSIX_ATTR_OWNER] = owner_bl; - for (auto attr : attrs) { - ret = obj.write_attr(rctx.dpp, rctx.y, attr.first, attr.second); - if (ret < 0) { - ldpp_dout(rctx.dpp, 20) << "ERROR: POSIXAtomicWriter failed writing attr " << attr.first << dendl; - return ret; - } + bufferlist type_bl; + + obj->set_attrs(attrs); + ret = obj->write_attrs(rctx.dpp, rctx.y); + if (ret < 0) { + ldpp_dout(rctx.dpp, 20) << "ERROR: POSIXAtomicWriter failed writing attrs for " + << obj->get_name() << dendl; + return ret; } - ret = obj.link_temp_file(rctx.dpp, rctx.y, flags); + ret = obj->link_temp_file(rctx.dpp, rctx.y); if (ret < 0) { ldpp_dout(dpp, 20) << "ERROR: POSIXAtomicWriter failed writing temp file" << dendl; return ret; } - ret = obj.close(); + ret = obj->open(dpp); + if (ret < 0) { + ldpp_dout(rctx.dpp, 20) << "ERROR: POSIXAtomicWriter failed opening file" << dendl; + return ret; + } + + ret = obj->stat(dpp); if (ret < 0) { ldpp_dout(rctx.dpp, 20) << "ERROR: POSIXAtomicWriter failed closing file" << dendl; return ret; diff --git a/src/rgw/driver/posix/rgw_sal_posix.h b/src/rgw/driver/posix/rgw_sal_posix.h index 7483139da33..5291ba500f9 100644 --- a/src/rgw/driver/posix/rgw_sal_posix.h +++ b/src/rgw/driver/posix/rgw_sal_posix.h @@ -29,17 +29,337 @@ class POSIXObject; using BucketCache = file::listing::BucketCache<POSIXDriver, POSIXBucket>; +/* integration w/bucket listing cache */ +using fill_cache_cb_t = file::listing::fill_cache_cb_t; + +struct ObjectType { + enum Type { + UNKNOWN = 0, + FILE = 1, + DIRECTORY = 2, + VERSIONED = 3, + MULTIPART = 4, + SYMLINK = 5, + }; + uint32_t type{UNKNOWN}; + + ObjectType &operator=(ObjectType::Type &&_t) { + type = _t; + return *this; + }; + + ObjectType() {} + ObjectType(Type _t) : type(_t){} + + bool operator==(const ObjectType &t) const { return (type == t.type); } + bool operator==(const ObjectType::Type &t) const { return (type == t); } + + void encode(bufferlist &bl) const { + ENCODE_START(1, 1, bl); + encode(type, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &bl) { + DECODE_START(1, bl); + ceph::decode(type, bl); + DECODE_FINISH(bl); + } + friend inline std::ostream &operator<<(std::ostream &out, + const ObjectType &t) { + switch (t.type) { + case UNKNOWN: + out << "UNKNOWN"; + break; + case FILE: + out << "FILE"; + break; + case DIRECTORY: + out << "DIRECTORY"; + break; + case VERSIONED: + out << "VERSIONED"; + break; + case MULTIPART: + out << "MULTIPART"; + break; + case SYMLINK: + out << "SYMLINK"; + break; + } + return out; + } +}; +WRITE_CLASS_ENCODER(ObjectType); + +class Directory; + +class FSEnt { +protected: + std::string fname; + Directory* parent; + int fd{-1}; + bool exist{false}; + struct statx stx; + bool stat_done{false}; + CephContext* ctx; + +public: + FSEnt(std::string _name, Directory* _parent, CephContext* _ctx) : fname(_name), parent(_parent), ctx(_ctx) {} + FSEnt(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : fname(_name), parent(_parent), exist(true), stx(_stx), stat_done(true), ctx(_ctx) {} + FSEnt(const FSEnt& _e) : + fname(_e.fname), + parent(_e.parent), + exist(_e.exist), + stx(_e.stx), + stat_done(_e.stat_done), + ctx(_e.ctx) + { } + + virtual ~FSEnt() { } + + int get_fd() { return fd; }; + std::string& get_name() { return fname; } + Directory* get_parent() { return parent; } + bool exists() { return exist; } + struct statx& get_stx() { return stx; } + virtual ObjectType get_type() { return ObjectType::UNKNOWN; }; + + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) = 0; + virtual int open(const DoutPrefixProvider *dpp) = 0; + virtual int close() = 0; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false); + virtual int remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) = 0; + virtual int write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) = 0; + virtual int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) = 0; + virtual int write_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs, Attrs* extra_attrs); + virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs); + virtual int copy(const DoutPrefixProvider *dpp, optional_yield y, Directory* dst_dir, const std::string& name) = 0; + virtual int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, std::string target_fname) = 0; + virtual std::unique_ptr<FSEnt> clone_base() = 0; + virtual int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb); + virtual std::string get_cur_version() { return ""; }; +}; + +class File : public FSEnt { +protected: + +public: + File(std::string _name, Directory* _parent, CephContext* _ctx) : FSEnt(_name, _parent, _ctx) + {} + File(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : FSEnt(_name, _parent, _stx, _ctx) + {} + File(const File& _f) : FSEnt(_f) {} + virtual ~File() { close(); } + + virtual uint64_t get_size() { return stx.stx_size; } + virtual ObjectType get_type() override { return ObjectType::FILE; }; + + + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) override; + virtual int open(const DoutPrefixProvider *dpp) override; + virtual int close() override; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false) override; + virtual int remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) override; + virtual int write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int copy(const DoutPrefixProvider *dpp, optional_yield y, Directory* dst_dir, const std::string& name) override; + virtual int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, std::string target_fname) override; + virtual std::unique_ptr<FSEnt> clone_base() override { + return std::make_unique<File>(*this); + } + std::unique_ptr<File> clone() { + return std::make_unique<File>(*this); + } +}; + +class Directory : public FSEnt { +protected: + +public: + Directory(std::string _name, Directory* _parent, CephContext* _ctx) : FSEnt(_name, _parent, _ctx) + {} + Directory(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : FSEnt(_name, _parent, _stx, _ctx) + {} + Directory(const Directory& _d) : FSEnt(_d) {} + virtual ~Directory() { close(); } + + virtual ObjectType get_type() override { return ObjectType::DIRECTORY; }; + + virtual bool file_exists(std::string& name); + + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) override; + virtual int open(const DoutPrefixProvider *dpp) override; + virtual int close() override; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false) override; + virtual int remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) override; + template <typename F> + int for_each(const DoutPrefixProvider* dpp, const F& func); + virtual int rename(const DoutPrefixProvider* dpp, optional_yield y, Directory* dst_dir, std::string dst_name); + virtual int write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual std::unique_ptr<FSEnt> clone_base() override { + return std::make_unique<Directory>(*this); + } + virtual std::unique_ptr<Directory> clone_dir() { + return std::make_unique<Directory>(*this); + } + std::unique_ptr<Directory> clone() { + return std::make_unique<Directory>(*this); + } + virtual int copy(const DoutPrefixProvider *dpp, optional_yield y, Directory* dst_dir, const std::string& name) override; + virtual int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, std::string target_fname) override; + virtual int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb) override; + + int get_ent(const DoutPrefixProvider *dpp, optional_yield y, const std::string& name, const std::string& version, std::unique_ptr<FSEnt>& ent); +}; + +class Symlink: public File { + std::unique_ptr<FSEnt> target; +public: + Symlink(std::string _name, Directory* _parent, std::string _tgt, CephContext* _ctx) : + File(_name, _parent, _ctx) + { fill_target(nullptr, parent, fname,_tgt, target, _ctx); } + Symlink(std::string _name, Directory* _parent, CephContext* _ctx) : + File(_name, _parent, _ctx) + {} + Symlink(std::string _name, Directory* _parent, struct statx& _stx, std::string _tgt, CephContext* _ctx) : + File(_name, _parent, _stx, _ctx) + { fill_target(nullptr, parent, fname,_tgt, target, _ctx); } + Symlink(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : + File(_name, _parent, _stx, _ctx) + {} + Symlink(const Symlink& _s) : File(_s) {} + virtual ~Symlink() { close(); } + + static int fill_target(const DoutPrefixProvider *dpp, Directory* parent, std::string sname, std::string tname, std::unique_ptr<FSEnt>& ent, CephContext* _ctx); + + virtual ObjectType get_type() override { return ObjectType::SYMLINK; }; + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) override; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false) override; + virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs) override; + FSEnt* get_target() { return target.get(); } + virtual std::unique_ptr<FSEnt> clone_base() override { + return std::make_unique<Symlink>(*this); + } + std::unique_ptr<Symlink> clone() { + return std::make_unique<Symlink>(*this); + } + virtual int copy(const DoutPrefixProvider *dpp, optional_yield y, Directory* dst_dir, const std::string& name) override; + virtual int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb) override; +}; + +class MPDirectory : public Directory { + std::string tmpname; +protected: + std::map<std::string, int64_t> parts; + std::unique_ptr<FSEnt> cur_read_part; + +public: + MPDirectory(std::string _name, Directory* _parent, CephContext* _ctx) : Directory(_name, _parent, _ctx) + {} + MPDirectory(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : Directory(_name, _parent, _stx, _ctx) + {} + MPDirectory(const MPDirectory& _d) : + Directory(_d), + parts(_d.parts) + { if (_d.cur_read_part) cur_read_part = _d.cur_read_part->clone_base(); } + virtual ~MPDirectory() { close(); } + + virtual ObjectType get_type() override { return ObjectType::MULTIPART; }; + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) override; + virtual int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, std::string target_fname) override; + virtual int remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) override; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false) override; + std::unique_ptr<File> get_part_file(int partnum); + virtual std::unique_ptr<FSEnt> clone_base() override { + return std::make_unique<MPDirectory>(*this); + } + virtual std::unique_ptr<Directory> clone_dir() override { + return std::make_unique<MPDirectory>(*this); + } + std::unique_ptr<MPDirectory> clone() { + return std::make_unique<MPDirectory>(*this); + } + virtual int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb) override; +}; + +class VersionedDirectory : public Directory { +protected: + std::string instance_id; + std::unique_ptr<FSEnt> cur_version; + +public: + VersionedDirectory(std::string _name, Directory* _parent, CephContext* _ctx) : Directory(_name, _parent, _ctx) + {} + VersionedDirectory(std::string _name, Directory* _parent, std::string _instance_id, CephContext* _ctx) : + Directory(_name, _parent, _ctx), + instance_id(_instance_id) + {} + VersionedDirectory(std::string _name, Directory* _parent, std::unique_ptr<FSEnt>&& _cur, CephContext* _ctx) : + Directory(_name, _parent, _ctx), + cur_version(std::move(_cur)) + {} + VersionedDirectory(std::string _name, Directory* _parent, struct statx& _stx, CephContext* _ctx) : Directory(_name, _parent, _stx, _ctx) + {} + VersionedDirectory(std::string _name, Directory* _parent, std::string _instance_id, struct statx& _stx, CephContext* _ctx) : + Directory(_name, _parent, _stx, _ctx), + instance_id(_instance_id) + {} + VersionedDirectory(const VersionedDirectory& _d) : + Directory(_d), + instance_id(_d.instance_id), + cur_version(_d.cur_version ? _d.cur_version->clone_base() : nullptr) + { } + VersionedDirectory(const Directory& _d) : + Directory(_d) + { } + virtual ~VersionedDirectory() { close(); } + + virtual ObjectType get_type() override { return ObjectType::VERSIONED; }; + virtual int create(const DoutPrefixProvider *dpp, bool* existed = nullptr, bool temp_file = false) override; + virtual int open(const DoutPrefixProvider *dpp) override; + virtual int stat(const DoutPrefixProvider *dpp, bool force = false) override; + virtual int read_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs) override; + virtual int write_attrs(const DoutPrefixProvider* dpp, optional_yield y, Attrs& attrs, Attrs* extra_attrs) override; + virtual int write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y) override; + virtual int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, std::string target_fname) override; + virtual int remove(const DoutPrefixProvider* dpp, optional_yield y, bool delete_children) override; + virtual std::string get_cur_version() override; + std::string get_new_instance(); + int remove_symlink(const DoutPrefixProvider *dpp, optional_yield y, std::string match = ""); + int add_file(const DoutPrefixProvider *dpp, std::unique_ptr<FSEnt>&& file, bool* existed = nullptr, bool temp_file = false); + FSEnt* get_cur_version_ent() { return cur_version.get(); }; + int set_cur_version_ent(const DoutPrefixProvider *dpp, FSEnt* file); + virtual std::unique_ptr<FSEnt> clone_base() override { + return std::make_unique<VersionedDirectory>(*this); + } + virtual std::unique_ptr<Directory> clone_dir() override { + return std::make_unique<VersionedDirectory>(*this); + } + std::unique_ptr<VersionedDirectory> clone() { + return std::make_unique<VersionedDirectory>(*this); + } + virtual int copy(const DoutPrefixProvider *dpp, optional_yield y, Directory* dst_dir, const std::string& name) override; + virtual int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb) override; +}; + +std::string get_key_fname(rgw_obj_key& key, bool use_version); + class POSIXDriver : public FilterDriver { -private: +protected: std::unique_ptr<BucketCache> bucket_cache; std::string base_path; + std::unique_ptr<Directory> root_dir; int root_fd; public: POSIXDriver(Driver* _next) : FilterDriver(_next) { } - virtual ~POSIXDriver() { close(); } + virtual ~POSIXDriver() { } virtual int initialize(CephContext *cct, const DoutPrefixProvider *dpp) override; virtual std::unique_ptr<User> get_user(const rgw_user& u) override; virtual int get_user_by_access_key(const DoutPrefixProvider* dpp, const @@ -98,12 +418,11 @@ public: optional_yield y) override; /* Internal APIs */ - int get_root_fd() { return root_fd; } + int get_root_fd() { return root_dir->get_fd(); } + Directory* get_root_dir() { return root_dir.get(); } const std::string& get_base_path() const { return base_path; } BucketCache* get_bucket_cache() { return bucket_cache.get(); } - int close(); - /* called by BucketCache layer when a new object is discovered * by inotify or similar */ int mint_listing_entry( @@ -135,40 +454,45 @@ public: class POSIXBucket : public StoreBucket { private: POSIXDriver* driver; - int parent_fd{-1}; - int dir_fd{-1}; - struct statx stx; - bool stat_done{false}; RGWAccessControlPolicy acls; - std::optional<std::string> ns; + std::optional<std::string> ns{std::nullopt}; + std::unique_ptr<Directory> dir; public: - POSIXBucket(POSIXDriver *_dr, int _p_fd, const rgw_bucket& _b, std::optional<std::string> _ns = std::nullopt) + POSIXBucket(POSIXDriver *_dr, Directory* _p_dir, const rgw_bucket& _b, std::optional<std::string> _ns = std::nullopt) + : StoreBucket(_b), + driver(_dr), + acls(), + ns(_ns), + dir(std::make_unique<Directory>(get_fname(), _p_dir, _dr->ctx())) + { } + + POSIXBucket(POSIXDriver *_dr, std::unique_ptr<Directory> _this_dir, const rgw_bucket& _b, std::optional<std::string> _ns = std::nullopt) : StoreBucket(_b), driver(_dr), - parent_fd(_p_fd), acls(), - ns(_ns) + ns(_ns), + dir(std::move(_this_dir)) { } - POSIXBucket(POSIXDriver *_dr, int _p_fd, const RGWBucketInfo& _i) + POSIXBucket(POSIXDriver *_dr, Directory* _p_dir, const RGWBucketInfo& _i) : StoreBucket(_i), driver(_dr), - parent_fd(_p_fd), - acls() + acls(), + ns(), + dir(std::make_unique<Directory>(get_fname(), _p_dir, _dr->ctx())) { } POSIXBucket(const POSIXBucket& _b) : StoreBucket(_b), driver(_b.driver), - parent_fd(_b.parent_fd), - /* Don't want to copy dir_fd */ - stx(_b.stx), - stat_done(_b.stat_done), acls(_b.acls), - ns(_b.ns) {} + ns(_b.ns) + { + dir.reset(static_cast<Directory*>(_b.dir->clone().get())); + } - virtual ~POSIXBucket() { close(); } + virtual ~POSIXBucket() { } virtual std::unique_ptr<Object> get_object(const rgw_obj_key& key) override; virtual int list(const DoutPrefixProvider* dpp, ListParams&, int, @@ -238,41 +562,26 @@ public: /* Internal APIs */ int create(const DoutPrefixProvider *dpp, optional_yield y, bool* existed); - void set_stat(struct statx _stx) { stx = _stx; stat_done = true; } - int get_dir_fd(const DoutPrefixProvider *dpp) { open(dpp); return dir_fd; } + Directory* get_dir() { return dir.get(); } + int get_dir_fd(const DoutPrefixProvider *dpp) { dir->open(dpp); return dir->get_fd(); } /* TODO dang Escape the bucket name for file use */ std::string get_fname(); - int get_shadow_bucket(const DoutPrefixProvider* dpp, optional_yield y, - const std::string& ns, const std::string& tenant, - const std::string& name, bool create, - std::unique_ptr<POSIXBucket>* shadow); - template <typename F> - int for_each(const DoutPrefixProvider* dpp, const F& func); - int open(const DoutPrefixProvider *dpp); - int close(); + std::optional<std::string> get_ns() { return ns; } int rename(const DoutPrefixProvider* dpp, optional_yield y, Object* target_obj); - int copy(const DoutPrefixProvider *dpp, optional_yield y, POSIXBucket* db, POSIXObject* dobj); - - /* integration w/bucket listing cache */ - using fill_cache_cb_t = file::listing::fill_cache_cb_t; /* enumerate all entries by callback, in any order */ - int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t cb); + int fill_cache(const DoutPrefixProvider* dpp, optional_yield y, fill_cache_cb_t& cb); private: - int stat(const DoutPrefixProvider *dpp); int write_attrs(const DoutPrefixProvider *dpp, optional_yield y); }; /* POSIXBucket */ class POSIXObject : public StoreObject { +public: private: POSIXDriver* driver; RGWAccessControlPolicy acls; - int obj_fd{-1}; - struct statx stx; - bool stat_done{false}; - std::unique_ptr<rgw::sal::POSIXBucket> shadow; - std::string temp_fname; + std::unique_ptr<FSEnt> ent; std::map<std::string, int64_t> parts; public: @@ -305,16 +614,19 @@ public: POSIXObject(POSIXDriver *_dr, const rgw_obj_key& _k) : StoreObject(_k), driver(_dr), - acls() {} + acls() + {} POSIXObject(POSIXDriver* _driver, const rgw_obj_key& _k, Bucket* _b) : StoreObject(_k, _b), driver(_driver), - acls() {} + acls() + {} POSIXObject(const POSIXObject& _o) : StoreObject(_o), - driver(_o.driver) {} + driver(_o.driver) + {} virtual ~POSIXObject() { close(); } @@ -385,29 +697,31 @@ public: return std::unique_ptr<Object>(new POSIXObject(*this)); } - int open(const DoutPrefixProvider *dpp, bool create, bool temp_file = false); + FSEnt* get_fsent() { return ent.get(); } + int open(const DoutPrefixProvider *dpp, bool create = false, bool temp_file = false); int close(); int write(int64_t ofs, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y); - int write_attr(const DoutPrefixProvider* dpp, optional_yield y, const std::string& key, bufferlist& value); - int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y, uint32_t flags); - void gen_temp_fname(); - /* TODO dang Escape the object name for file use */ - const std::string get_fname(); + int write_attrs(const DoutPrefixProvider* dpp, optional_yield y); + int link_temp_file(const DoutPrefixProvider* dpp, optional_yield y); + std::string gen_temp_fname(); + const std::string get_fname(bool use_version); bool check_exists(const DoutPrefixProvider* dpp) { stat(dpp); return state.exists; } int get_owner(const DoutPrefixProvider *dpp, optional_yield y, std::unique_ptr<User> *owner); int copy(const DoutPrefixProvider *dpp, optional_yield y, POSIXBucket *sb, POSIXBucket *db, POSIXObject *dobj); - int fill_bde(const DoutPrefixProvider *dpp, optional_yield y, rgw_bucket_dir_entry &bde); + int fill_cache(const DoutPrefixProvider *dpp, optional_yield y, fill_cache_cb_t& cb); + int set_cur_version(const DoutPrefixProvider *dpp); + int stat(const DoutPrefixProvider *dpp); + int make_ent(ObjectType type); + bool versioned() { return bucket->versioned(); } protected: int read(int64_t ofs, int64_t end, bufferlist& bl, const DoutPrefixProvider* dpp, optional_yield y); int generate_attrs(const DoutPrefixProvider* dpp, optional_yield y); - int get_fd() { return obj_fd; }; private: - const std::string get_temp_fname(); - int stat(const DoutPrefixProvider *dpp); int generate_mp_etag(const DoutPrefixProvider* dpp, optional_yield y); int generate_etag(const DoutPrefixProvider* dpp, optional_yield y); + int get_cur_version(const DoutPrefixProvider *dpp, rgw_obj_key &key); }; struct POSIXMPObj { @@ -508,7 +822,7 @@ class POSIXMultipartPart : public StoreMultipartPart { protected: POSIXUploadPartInfo info; POSIXMultipartUpload* upload; - std::unique_ptr<rgw::sal::POSIXObject> shadow; + std::unique_ptr<File> part_file; public: POSIXMultipartPart(POSIXMultipartUpload* _upload) : @@ -516,7 +830,7 @@ public: virtual ~POSIXMultipartPart() = default; virtual uint32_t get_num() { return info.num; } - virtual uint64_t get_size() { return shadow->get_size(); } + virtual uint64_t get_size() { return part_file->get_size(); } virtual const std::string& get_etag() { return info.etag; } virtual ceph::real_time& get_mtime() { return info.mtime; } virtual const std::optional<rgw::cksum::Cksum>& get_cksum() { @@ -579,7 +893,8 @@ public: POSIXBucket* get_shadow() { return shadow.get(); } private: - int load(bool create=false); + std::string get_fname(); + int load(const DoutPrefixProvider *dpp, bool create=false); }; class POSIXAtomicWriter : public StoreWriter { @@ -589,7 +904,7 @@ private: const rgw_placement_rule *ptail_placement_rule; uint64_t olh_epoch; const std::string& unique_tag; - POSIXObject obj; + POSIXObject* obj; public: POSIXAtomicWriter(const DoutPrefixProvider *dpp, @@ -606,7 +921,7 @@ public: ptail_placement_rule(_ptail_placement_rule), olh_epoch(_olh_epoch), unique_tag(_unique_tag), - obj(_driver, _head_obj->get_key(), _head_obj->get_bucket()) {} + obj(static_cast<POSIXObject*>(_head_obj)) {} virtual ~POSIXAtomicWriter() = default; virtual int prepare(optional_yield y) override; @@ -629,13 +944,13 @@ private: const ACLOwner& owner; const rgw_placement_rule *ptail_placement_rule; uint64_t part_num; - std::unique_ptr<Bucket> shadow_bucket; - std::unique_ptr<POSIXObject> obj; + std::unique_ptr<Directory> upload_dir; + std::unique_ptr<File> part_file; public: POSIXMultipartWriter(const DoutPrefixProvider *dpp, optional_yield y, - std::unique_ptr<Bucket> _shadow_bucket, + POSIXBucket* _shadow_bucket, rgw_obj_key& _key, POSIXDriver* _driver, const ACLOwner& _owner, @@ -646,8 +961,9 @@ public: owner(_owner), ptail_placement_rule(_ptail_placement_rule), part_num(_part_num), - shadow_bucket(std::move(_shadow_bucket)), - obj(std::make_unique<POSIXObject>(_driver, _key, shadow_bucket.get())) {} + upload_dir(_shadow_bucket->get_dir()->clone()), + part_file(std::make_unique<File>(get_key_fname(_key, false), upload_dir.get(), _driver->ctx())) + { upload_dir->open(dpp); } virtual ~POSIXMultipartWriter() = default; virtual int prepare(optional_yield y) override; |