diff options
-rw-r--r-- | src/os/bluestore/BlueFS.cc | 60 | ||||
-rw-r--r-- | src/test/objectstore/test_bluefs.cc | 81 |
2 files changed, 110 insertions, 31 deletions
diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 2f88acdc93b..50f293d45fd 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -3794,7 +3794,7 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ if (offset > fnode.size) { ceph_abort_msg("truncate up not supported"); } - ceph_assert(offset <= fnode.size); + _flush_bdev(h); { std::lock_guard ll(log.lock); @@ -3803,44 +3803,42 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/ vselector->sub_usage(h->file->vselector_hint, fnode); uint64_t x_off = 0; auto p = fnode.seek(offset, &x_off); - uint64_t cut_off = - (p == fnode.extents.end()) ? 0 : p2roundup(x_off, alloc_size[p->bdev]); - uint64_t new_allocated; - if (0 == cut_off) { - // whole pextent to remove - changed_extents = true; - new_allocated = offset; - } else if (cut_off < p->length) { - dirty.pending_release[p->bdev].insert(p->offset + cut_off, p->length - cut_off); - new_allocated = (offset - x_off) + cut_off; - p->length = cut_off; - changed_extents = true; - ++p; - } else { - ceph_assert(cut_off >= p->length); - new_allocated = (offset - x_off) + p->length; - // just leave it here - ++p; - } - while (p != fnode.extents.end()) { - dirty.pending_release[p->bdev].insert(p->offset, p->length); - p = fnode.extents.erase(p); - changed_extents = true; + if (p != fnode.extents.end()) { + uint64_t cut_off = p2roundup(x_off, alloc_size[p->bdev]); + if (0 == cut_off) { + // whole pextent to remove + fnode.allocated = offset; + changed_extents = true; + } else if (cut_off < p->length) { + dirty.pending_release[p->bdev].insert(p->offset + cut_off, + p->length - cut_off); + fnode.allocated = (offset - x_off) + cut_off; + p->length = cut_off; + changed_extents = true; + ++p; + } else { + // cut_off > p->length means that we misaligned the extent + ceph_assert(cut_off == p->length); + fnode.allocated = (offset - x_off) + p->length; + ++p; // leave extent untouched + } + while (p != fnode.extents.end()) { + dirty.pending_release[p->bdev].insert(p->offset, p->length); + p = fnode.extents.erase(p); + changed_extents = true; + } } if (changed_extents) { fnode.size = offset; - fnode.allocated = new_allocated; fnode.reset_delta(); fnode.recalc_allocated(); log.t.op_file_update(fnode); // sad, but is_dirty must be set to signal flushing of the log h->file->is_dirty = true; - } else { - if (offset != fnode.size) { - fnode.size = offset; - //skipping log.t.op_file_update_inc, it will be done by flush() - h->file->is_dirty = true; - } + } else if (offset != fnode.size) { + fnode.size = offset; + // skipping log.t.op_file_update_inc, it will be done by flush() + h->file->is_dirty = true; } vselector->add_usage(h->file->vselector_hint, fnode); } diff --git a/src/test/objectstore/test_bluefs.cc b/src/test/objectstore/test_bluefs.cc index 60147b5397c..32173d61afe 100644 --- a/src/test/objectstore/test_bluefs.cc +++ b/src/test/objectstore/test_bluefs.cc @@ -1426,6 +1426,87 @@ TEST(BlueFS, test_concurrent_dir_link_and_compact_log_56210) { } } +TEST(BlueFS, truncate_drops_allocations) { + constexpr uint64_t K = 1024; + constexpr uint64_t M = 1024 * K; + uuid_d fsid; + const char* DIR_NAME="dir"; + const char* FILE_NAME="file1"; + struct { + uint64_t preallocated_size; + uint64_t write_size; + uint64_t truncate_to; + uint64_t allocated_after_truncate; + uint64_t slow_size = 0; + uint64_t slow_alloc_size = 64*K; + uint64_t db_size = 128*M; + uint64_t db_alloc_size = 1*M; + } scenarios [] = { + // on DB(which is SLOW) : 1 => 1, 64K remains + { 1*M, 1, 1, 64*K }, + // on DB(which is SLOW), alloc 4K : 1 => 1, 4K remains + { 1*M, 1, 1, 4*K, 0, 4*K }, + // on DB(which is SLOW), truncation on AU boundary : 128K => 128K, 128K remains + { 1*M, 128*K, 128*K, 128*K }, + // on DB(which is SLOW), no prealloc, truncation to 0 : 1666K => 0, 0 remains + { 0, 1666*K, 0, 0 }, + // on DB, truncate to 123K, expect 1M occupied + { 1234*K, 123*K, 123*K, 1*M, 128*M, 64*K, 10*M, 1*M }, + // on DB, truncate to 0, expect 0 occupied + { 1234*K, 345*K, 0, 0, 128*M, 64*K, 10*M, 1*M }, + // on DB, truncate to AU boundary, expect exactly 1M occupied + { 1234*K, 1123*K, 1*M, 1*M, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, truncate only data on SLOW + { 0, 10*M+1, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, preallocate and truncate only data on SLOW + { 6*M, 12*M, 10*M+1, 10*M+64*K, 128*M, 64*K, 10*M, 1*M }, + // on DB and SLOW, preallocate and truncate all in SLOW and some on DB + // note! prealloc 6M is important, one allocation for 12M will fallback to SLOW + // in 6M + 6M we can be sure that 6M is on DB and 6M is on SLOW + { 6*M, 12*M, 3*M+1, 4*M, 128*M, 64*K, 11*M, 1*M }, + }; + for (auto& s : scenarios) { + ConfSaver conf(g_ceph_context->_conf); + conf.SetVal("bluefs_shared_alloc_size", stringify(s.slow_alloc_size).c_str()); + conf.SetVal("bluefs_alloc_size", stringify(s.db_alloc_size).c_str()); + + g_ceph_context->_conf.set_val("bluefs_shared_alloc_size", stringify(s.slow_alloc_size)); + g_ceph_context->_conf.set_val("bluefs_alloc_size", stringify(s.db_alloc_size)); + TempBdev bdev_db{s.db_size}; + TempBdev bdev_slow{s.slow_size}; + + BlueFS fs(g_ceph_context); + if (s.db_size != 0) { + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); + } + if (s.slow_size != 0) { + ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); + } + + ASSERT_EQ(0, fs.mkfs(fsid, {BlueFS::BDEV_DB, false, false})); + ASSERT_EQ(0, fs.mount()); + ASSERT_EQ(0, fs.maybe_verify_layout({BlueFS::BDEV_DB, false, false})); + BlueFS::FileWriter *h; + ASSERT_EQ(0, fs.mkdir("dir")); + ASSERT_EQ(0, fs.open_for_write(DIR_NAME, FILE_NAME, &h, false)); + uint64_t pre = fs.get_used(); + ASSERT_EQ(0, fs.preallocate(h->file, 0, s.preallocated_size)); + const std::string content(s.write_size, 'x'); + h->append(content.c_str(), content.length()); + fs.fsync(h); + ASSERT_EQ(0, fs.truncate(h, s.truncate_to)); + fs.fsync(h); + uint64_t post = fs.get_used(); + fs.close_writer(h); + EXPECT_EQ(pre, post - s.allocated_after_truncate); + + fs.umount(); + } +} + + + + TEST(BlueFS, test_log_runway) { uint64_t max_log_runway = 65536; ConfSaver conf(g_ceph_context->_conf); |