diff options
author | Igor Fedotov <ifedotov@suse.com> | 2019-09-05 16:43:40 +0200 |
---|---|---|
committer | Igor Fedotov <ifedotov@suse.com> | 2019-09-11 17:15:28 +0200 |
commit | ca2c4e5d6956a32a7ce83f28a01c825dde1ee117 (patch) | |
tree | 31d4e7d54661305a56063a0f46a6689961cef5fd /src | |
parent | os/bluestore: do not collect onodes when doing fsck. (diff) | |
download | ceph-ca2c4e5d6956a32a7ce83f28a01c825dde1ee117.tar.xz ceph-ca2c4e5d6956a32a7ce83f28a01c825dde1ee117.zip |
os/bluestore: introduce shallow (quick-fix) mode for bluestore fsck/repair.
Signed-off-by: Igor Fedotov <ifedotov@suse.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/os/ObjectStore.h | 3 | ||||
-rw-r--r-- | src/os/bluestore/BlueStore.cc | 532 | ||||
-rw-r--r-- | src/os/bluestore/BlueStore.h | 20 | ||||
-rw-r--r-- | src/os/bluestore/bluestore_tool.cc | 10 |
4 files changed, 304 insertions, 261 deletions
diff --git a/src/os/ObjectStore.h b/src/os/ObjectStore.h index fdb8b2dc428..b60993daab0 100644 --- a/src/os/ObjectStore.h +++ b/src/os/ObjectStore.h @@ -259,6 +259,9 @@ public: virtual int repair(bool deep) { return -EOPNOTSUPP; } + virtual int quick_fix() { + return -EOPNOTSUPP; + } virtual void set_cache_shards(unsigned num) { } diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 1bd583e07c8..9e529a78639 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6906,7 +6906,8 @@ int BlueStore::_fsck_check_extents( mempool_dynamic_bitset &used_blocks, uint64_t granularity, BlueStoreRepairer* repairer, - store_statfs_t& expected_statfs) + store_statfs_t& expected_statfs, + FSCKDepth depth) { dout(30) << __func__ << " oid " << oid << " extents " << extents << dendl; int errors = 0; @@ -6917,34 +6918,36 @@ int BlueStore::_fsck_check_extents( if (compressed) { expected_statfs.data_compressed_allocated += e.length; } - bool already = false; - apply( - e.offset, e.length, granularity, used_blocks, - [&](uint64_t pos, mempool_dynamic_bitset &bs) { - ceph_assert(pos < bs.size()); - if (bs.test(pos)) { - if (repairer) { - repairer->note_misreference( - pos * min_alloc_size, min_alloc_size, !already); - } - if (!already) { - derr << "fsck error: " << oid << " extent " << e - << " or a subset is already allocated (misreferenced)" << dendl; - ++errors; - already = true; + if (depth != FSCK_SHALLOW) { + bool already = false; + apply( + e.offset, e.length, granularity, used_blocks, + [&](uint64_t pos, mempool_dynamic_bitset &bs) { + ceph_assert(pos < bs.size()); + if (bs.test(pos)) { + if (repairer) { + repairer->note_misreference( + pos * min_alloc_size, min_alloc_size, !already); + } + if (!already) { + derr << "fsck error: " << oid << " extent " << e + << " or a subset is already allocated (misreferenced)" << dendl; + ++errors; + already = true; + } } - } - else - bs.set(pos); - }); - if (repairer) { - repairer->get_space_usage_tracker().set_used( e.offset, e.length, cid, oid); - } + else + bs.set(pos); + }); + if (repairer) { + repairer->get_space_usage_tracker().set_used( e.offset, e.length, cid, oid); + } - if (e.end() > bdev->get_size()) { - derr << "fsck error: " << oid << " extent " << e - << " past end of block device" << dendl; - ++errors; + if (e.end() > bdev->get_size()) { + derr << "fsck error: " << oid << " extent " << e + << " past end of block device" << dendl; + ++errors; + } } } return errors; @@ -7103,12 +7106,14 @@ Detection stage (in processing order): (can be merged with the step above if misreferences were dectected) - Apply StatFS update */ -int BlueStore::_fsck(bool deep, bool repair) +int BlueStore::_fsck(BlueStore::FSCKDepth depth, bool repair) { dout(1) << __func__ << " <<<START>>>" << (repair ? " repair" : " check") - << (deep ? " (deep)" : " (shallow)") << " start" << dendl; + << (depth == FSCK_DEEP ? " (deep)" : + depth == FSCK_SHALLOW ? " (shallow)" : " (regular)") + << " start" << dendl; int64_t errors = 0; int64_t warnings = 0; unsigned repaired = 0; @@ -7149,7 +7154,7 @@ int BlueStore::_fsck(bool deep, bool repair) BlueStoreRepairer repairer; store_statfs_t* expected_statfs = nullptr; // in deep mode we need R/W write access to be able to replay deferred ops - bool read_only = !(repair || deep); + bool read_only = !(repair || depth == FSCK_DEEP); utime_t start = ceph_clock_now(); const auto& no_pps_mode = cct->_conf->bluestore_no_per_pool_stats_tolerance; @@ -7305,6 +7310,9 @@ int BlueStore::_fsck(bool deep, bool repair) dout(30) << __func__ << " key " << pretty_binary_string(it->key()) << dendl; if (is_extent_shard_key(it->key())) { + if (depth == FSCK_SHALLOW) { + continue; + } while (!expecting_shards.empty() && expecting_shards.front() < it->key()) { derr << "fsck error: missing shard key " @@ -7381,7 +7389,8 @@ int BlueStore::_fsck(bool deep, bool repair) << dendl; } - if (!expecting_shards.empty()) { + if (depth != FSCK_SHALLOW && + !expecting_shards.empty()) { for (auto &k : expecting_shards) { derr << "fsck error: missing shard key " << pretty_binary_string(k) << dendl; @@ -7394,7 +7403,7 @@ int BlueStore::_fsck(bool deep, bool repair) store_statfs_t onode_statfs; OnodeRef o; o.reset(Onode::decode(c, oid, it->key(), it->value())); - if (o->onode.nid) { + if (depth != FSCK_SHALLOW && o->onode.nid) { if (o->onode.nid > nid_max) { derr << "fsck error: " << oid << " nid " << o->onode.nid << " > nid_max " << nid_max << dendl; @@ -7416,19 +7425,22 @@ int BlueStore::_fsck(bool deep, bool repair) if (!o->extent_map.shards.empty()) { ++num_sharded_objects; num_object_shards += o->extent_map.shards.size(); + if (depth != FSCK_SHALLOW) { + for (auto& s : o->extent_map.shards) { + dout(20) << __func__ << " shard " << *s.shard_info << dendl; + expecting_shards.push_back(string()); + get_extent_shard_key(o->key, s.shard_info->offset, + &expecting_shards.back()); + if (s.shard_info->offset >= o->onode.size) { + derr << "fsck error: " << oid << " shard 0x" << std::hex + << s.shard_info->offset << " past EOF at 0x" << o->onode.size + << std::dec << dendl; + ++errors; + } + } + } } - for (auto& s : o->extent_map.shards) { - dout(20) << __func__ << " shard " << *s.shard_info << dendl; - expecting_shards.push_back(string()); - get_extent_shard_key(o->key, s.shard_info->offset, - &expecting_shards.back()); - if (s.shard_info->offset >= o->onode.size) { - derr << "fsck error: " << oid << " shard 0x" << std::hex - << s.shard_info->offset << " past EOF at 0x" << o->onode.size - << std::dec << dendl; - ++errors; - } - } + // lextents map<BlobRef,bluestore_blob_t::unused_t> referenced; uint64_t pos = 0; @@ -7443,7 +7455,8 @@ int BlueStore::_fsck(bool deep, bool repair) << std::dec << dendl; ++errors; } - if (o->extent_map.spans_shard(l.logical_offset, l.length)) { + if (depth != FSCK_SHALLOW && + o->extent_map.spans_shard(l.logical_offset, l.length)) { derr << "fsck error: " << oid << " lextent at 0x" << std::hex << l.logical_offset << "~" << l.length << " spans a shard boundary" @@ -7465,7 +7478,8 @@ int BlueStore::_fsck(bool deep, bool repair) l.blob_offset, l.length); ++num_extents; - if (blob.has_unused()) { + if (depth != FSCK_SHALLOW && + blob.has_unused()) { auto p = referenced.find(l.blob); bluestore_blob_t::unused_t *pu; if (p == referenced.end()) { @@ -7485,50 +7499,54 @@ int BlueStore::_fsck(bool deep, bool repair) } } } - for (auto &i : referenced) { - dout(20) << __func__ << " referenced 0x" << std::hex << i.second - << std::dec << " for " << *i.first << dendl; - const bluestore_blob_t& blob = i.first->get_blob(); - if (i.second & blob.unused) { - derr << "fsck error: " << oid << " blob claims unused 0x" - << std::hex << blob.unused - << " but extents reference 0x" << i.second << std::dec - << " on blob " << *i.first << dendl; - ++errors; - } - if (blob.has_csum()) { - uint64_t blob_len = blob.get_logical_length(); - uint64_t unused_chunk_size = blob_len / (sizeof(blob.unused)*8); - unsigned csum_count = blob.get_csum_count(); - unsigned csum_chunk_size = blob.get_csum_chunk_size(); - for (unsigned p = 0; p < csum_count; ++p) { - unsigned pos = p * csum_chunk_size; - unsigned firstbit = pos / unused_chunk_size; // [firstbit,lastbit] - unsigned lastbit = (pos + csum_chunk_size - 1) / unused_chunk_size; - unsigned mask = 1u << firstbit; - for (unsigned b = firstbit + 1; b <= lastbit; ++b) { - mask |= 1u << b; - } - if ((blob.unused & mask) == mask) { - // this csum chunk region is marked unused - if (blob.get_csum_item(p) != 0) { - derr << "fsck error: " << oid - << " blob claims csum chunk 0x" << std::hex << pos - << "~" << csum_chunk_size - << " is unused (mask 0x" << mask << " of unused 0x" - << blob.unused << ") but csum is non-zero 0x" - << blob.get_csum_item(p) << std::dec << " on blob " - << *i.first << dendl; - ++errors; + if (depth != FSCK_SHALLOW) { + for (auto &i : referenced) { + dout(20) << __func__ << " referenced 0x" << std::hex << i.second + << std::dec << " for " << *i.first << dendl; + const bluestore_blob_t& blob = i.first->get_blob(); + if (i.second & blob.unused) { + derr << "fsck error: " << oid << " blob claims unused 0x" + << std::hex << blob.unused + << " but extents reference 0x" << i.second << std::dec + << " on blob " << *i.first << dendl; + ++errors; + } + if (blob.has_csum()) { + uint64_t blob_len = blob.get_logical_length(); + uint64_t unused_chunk_size = blob_len / (sizeof(blob.unused)*8); + unsigned csum_count = blob.get_csum_count(); + unsigned csum_chunk_size = blob.get_csum_chunk_size(); + for (unsigned p = 0; p < csum_count; ++p) { + unsigned pos = p * csum_chunk_size; + unsigned firstbit = pos / unused_chunk_size; // [firstbit,lastbit] + unsigned lastbit = (pos + csum_chunk_size - 1) / unused_chunk_size; + unsigned mask = 1u << firstbit; + for (unsigned b = firstbit + 1; b <= lastbit; ++b) { + mask |= 1u << b; + } + if ((blob.unused & mask) == mask) { + // this csum chunk region is marked unused + if (blob.get_csum_item(p) != 0) { + derr << "fsck error: " << oid + << " blob claims csum chunk 0x" << std::hex << pos + << "~" << csum_chunk_size + << " is unused (mask 0x" << mask << " of unused 0x" + << blob.unused << ") but csum is non-zero 0x" + << blob.get_csum_item(p) << std::dec << " on blob " + << *i.first << dendl; + ++errors; + } } } } - } + } } for (auto &i : ref_map) { ++num_blobs; const bluestore_blob_t& blob = i.first->get_blob(); - bool equal = i.first->get_blob_use_tracker().equal(i.second); + bool equal = + depth == FSCK_SHALLOW ? true : + i.first->get_blob_use_tracker().equal(i.second); if (!equal) { derr << "fsck error: " << oid << " blob " << *i.first << " doesn't match expected ref_map " << i.second << dendl; @@ -7571,10 +7589,11 @@ int BlueStore::_fsck(bool deep, bool repair) used_blocks, fm->get_alloc_size(), repair ? &repairer : nullptr, - onode_statfs); + onode_statfs, + depth); } } - if (deep) { + if (depth == FSCK_DEEP) { bufferlist bl; uint64_t max_read_block = cct->_conf->bluestore_fsck_read_bytes_cap; uint64_t offset = 0; @@ -7595,7 +7614,7 @@ int BlueStore::_fsck(bool deep, bool repair) } while (offset < o->onode.size); } // omap - if (o->onode.has_omap()) { + if (depth != FSCK_SHALLOW && o->onode.has_omap()) { auto& m = o->onode.is_pgmeta_omap() ? used_pgmeta_omap_head : (o->onode.is_perpool_omap() ? used_per_pool_omap_head : used_omap_head); @@ -7681,6 +7700,7 @@ int BlueStore::_fsck(bool deep, bool repair) dout(1) << __func__ << " checking shared_blobs" << dendl; it = db->get_iterator(PREFIX_SHARED_BLOB); if (it) { + //FIXME minor: perhaps simplify for shallow mode? //fill global if not overriden below expected_statfs = &expected_store_statfs; @@ -7749,7 +7769,8 @@ int BlueStore::_fsck(bool deep, bool repair) used_blocks, fm->get_alloc_size(), repair ? &repairer : nullptr, - *expected_statfs); + *expected_statfs, + depth); sbi.passed = true; } } @@ -7951,27 +7972,29 @@ int BlueStore::_fsck(bool deep, bool repair) } // if (it) { } //if (repair && repairer.preprocess_misreference()) { - for (auto &p : sb_info) { - sb_info_t& sbi = p.second; - if (!sbi.passed) { - derr << "fsck error: missing " << *sbi.sb << dendl; - ++errors; - } - if (repair && (!sbi.passed || sbi.updated)) { - auto sbid = p.first; - if (sbi.ref_map.empty()) { - ceph_assert(sbi.passed); - dout(20) << __func__ << " " << *sbi.sb - << " is empty, removing" << dendl; - repairer.fix_shared_blob(db, sbid, nullptr); - } else { - bufferlist bl; - bluestore_shared_blob_t persistent(sbid, std::move(sbi.ref_map)); - encode(persistent, bl); - dout(20) << __func__ << " " << *sbi.sb - << " is " << bl.length() << " bytes, updating" << dendl; + if (depth != FSCK_SHALLOW) { + for (auto &p : sb_info) { + sb_info_t& sbi = p.second; + if (!sbi.passed) { + derr << "fsck error: missing " << *sbi.sb << dendl; + ++errors; + } + if (repair && (!sbi.passed || sbi.updated)) { + auto sbid = p.first; + if (sbi.ref_map.empty()) { + ceph_assert(sbi.passed); + dout(20) << __func__ << " " << *sbi.sb + << " is empty, removing" << dendl; + repairer.fix_shared_blob(db, sbid, nullptr); + } else { + bufferlist bl; + bluestore_shared_blob_t persistent(sbid, std::move(sbi.ref_map)); + encode(persistent, bl); + dout(20) << __func__ << " " << *sbi.sb + << " is " << bl.length() << " bytes, updating" << dendl; - repairer.fix_shared_blob(db, sbid, &bl); + repairer.fix_shared_blob(db, sbid, &bl); + } } } } @@ -7995,173 +8018,174 @@ int BlueStore::_fsck(bool deep, bool repair) errors, repair ? &repairer : nullptr); } - dout(1) << __func__ << " checking for stray omap data" << dendl; - it = db->get_iterator(PREFIX_OMAP); - if (it) { - uint64_t last_omap_head = 0; - for (it->lower_bound(string()); it->valid(); it->next()) { - uint64_t omap_head; - _key_decode_u64(it->key().c_str(), &omap_head); - if (used_omap_head.count(omap_head) == 0 && - omap_head != last_omap_head) { - derr << "fsck error: found stray omap data on omap_head " - << omap_head << dendl; - ++errors; - last_omap_head = omap_head; + if (depth != FSCK_SHALLOW) { + dout(1) << __func__ << " checking for stray omap data" << dendl; + it = db->get_iterator(PREFIX_OMAP); + if (it) { + uint64_t last_omap_head = 0; + for (it->lower_bound(string()); it->valid(); it->next()) { + uint64_t omap_head; + _key_decode_u64(it->key().c_str(), &omap_head); + if (used_omap_head.count(omap_head) == 0 && + omap_head != last_omap_head) { + derr << "fsck error: found stray omap data on omap_head " + << omap_head << dendl; + ++errors; + last_omap_head = omap_head; + } } } - } - it = db->get_iterator(PREFIX_PGMETA_OMAP); - if (it) { - uint64_t last_omap_head = 0; - for (it->lower_bound(string()); it->valid(); it->next()) { - uint64_t omap_head; - _key_decode_u64(it->key().c_str(), &omap_head); - if (used_pgmeta_omap_head.count(omap_head) == 0 && - omap_head != last_omap_head) { - derr << "fsck error: found stray (pgmeta) omap data on omap_head " - << omap_head << dendl; - last_omap_head = omap_head; - ++errors; + it = db->get_iterator(PREFIX_PGMETA_OMAP); + if (it) { + uint64_t last_omap_head = 0; + for (it->lower_bound(string()); it->valid(); it->next()) { + uint64_t omap_head; + _key_decode_u64(it->key().c_str(), &omap_head); + if (used_pgmeta_omap_head.count(omap_head) == 0 && + omap_head != last_omap_head) { + derr << "fsck error: found stray (pgmeta) omap data on omap_head " + << omap_head << dendl; + last_omap_head = omap_head; + ++errors; + } } } - } - it = db->get_iterator(PREFIX_PERPOOL_OMAP); - if (it) { - uint64_t last_omap_head = 0; - for (it->lower_bound(string()); it->valid(); it->next()) { - uint64_t pool; - uint64_t omap_head; - string k = it->key(); - const char *c = k.c_str(); - c = _key_decode_u64(c, &pool); - c = _key_decode_u64(c, &omap_head); - if (used_per_pool_omap_head.count(omap_head) == 0 && - omap_head != last_omap_head) { - derr << "fsck error: found stray (per-pool) omap data on omap_head " - << omap_head << dendl; - ++errors; - last_omap_head = omap_head; + it = db->get_iterator(PREFIX_PERPOOL_OMAP); + if (it) { + uint64_t last_omap_head = 0; + for (it->lower_bound(string()); it->valid(); it->next()) { + uint64_t pool; + uint64_t omap_head; + string k = it->key(); + const char *c = k.c_str(); + c = _key_decode_u64(c, &pool); + c = _key_decode_u64(c, &omap_head); + if (used_per_pool_omap_head.count(omap_head) == 0 && + omap_head != last_omap_head) { + derr << "fsck error: found stray (per-pool) omap data on omap_head " + << omap_head << dendl; + ++errors; + last_omap_head = omap_head; + } } } - } - - dout(1) << __func__ << " checking deferred events" << dendl; - it = db->get_iterator(PREFIX_DEFERRED); - if (it) { - for (it->lower_bound(string()); it->valid(); it->next()) { - bufferlist bl = it->value(); - auto p = bl.cbegin(); - bluestore_deferred_transaction_t wt; - try { - decode(wt, p); - } catch (buffer::error& e) { - derr << "fsck error: failed to decode deferred txn " - << pretty_binary_string(it->key()) << dendl; - if (repair) { - dout(20) << __func__ << " undecodable deferred TXN record, key: '" - << pretty_binary_string(it->key()) - << "', removing" << dendl; - repairer.remove_key(db, PREFIX_DEFERRED, it->key()); - } - continue; + dout(1) << __func__ << " checking deferred events" << dendl; + it = db->get_iterator(PREFIX_DEFERRED); + if (it) { + for (it->lower_bound(string()); it->valid(); it->next()) { + bufferlist bl = it->value(); + auto p = bl.cbegin(); + bluestore_deferred_transaction_t wt; + try { + decode(wt, p); + } catch (buffer::error& e) { + derr << "fsck error: failed to decode deferred txn " + << pretty_binary_string(it->key()) << dendl; + if (repair) { + dout(20) << __func__ << " undecodable deferred TXN record, key: '" + << pretty_binary_string(it->key()) + << "', removing" << dendl; + repairer.remove_key(db, PREFIX_DEFERRED, it->key()); + } + continue; + } + dout(20) << __func__ << " deferred " << wt.seq + << " ops " << wt.ops.size() + << " released 0x" << std::hex << wt.released << std::dec << dendl; + for (auto e = wt.released.begin(); e != wt.released.end(); ++e) { + apply( + e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, + [&](uint64_t pos, mempool_dynamic_bitset &bs) { + ceph_assert(pos < bs.size()); + bs.set(pos); + } + ); + } } - dout(20) << __func__ << " deferred " << wt.seq - << " ops " << wt.ops.size() - << " released 0x" << std::hex << wt.released << std::dec << dendl; - for (auto e = wt.released.begin(); e != wt.released.end(); ++e) { + } + + dout(1) << __func__ << " checking freelist vs allocated" << dendl; + { + // remove bluefs_extents from used set since the freelist doesn't + // know they are allocated. + for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) { apply( e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, [&](uint64_t pos, mempool_dynamic_bitset &bs) { - ceph_assert(pos < bs.size()); - bs.set(pos); + ceph_assert(pos < bs.size()); + bs.reset(pos); } ); } - } - } - - dout(1) << __func__ << " checking freelist vs allocated" << dendl; - { - // remove bluefs_extents from used set since the freelist doesn't - // know they are allocated. - for (auto e = bluefs_extents.begin(); e != bluefs_extents.end(); ++e) { - apply( - e.get_start(), e.get_len(), fm->get_alloc_size(), used_blocks, - [&](uint64_t pos, mempool_dynamic_bitset &bs) { - ceph_assert(pos < bs.size()); - bs.reset(pos); - } - ); - } - fm->enumerate_reset(); - uint64_t offset, length; - while (fm->enumerate_next(db, &offset, &length)) { - bool intersects = false; - apply( - offset, length, fm->get_alloc_size(), used_blocks, - [&](uint64_t pos, mempool_dynamic_bitset &bs) { - ceph_assert(pos < bs.size()); - if (bs.test(pos)) { - if (offset == SUPER_RESERVED && - length == min_alloc_size - SUPER_RESERVED) { - // this is due to the change just after luminous to min_alloc_size - // granularity allocations, and our baked in assumption at the top - // of _fsck that 0~round_up_to(SUPER_RESERVED,min_alloc_size) is used - // (vs luminous's round_up_to(SUPER_RESERVED,block_size)). harmless, - // since we will never allocate this region below min_alloc_size. - dout(10) << __func__ << " ignoring free extent between SUPER_RESERVED" - << " and min_alloc_size, 0x" << std::hex << offset << "~" - << length << std::dec << dendl; - } else { - intersects = true; - if (repair) { - repairer.fix_false_free(db, fm, - pos * min_alloc_size, - min_alloc_size); + fm->enumerate_reset(); + uint64_t offset, length; + while (fm->enumerate_next(db, &offset, &length)) { + bool intersects = false; + apply( + offset, length, fm->get_alloc_size(), used_blocks, + [&](uint64_t pos, mempool_dynamic_bitset &bs) { + ceph_assert(pos < bs.size()); + if (bs.test(pos)) { + if (offset == SUPER_RESERVED && + length == min_alloc_size - SUPER_RESERVED) { + // this is due to the change just after luminous to min_alloc_size + // granularity allocations, and our baked in assumption at the top + // of _fsck that 0~round_up_to(SUPER_RESERVED,min_alloc_size) is used + // (vs luminous's round_up_to(SUPER_RESERVED,block_size)). harmless, + // since we will never allocate this region below min_alloc_size. + dout(10) << __func__ << " ignoring free extent between SUPER_RESERVED" + << " and min_alloc_size, 0x" << std::hex << offset << "~" + << length << std::dec << dendl; + } else { + intersects = true; + if (repair) { + repairer.fix_false_free(db, fm, + pos * min_alloc_size, + min_alloc_size); + } } - } - } else { - bs.set(pos); + } else { + bs.set(pos); + } } + ); + if (intersects) { + derr << "fsck error: free extent 0x" << std::hex << offset + << "~" << length << std::dec + << " intersects allocated blocks" << dendl; + ++errors; } - ); - if (intersects) { - derr << "fsck error: free extent 0x" << std::hex << offset - << "~" << length << std::dec - << " intersects allocated blocks" << dendl; - ++errors; } - } - fm->enumerate_reset(); - size_t count = used_blocks.count(); - if (used_blocks.size() != count) { - ceph_assert(used_blocks.size() > count); - used_blocks.flip(); - size_t start = used_blocks.find_first(); - while (start != decltype(used_blocks)::npos) { - size_t cur = start; - while (true) { - size_t next = used_blocks.find_next(cur); - if (next != cur + 1) { - ++errors; - derr << "fsck error: leaked extent 0x" << std::hex - << ((uint64_t)start * fm->get_alloc_size()) << "~" - << ((cur + 1 - start) * fm->get_alloc_size()) << std::dec - << dendl; - if (repair) { - repairer.fix_leaked(db, - fm, - start * min_alloc_size, - (cur + 1 - start) * min_alloc_size); + fm->enumerate_reset(); + size_t count = used_blocks.count(); + if (used_blocks.size() != count) { + ceph_assert(used_blocks.size() > count); + used_blocks.flip(); + size_t start = used_blocks.find_first(); + while (start != decltype(used_blocks)::npos) { + size_t cur = start; + while (true) { + size_t next = used_blocks.find_next(cur); + if (next != cur + 1) { + ++errors; + derr << "fsck error: leaked extent 0x" << std::hex + << ((uint64_t)start * fm->get_alloc_size()) << "~" + << ((cur + 1 - start) * fm->get_alloc_size()) << std::dec + << dendl; + if (repair) { + repairer.fix_leaked(db, + fm, + start * min_alloc_size, + (cur + 1 - start) * min_alloc_size); + } + start = next; + break; } - start = next; - break; + cur = next; } - cur = next; - } + } + used_blocks.flip(); } - used_blocks.flip(); } } if (repair) { diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 76540a1cdb0..3f4992cff67 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2218,6 +2218,12 @@ public: mempool::bluestore_fsck::pool_allocator<uint64_t>>; private: + enum FSCKDepth { + FSCK_REGULAR, + FSCK_DEEP, + FSCK_SHALLOW + }; + int _fsck_check_extents( const coll_t& cid, const ghobject_t& oid, @@ -2226,7 +2232,8 @@ private: mempool_dynamic_bitset &used_blocks, uint64_t granularity, BlueStoreRepairer* repairer, - store_statfs_t& expected_statfs); + store_statfs_t& expected_statfs, + FSCKDepth depth); using per_pool_statfs = mempool::bluestore_fsck::map<uint64_t, store_statfs_t>; @@ -2236,6 +2243,9 @@ private: int64_t& errors, BlueStoreRepairer* repairer); + int _fsck(FSCKDepth depth, bool repair); + + void _buffer_cache_write( TransContext *txc, BlobRef b, @@ -2347,12 +2357,14 @@ public: int cold_close(); int fsck(bool deep) override { - return _fsck(deep, false); + return _fsck(deep ? FSCK_DEEP : FSCK_REGULAR, false); } int repair(bool deep) override { - return _fsck(deep, true); + return _fsck(deep ? FSCK_DEEP : FSCK_REGULAR, true); + } + int quick_fix() override { + return _fsck(FSCK_SHALLOW, true); } - int _fsck(bool deep, bool repair); void set_cache_shards(unsigned num) override; void dump_cache_stats(Formatter *f) override { diff --git a/src/os/bluestore/bluestore_tool.cc b/src/os/bluestore/bluestore_tool.cc index 4b7c707319f..63db8261f54 100644 --- a/src/os/bluestore/bluestore_tool.cc +++ b/src/os/bluestore/bluestore_tool.cc @@ -248,6 +248,7 @@ int main(int argc, char **argv) ("command", po::value<string>(&action), "fsck, " "repair, " + "quick-fix, " "bluefs-export, " "bluefs-bdev-sizes, " "bluefs-bdev-expand, " @@ -293,7 +294,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - if (action == "fsck" || action == "repair") { + if (action == "fsck" || action == "repair" || action == "quick-fix") { if (path.empty()) { cerr << "must specify bluestore path" << std::endl; exit(EXIT_FAILURE); @@ -417,14 +418,17 @@ int main(int argc, char **argv) common_init_finish(cct.get()); if (action == "fsck" || - action == "repair") { + action == "repair" || + action == "quick-fix") { validate_path(cct.get(), path, false); BlueStore bluestore(cct.get(), path); int r; if (action == "fsck") { r = bluestore.fsck(fsck_deep); - } else { + } else if (action == "repair") { r = bluestore.repair(fsck_deep); + } else { + r = bluestore.quick_fix(); } if (r < 0) { cerr << "error from fsck: " << cpp_strerror(r) << std::endl; |