diff options
author | Gregory Farnum <greg@gregs42.com> | 2016-03-10 07:22:42 +0100 |
---|---|---|
committer | Gregory Farnum <greg@gregs42.com> | 2016-03-10 07:22:42 +0100 |
commit | 9ae848687567770ce2867bbe7e29ac7ecb14c21e (patch) | |
tree | 3fa9eda19482b074f83310467b1b50b696fd1596 | |
parent | Merge pull request #8014 from ukernel/wip-15038 (diff) | |
parent | doc: add notes about upgrading cephfs (diff) | |
download | ceph-9ae848687567770ce2867bbe7e29ac7ecb14c21e.tar.xz ceph-9ae848687567770ce2867bbe7e29ac7ecb14c21e.zip |
Merge pull request #7003 from jcsp/wip-cephfs-tmap-migrate
tools/cephfs: add tmap_upgrade
Reviewed-by: David Zafman <dzafman@redhat.com>
Reviewed-by: Yan, Zheng <zyan@redhat.com>
Reviewed-by: Greg Farnum <gfarnum@redhat.com>
-rw-r--r-- | PendingReleaseNotes | 4 | ||||
-rw-r--r-- | doc/cephfs/index.rst | 1 | ||||
-rw-r--r-- | doc/cephfs/upgrading.rst | 34 | ||||
-rw-r--r-- | src/test/CMakeLists.txt | 11 | ||||
-rw-r--r-- | src/test/Makefile-client.am | 5 | ||||
-rw-r--r-- | src/test/librados/tmap_migrate.cc | 70 | ||||
-rw-r--r-- | src/tools/cephfs/DataScan.cc | 82 | ||||
-rw-r--r-- | src/tools/cephfs/DataScan.h | 10 |
8 files changed, 216 insertions, 1 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 5da5a81b18d..96735081c36 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -24,6 +24,10 @@ v10.0.0 limit max waiting time of monitor election process, which was previously restricted by 'mon_lease'. +* CephFS filesystems created using versions older than Firefly (0.80) must + use the new "cephfs-data-scan tmap_upgrade" command after upgrading to + Jewel. See 'Upgrading' in the CephFS documentation for more information. + v9.3.0 ====== * Some symbols wrongly exposed by librados in v9.1.0 and v9.2.0 were removed. diff --git a/doc/cephfs/index.rst b/doc/cephfs/index.rst index ad30220b067..316db1d452e 100644 --- a/doc/cephfs/index.rst +++ b/doc/cephfs/index.rst @@ -92,6 +92,7 @@ authentication keyring. Troubleshooting <troubleshooting> Disaster recovery <disaster-recovery> Client authentication <client-auth> + Upgrading old filesystems <upgrading> .. raw:: html diff --git a/doc/cephfs/upgrading.rst b/doc/cephfs/upgrading.rst new file mode 100644 index 00000000000..7ee3f098626 --- /dev/null +++ b/doc/cephfs/upgrading.rst @@ -0,0 +1,34 @@ + +Upgrading pre-Firefly filesystems past Jewel +============================================ + +.. tip:: + + This advice only applies to users with filesystems + created using versions of Ceph older than *Firefly* (0.80). + Users creating new filesystems may disregard this advice. + +Pre-firefly versions of Ceph used a now-deprecated format +for storing CephFS directory objects, called TMAPs. Support +for reading these in RADOS will be removed after the Jewel +release of Ceph, so for upgrading CephFS users it is important +to ensure that any old directory objects have been converted. + +After installing Jewel on all your MDS and OSD servers, and restarting +the services, run the following command: + +:: + + cephfs-data-scan tmap_upgrade <metadata pool name> + +This only needs to be run once, and it is not necessary to +stop any other services while it runs. 
The command may take some +time to execute, as it iterates over all objects in your metadata +pool. It is safe to continue using your filesystem as normal while +it executes. If the command aborts for any reason, it is safe +to simply run it again. + +If you are upgrading a pre-Firefly CephFS filesystem to a newer Ceph version +than Jewel, you must first upgrade to Jewel and run the ``tmap_upgrade`` +command before completing your upgrade to the latest version. + diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 895437cc880..1cf72ab0c1f 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1900,6 +1900,17 @@ set_target_properties(test_rados_api_misc PROPERTIES COMPILE_FLAGS target_link_libraries(test_rados_api_misc librados global ${UNITTEST_LIBS} radostest) +add_executable(test_rados_api_tmap_migrate + ../tools/cephfs/DataScan.cc + ../tools/cephfs/MDSUtility.cc + librados/tmap_migrate.cc + $<TARGET_OBJECTS:heap_profiler_objs> + ) +set_target_properties(test_rados_api_tmap_migrate PROPERTIES COMPILE_FLAGS + ${UNITTEST_CXX_FLAGS}) +target_link_libraries(test_rados_api_tmap_migrate + librados mds osdc global cls_cephfs_client ${UNITTEST_LIBS} ${ALLOC_LIBS} radostest) + add_executable(test_rados_api_lock librados/lock.cc ) diff --git a/src/test/Makefile-client.am b/src/test/Makefile-client.am index 2b0e804e526..c1a28348ebb 100644 --- a/src/test/Makefile-client.am +++ b/src/test/Makefile-client.am @@ -285,6 +285,11 @@ ceph_test_rados_api_lock_LDADD = $(LIBRADOS) $(UNITTEST_LDADD) $(RADOS_TEST_LDAD ceph_test_rados_api_lock_CXXFLAGS = $(UNITTEST_CXXFLAGS) bin_DEBUGPROGRAMS += ceph_test_rados_api_lock +ceph_test_rados_api_tmap_migrate_SOURCES = test/librados/tmap_migrate.cc tools/cephfs/DataScan.cc tools/cephfs/MDSUtility.cc +ceph_test_rados_api_tmap_migrate_LDADD = $(LIBRADOS) $(UNITTEST_LDADD) $(LIBMDS) libcls_cephfs_client.la $(CEPH_GLOBAL) $(RADOS_TEST_LDADD) +ceph_test_rados_api_tmap_migrate_CXXFLAGS = $(UNITTEST_CXXFLAGS) 
+bin_DEBUGPROGRAMS += ceph_test_rados_api_tmap_migrate + ceph_test_stress_watch_SOURCES = test/test_stress_watch.cc ceph_test_stress_watch_LDADD = \ diff --git a/src/test/librados/tmap_migrate.cc b/src/test/librados/tmap_migrate.cc new file mode 100644 index 00000000000..f79e5849f75 --- /dev/null +++ b/src/test/librados/tmap_migrate.cc @@ -0,0 +1,70 @@ +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "test/librados/test.h" +#include "test/librados/TestCase.h" +#include "include/encoding.h" +#include "tools/cephfs/DataScan.h" +#include "global/global_init.h" + +#include <algorithm> +#include <errno.h> +#include "gtest/gtest.h" + + +using namespace librados; + +typedef RadosTestPP TmapMigratePP; + +TEST_F(TmapMigratePP, DataScan) { + std::vector<const char *> args; + global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0); + common_init_finish(g_ceph_context); + + // DataScan isn't namespace-aware, so override RadosTestPP's default + // behaviour of putting everything into a namespace + ioctx.set_namespace(""); + + bufferlist header; + std::map<std::string, bufferlist> kvs; + bufferlist val; + val.append("custard"); + kvs.insert({"rhubarb", val}); + + bufferlist tmap_trans; + ::encode(header, tmap_trans); + ::encode(kvs, tmap_trans); + + // Create a TMAP object + ASSERT_EQ(0, ioctx.tmap_put("10000000000.00000000", tmap_trans)); + + // Create an OMAP object + std::map<std::string, bufferlist> omap_kvs; + bufferlist omap_val; + omap_val.append("waffles"); + omap_kvs.insert({"tasty", omap_val}); + ASSERT_EQ(0, ioctx.omap_set("10000000001.00000000", omap_kvs)); + + DataScan ds; + ASSERT_EQ(0, ds.init()); + int r = ds.main({"tmap_upgrade", pool_name.c_str()}); + ASSERT_EQ(r, 0); + ds.shutdown(); + + // Check that the TMAP object is now an omap object + std::map<std::string, bufferlist> read_vals; + ASSERT_EQ(0, ioctx.omap_get_vals("10000000000.00000000", "", 1, &read_vals)); + ASSERT_EQ(read_vals.size(), 1); + 
bufferlist tmap_expect_val; + tmap_expect_val.append("custard"); + ASSERT_EQ(read_vals.at("rhubarb"), tmap_expect_val); + + + // Check that the OMAP object is still readable + read_vals.clear(); + ASSERT_EQ(0, ioctx.omap_get_vals("10000000001.00000000", "", 1, &read_vals)); + ASSERT_EQ(read_vals.size(), 1); + bufferlist expect_omap_val; + expect_omap_val.append("waffles"); + ASSERT_EQ(read_vals.at("tasty"), expect_omap_val); +} + diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc index 0a992a32e8f..626cb0ac9f4 100644 --- a/src/tools/cephfs/DataScan.cc +++ b/src/tools/cephfs/DataScan.cc @@ -39,6 +39,8 @@ void DataScan::usage() << " --force-pool: use data pool even if it is not in MDSMap\n" << "\n" << " cephfs-data-scan scan_frags [--force-corrupt]\n" + << "\n" + << " cephfs-data-scan tmap_upgrade <metadata_pool>\n" << std::endl; generic_client_usage(); @@ -131,6 +133,7 @@ int DataScan::main(const std::vector<const char*> &args) std::string const &command = args[0]; std::string data_pool_name; + std::string metadata_pool_name; // Consume any known --key val or --flag arguments for (std::vector<const char *>::const_iterator i = args.begin() + 1; @@ -147,12 +150,19 @@ int DataScan::main(const std::vector<const char*> &args) continue; } + // Trailing positional argument if (i + 1 == args.end() && (command == "scan_inodes" || command == "scan_extents")) { data_pool_name = *i; continue; } + // Trailing positional argument + if (i + 1 == args.end() && (command == "tmap_upgrade")) { + metadata_pool_name = *i; + continue; + } + // Fall through: unhandled std::cerr << "Unknown argument '" << *i << "'" << std::endl; return -EINVAL; @@ -207,9 +217,9 @@ int DataScan::main(const std::vector<const char*> &args) } } + // Initialize metadata_io from MDSMap for scan_frags if (command == "scan_frags") { int const metadata_pool_id = mdsmap->get_metadata_pool(); - dout(4) << "resolving metadata pool " << metadata_pool_id << dendl; std::string metadata_pool_name; 
int r = rados.pool_reverse_lookup(metadata_pool_id, &metadata_pool_name); @@ -225,6 +235,30 @@ int DataScan::main(const std::vector<const char*> &args) } } + // Initialize metadata_io from pool on command line for tmap_upgrade + if (command == "tmap_upgrade") { + if (metadata_pool_name.empty()) { + std::cerr << "Metadata pool not specified" << std::endl; + usage(); + return -EINVAL; + } + + long metadata_pool_id = rados.pool_lookup(metadata_pool_name.c_str()); + if (metadata_pool_id < 0) { + std::cerr << "Pool '" << metadata_pool_name << "' not found!" << std::endl; + return -ENOENT; + } else { + dout(4) << "pool '" << metadata_pool_name + << "' has ID " << metadata_pool_id << dendl; + } + + r = rados.ioctx_create(metadata_pool_name.c_str(), metadata_io); + if (r != 0) { + return r; + } + std::cerr << "Created ioctx for " << metadata_pool_name << std::endl; + } + // Finally, dispatch command if (command == "scan_inodes") { return scan_inodes(); @@ -232,6 +266,8 @@ int DataScan::main(const std::vector<const char*> &args) return scan_extents(); } else if (command == "scan_frags") { return scan_frags(); + } else if (command == "tmap_upgrade") { + return tmap_upgrade(); } else if (command == "init") { return driver->init_roots(mdsmap->get_first_data_pool()); } else { @@ -772,7 +808,51 @@ int DataScan::scan_inodes() }); } +bool DataScan::valid_ino(inodeno_t ino) const +{ + return (ino >= inodeno_t((1ull << 40))) + || (MDS_INO_IS_STRAY(ino)) + || (MDS_INO_IS_MDSDIR(ino)) + || ino == MDS_INO_ROOT + || ino == MDS_INO_CEPH; +} + +int DataScan::tmap_upgrade() +{ + librados::NObjectIterator i = metadata_io.nobjects_begin(); + const librados::NObjectIterator i_end = metadata_io.nobjects_end(); + + int overall_r = 0; + for (; i != i_end; ++i) { + const std::string oid = i->get_oid(); + + uint64_t inode_no = 0; + uint64_t frag_id = 0; + int r = parse_oid(oid, &inode_no, &frag_id); + if (r == -EINVAL) { + dout(10) << "Not a dirfrag: '" << oid << "'" << dendl; + continue; + } else 
{ + // parse_oid can only do 0 or -EINVAL + assert(r == 0); + } + + if (!valid_ino(inode_no)) { + dout(10) << "Not a difrag (invalid ino): '" << oid << "'" << dendl; + continue; + } + + r = metadata_io.tmap_to_omap(oid, true); + dout(20) << "tmap2omap(" << oid << "): " << r << dendl; + if (r < 0) { + derr << "Error converting '" << oid << "': " << cpp_strerror(r) << dendl; + overall_r = r; + } + } + + return overall_r; +} int DataScan::scan_frags() { diff --git a/src/tools/cephfs/DataScan.h b/src/tools/cephfs/DataScan.h index 67c076098c7..3b94063f8ae 100644 --- a/src/tools/cephfs/DataScan.h +++ b/src/tools/cephfs/DataScan.h @@ -249,6 +249,16 @@ class DataScan : public MDSUtility, public MetadataTool */ int scan_frags(); + /** + * Check if an inode number is in the permitted ranges + */ + bool valid_ino(inodeno_t ino) const; + + /** + * Invoke tmap_to_omap on all metadata pool objects + */ + int tmap_upgrade(); + // Accept pools which are not in the MDSMap bool force_pool; // Respond to decode errors by overwriting |