summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGregory Farnum <greg@gregs42.com>2016-03-10 07:22:42 +0100
committerGregory Farnum <greg@gregs42.com>2016-03-10 07:22:42 +0100
commit9ae848687567770ce2867bbe7e29ac7ecb14c21e (patch)
tree3fa9eda19482b074f83310467b1b50b696fd1596
parentMerge pull request #8014 from ukernel/wip-15038 (diff)
parentdoc: add notes about upgrading cephfs (diff)
downloadceph-9ae848687567770ce2867bbe7e29ac7ecb14c21e.tar.xz
ceph-9ae848687567770ce2867bbe7e29ac7ecb14c21e.zip
Merge pull request #7003 from jcsp/wip-cephfs-tmap-migrate
tools/cephfs: add tmap_upgrade Reviewed-by: David Zafman <dzafman@redhat.com> Reviewed-by: Yan, Zheng <zyan@redhat.com> Reviewed-by: Greg Farnum <gfarnum@redhat.com>
-rw-r--r--PendingReleaseNotes4
-rw-r--r--doc/cephfs/index.rst1
-rw-r--r--doc/cephfs/upgrading.rst34
-rw-r--r--src/test/CMakeLists.txt11
-rw-r--r--src/test/Makefile-client.am5
-rw-r--r--src/test/librados/tmap_migrate.cc70
-rw-r--r--src/tools/cephfs/DataScan.cc82
-rw-r--r--src/tools/cephfs/DataScan.h10
8 files changed, 216 insertions, 1 deletions
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 5da5a81b18d..96735081c36 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -24,6 +24,10 @@ v10.0.0
limit max waiting time of monitor election process, which was previously
restricted by 'mon_lease'.
+* CephFS filesystems created using versions older than Firefly (0.80) must
+ use the new "cephfs-data-scan tmap_upgrade" command after upgrading to
+ Jewel. See 'Upgrading' in the CephFS documentation for more information.
+
v9.3.0
======
* Some symbols wrongly exposed by librados in v9.1.0 and v9.2.0 were removed.
diff --git a/doc/cephfs/index.rst b/doc/cephfs/index.rst
index ad30220b067..316db1d452e 100644
--- a/doc/cephfs/index.rst
+++ b/doc/cephfs/index.rst
@@ -92,6 +92,7 @@ authentication keyring.
Troubleshooting <troubleshooting>
Disaster recovery <disaster-recovery>
Client authentication <client-auth>
+ Upgrading old filesystems <upgrading>
.. raw:: html
diff --git a/doc/cephfs/upgrading.rst b/doc/cephfs/upgrading.rst
new file mode 100644
index 00000000000..7ee3f098626
--- /dev/null
+++ b/doc/cephfs/upgrading.rst
@@ -0,0 +1,34 @@
+
+Upgrading pre-Firefly filesystems past Jewel
+============================================
+
+.. tip::
+
+ This advice only applies to users with filesystems
+ created using versions of Ceph older than *Firefly* (0.80).
+ Users creating new filesystems may disregard this advice.
+
+Pre-Firefly versions of Ceph used a now-deprecated format
+for storing CephFS directory objects, called TMAPs. Support
+for reading these in RADOS will be removed after the Jewel
+release of Ceph, so for upgrading CephFS users it is important
+to ensure that any old directory objects have been converted.
+
+After installing Jewel on all your MDS and OSD servers, and restarting
+the services, run the following command:
+
+::
+
+ cephfs-data-scan tmap_upgrade <metadata pool name>
+
+This only needs to be run once, and it is not necessary to
+stop any other services while it runs. The command may take some
+time to execute, as it iterates over all objects in your metadata
+pool. It is safe to continue using your filesystem as normal while
+it executes. If the command aborts for any reason, it is safe
+to simply run it again.
+
+If you are upgrading a pre-Firefly CephFS filesystem to a newer Ceph version
+than Jewel, you must first upgrade to Jewel and run the ``tmap_upgrade``
+command before completing your upgrade to the latest version.
+
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 895437cc880..1cf72ab0c1f 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -1900,6 +1900,17 @@ set_target_properties(test_rados_api_misc PROPERTIES COMPILE_FLAGS
target_link_libraries(test_rados_api_misc
librados global ${UNITTEST_LIBS} radostest)
+add_executable(test_rados_api_tmap_migrate
+ ../tools/cephfs/DataScan.cc
+ ../tools/cephfs/MDSUtility.cc
+ librados/tmap_migrate.cc
+ $<TARGET_OBJECTS:heap_profiler_objs>
+ )
+set_target_properties(test_rados_api_tmap_migrate PROPERTIES COMPILE_FLAGS
+ ${UNITTEST_CXX_FLAGS})
+target_link_libraries(test_rados_api_tmap_migrate
+ librados mds osdc global cls_cephfs_client ${UNITTEST_LIBS} ${ALLOC_LIBS} radostest)
+
add_executable(test_rados_api_lock
librados/lock.cc
)
diff --git a/src/test/Makefile-client.am b/src/test/Makefile-client.am
index 2b0e804e526..c1a28348ebb 100644
--- a/src/test/Makefile-client.am
+++ b/src/test/Makefile-client.am
@@ -285,6 +285,11 @@ ceph_test_rados_api_lock_LDADD = $(LIBRADOS) $(UNITTEST_LDADD) $(RADOS_TEST_LDAD
ceph_test_rados_api_lock_CXXFLAGS = $(UNITTEST_CXXFLAGS)
bin_DEBUGPROGRAMS += ceph_test_rados_api_lock
+ceph_test_rados_api_tmap_migrate_SOURCES = test/librados/tmap_migrate.cc tools/cephfs/DataScan.cc tools/cephfs/MDSUtility.cc
+ceph_test_rados_api_tmap_migrate_LDADD = $(LIBRADOS) $(UNITTEST_LDADD) $(LIBMDS) libcls_cephfs_client.la $(CEPH_GLOBAL) $(RADOS_TEST_LDADD)
+ceph_test_rados_api_tmap_migrate_CXXFLAGS = $(UNITTEST_CXXFLAGS)
+bin_DEBUGPROGRAMS += ceph_test_rados_api_tmap_migrate
+
ceph_test_stress_watch_SOURCES = test/test_stress_watch.cc
ceph_test_stress_watch_LDADD = \
diff --git a/src/test/librados/tmap_migrate.cc b/src/test/librados/tmap_migrate.cc
new file mode 100644
index 00000000000..f79e5849f75
--- /dev/null
+++ b/src/test/librados/tmap_migrate.cc
@@ -0,0 +1,70 @@
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "test/librados/test.h"
+#include "test/librados/TestCase.h"
+#include "include/encoding.h"
+#include "tools/cephfs/DataScan.h"
+#include "global/global_init.h"
+
+#include <algorithm>
+#include <errno.h>
+#include "gtest/gtest.h"
+
+
+using namespace librados;
+
+typedef RadosTestPP TmapMigratePP;
+
+// End-to-end check of the "tmap_upgrade" command of DataScan: write one
+// TMAP-format object and one OMAP-format object into the test pool, run
+// the command against that pool, then verify that the TMAP object can be
+// read back through the OMAP interface and that the OMAP object is
+// unchanged.
+TEST_F(TmapMigratePP, DataScan) {
+  std::vector<const char *> args;
+  global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT, CODE_ENVIRONMENT_UTILITY, 0);
+  common_init_finish(g_ceph_context);
+
+  // DataScan isn't namespace-aware, so override RadosTestPP's default
+  // behaviour of putting everything into a namespace
+  ioctx.set_namespace("");
+
+  // Build the TMAP payload: an (empty) header followed by the key/value map
+  bufferlist header;
+  std::map<std::string, bufferlist> kvs;
+  bufferlist val;
+  val.append("custard");
+  kvs.insert({"rhubarb", val});
+
+  bufferlist tmap_trans;
+  ::encode(header, tmap_trans);
+  ::encode(kvs, tmap_trans);
+
+  // Create a TMAP object (named like a dirfrag so tmap_upgrade picks it up)
+  ASSERT_EQ(0, ioctx.tmap_put("10000000000.00000000", tmap_trans));
+
+  // Create an OMAP object
+  std::map<std::string, bufferlist> omap_kvs;
+  bufferlist omap_val;
+  omap_val.append("waffles");
+  omap_kvs.insert({"tasty", omap_val});
+  ASSERT_EQ(0, ioctx.omap_set("10000000001.00000000", omap_kvs));
+
+  DataScan ds;
+  ASSERT_EQ(0, ds.init());
+  int r = ds.main({"tmap_upgrade", pool_name.c_str()});
+  // Shut down before asserting on the result: a failing ASSERT returns
+  // from the test body immediately and would otherwise skip the shutdown.
+  ds.shutdown();
+  ASSERT_EQ(0, r);
+
+  // Check that the TMAP object is now an omap object
+  std::map<std::string, bufferlist> read_vals;
+  ASSERT_EQ(0, ioctx.omap_get_vals("10000000000.00000000", "", 1, &read_vals));
+  // Unsigned literal: size() is unsigned, avoid a sign-compare warning
+  ASSERT_EQ(1u, read_vals.size());
+  bufferlist tmap_expect_val;
+  tmap_expect_val.append("custard");
+  ASSERT_EQ(read_vals.at("rhubarb"), tmap_expect_val);
+
+
+  // Check that the OMAP object is still readable
+  read_vals.clear();
+  ASSERT_EQ(0, ioctx.omap_get_vals("10000000001.00000000", "", 1, &read_vals));
+  ASSERT_EQ(1u, read_vals.size());
+  bufferlist expect_omap_val;
+  expect_omap_val.append("waffles");
+  ASSERT_EQ(read_vals.at("tasty"), expect_omap_val);
+}
+
diff --git a/src/tools/cephfs/DataScan.cc b/src/tools/cephfs/DataScan.cc
index 0a992a32e8f..626cb0ac9f4 100644
--- a/src/tools/cephfs/DataScan.cc
+++ b/src/tools/cephfs/DataScan.cc
@@ -39,6 +39,8 @@ void DataScan::usage()
<< " --force-pool: use data pool even if it is not in MDSMap\n"
<< "\n"
<< " cephfs-data-scan scan_frags [--force-corrupt]\n"
+ << "\n"
+ << " cephfs-data-scan tmap_upgrade <metadata_pool>\n"
<< std::endl;
generic_client_usage();
@@ -131,6 +133,7 @@ int DataScan::main(const std::vector<const char*> &args)
std::string const &command = args[0];
std::string data_pool_name;
+ std::string metadata_pool_name;
// Consume any known --key val or --flag arguments
for (std::vector<const char *>::const_iterator i = args.begin() + 1;
@@ -147,12 +150,19 @@ int DataScan::main(const std::vector<const char*> &args)
continue;
}
+ // Trailing positional argument
if (i + 1 == args.end() &&
(command == "scan_inodes" || command == "scan_extents")) {
data_pool_name = *i;
continue;
}
+ // Trailing positional argument
+ if (i + 1 == args.end() && (command == "tmap_upgrade")) {
+ metadata_pool_name = *i;
+ continue;
+ }
+
// Fall through: unhandled
std::cerr << "Unknown argument '" << *i << "'" << std::endl;
return -EINVAL;
@@ -207,9 +217,9 @@ int DataScan::main(const std::vector<const char*> &args)
}
}
+ // Initialize metadata_io from MDSMap for scan_frags
if (command == "scan_frags") {
int const metadata_pool_id = mdsmap->get_metadata_pool();
-
dout(4) << "resolving metadata pool " << metadata_pool_id << dendl;
std::string metadata_pool_name;
int r = rados.pool_reverse_lookup(metadata_pool_id, &metadata_pool_name);
@@ -225,6 +235,30 @@ int DataScan::main(const std::vector<const char*> &args)
}
}
+ // Initialize metadata_io from pool on command line for tmap_upgrade
+ if (command == "tmap_upgrade") {
+ if (metadata_pool_name.empty()) {
+ std::cerr << "Metadata pool not specified" << std::endl;
+ usage();
+ return -EINVAL;
+ }
+
+ long metadata_pool_id = rados.pool_lookup(metadata_pool_name.c_str());
+ if (metadata_pool_id < 0) {
+ std::cerr << "Pool '" << metadata_pool_name << "' not found!" << std::endl;
+ return -ENOENT;
+ } else {
+ dout(4) << "pool '" << metadata_pool_name
+ << "' has ID " << metadata_pool_id << dendl;
+ }
+
+ r = rados.ioctx_create(metadata_pool_name.c_str(), metadata_io);
+ if (r != 0) {
+ return r;
+ }
+ std::cerr << "Created ioctx for " << metadata_pool_name << std::endl;
+ }
+
// Finally, dispatch command
if (command == "scan_inodes") {
return scan_inodes();
@@ -232,6 +266,8 @@ int DataScan::main(const std::vector<const char*> &args)
return scan_extents();
} else if (command == "scan_frags") {
return scan_frags();
+ } else if (command == "tmap_upgrade") {
+ return tmap_upgrade();
} else if (command == "init") {
return driver->init_roots(mdsmap->get_first_data_pool());
} else {
@@ -772,7 +808,51 @@ int DataScan::scan_inodes()
});
}
+/**
+ * Report whether an inode number falls in one of the accepted ranges:
+ * the root or ceph inode, anything matched by MDS_INO_IS_STRAY or
+ * MDS_INO_IS_MDSDIR, or any value at or above 2^40.
+ */
+bool DataScan::valid_ino(inodeno_t ino) const
+{
+  // Individually named inodes
+  if (ino == MDS_INO_ROOT || ino == MDS_INO_CEPH) {
+    return true;
+  }
+
+  // Ranges recognised by the MDS_INO_IS_* helpers
+  if (MDS_INO_IS_MDSDIR(ino) || MDS_INO_IS_STRAY(ino)) {
+    return true;
+  }
+
+  // Everything from 2^40 upwards
+  const inodeno_t lowest_high_ino((1ull << 40));
+  return ino >= lowest_high_ino;
+}
+
+/**
+ * Iterate over every object reachable through metadata_io and invoke
+ * tmap_to_omap on each one whose name parses as a dirfrag and whose
+ * inode number passes valid_ino(). Objects that do not qualify are
+ * skipped with a debug message.
+ *
+ * A failed conversion is logged and the scan carries on with the
+ * remaining objects.
+ *
+ * @return 0 if no conversion failed, otherwise the negative error code
+ *         of the most recent conversion that failed.
+ */
+int DataScan::tmap_upgrade()
+{
+  librados::NObjectIterator i = metadata_io.nobjects_begin();
+  const librados::NObjectIterator i_end = metadata_io.nobjects_end();
+
+  int overall_r = 0;
+  for (; i != i_end; ++i) {
+    const std::string oid = i->get_oid();
+
+    uint64_t inode_no = 0;
+    uint64_t frag_id = 0;
+    int r = parse_oid(oid, &inode_no, &frag_id);
+    if (r == -EINVAL) {
+      dout(10) << "Not a dirfrag: '" << oid << "'" << dendl;
+      continue;
+    }
+    // parse_oid can only do 0 or -EINVAL
+    assert(r == 0);
+
+    if (!valid_ino(inode_no)) {
+      dout(10) << "Not a dirfrag (invalid ino): '" << oid << "'" << dendl;
+      continue;
+    }
+
+    // Second argument is librados' "nullok" flag.
+    // NOTE(review): presumably this lets objects that hold no TMAP data
+    // (e.g. already OMAP) succeed -- confirm against librados docs.
+    r = metadata_io.tmap_to_omap(oid, true);
+    dout(20) << "tmap2omap(" << oid << "): " << r << dendl;
+    if (r < 0) {
+      derr << "Error converting '" << oid << "': " << cpp_strerror(r) << dendl;
+      // Remember the failure but keep going so one bad object does not
+      // prevent the rest of the pool from being converted.
+      overall_r = r;
+    }
+  }
+
+  return overall_r;
+}
int DataScan::scan_frags()
{
diff --git a/src/tools/cephfs/DataScan.h b/src/tools/cephfs/DataScan.h
index 67c076098c7..3b94063f8ae 100644
--- a/src/tools/cephfs/DataScan.h
+++ b/src/tools/cephfs/DataScan.h
@@ -249,6 +249,16 @@ class DataScan : public MDSUtility, public MetadataTool
*/
int scan_frags();
+ /**
+ * Check if an inode number is in the permitted ranges
+ */
+ bool valid_ino(inodeno_t ino) const;
+
+ /**
+ * Invoke tmap_to_omap on all metadata pool objects
+ */
+ int tmap_upgrade();
+
// Accept pools which are not in the MDSMap
bool force_pool;
// Respond to decode errors by overwriting