summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-12-20 01:15:27 +0100
committer Samuel Just <sam.just@inktank.com> 2014-02-18 05:12:16 +0100
commit 33c38301580e523e59ca2cd3c86a8f3282c75a9b (patch)
tree 07f03bc6b7105a30140753fbb16b48f8ebbeb13f
parent ObjectStore: add bufferlist/string getattr (diff)
download ceph-33c38301580e523e59ca2cd3c86a8f3282c75a9b.tar.xz
ceph-33c38301580e523e59ca2cd3c86a8f3282c75a9b.zip
osd/ECUtil: add ec encode/decode util helpers
Signed-off-by: Samuel Just <sam.just@inktank.com>
-rw-r--r-- src/osd/ECUtil.cc 196
-rw-r--r-- src/osd/ECUtil.h 154
-rw-r--r-- src/osd/Makefile.am 2
-rw-r--r-- src/test/encoding/types.h 3
-rw-r--r-- src/test/osd/TestECBackend.cc 40
5 files changed, 395 insertions, 0 deletions
diff --git a/src/osd/ECUtil.cc b/src/osd/ECUtil.cc
new file mode 100644
index 00000000000..1f3b45857da
--- /dev/null
+++ b/src/osd/ECUtil.cc
@@ -0,0 +1,196 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+
+#include <errno.h>
+#include "include/encoding.h"
+#include "ECUtil.h"
+
+/**
+ * Decode a set of equally sized chunk buffers into one contiguous buffer.
+ *
+ * Every buffer in to_decode is walked in slices of sinfo.get_chunk_size()
+ * bytes. For each slice position the slices of all buffers are handed to
+ * ec_impl->decode_concat() and the result is appended to *out.
+ *
+ * @param sinfo      stripe geometry (chunk size and stripe width)
+ * @param ec_impl    erasure code implementation used for decode_concat()
+ * @param to_decode  chunk index -> chunk data; must be non-empty, all
+ *                   buffers the same length, and that length a multiple
+ *                   of the chunk size
+ * @param out        receives the decoded data; must be non-null and empty
+ * @return 0 always; precondition violations and decode failures trip an
+ *         assert
+ */
+int ECUtil::decode(
+  const stripe_info_t &sinfo,
+  ErasureCodeInterfaceRef &ec_impl,
+  map<int, bufferlist> &to_decode,
+  bufferlist *out) {
+
+  // The emptiness check has to come before begin() is dereferenced:
+  // reading through begin() of an empty map is undefined behaviour.
+  assert(to_decode.size());
+
+  uint64_t total_chunk_size = to_decode.begin()->second.length();
+
+  assert(total_chunk_size % sinfo.get_chunk_size() == 0);
+  assert(out);
+  assert(out->length() == 0);
+
+  for (map<int, bufferlist>::iterator i = to_decode.begin();
+       i != to_decode.end();
+       ++i) {
+    assert(i->second.length() == total_chunk_size);
+  }
+
+  if (total_chunk_size == 0)
+    return 0;
+
+  for (uint64_t i = 0; i < total_chunk_size; i += sinfo.get_chunk_size()) {
+    // Slice one chunk-sized piece at offset i out of every input buffer
+    map<int, bufferlist> chunks;
+    for (map<int, bufferlist>::iterator j = to_decode.begin();
+         j != to_decode.end();
+         ++j) {
+      chunks[j->first].substr_of(j->second, i, sinfo.get_chunk_size());
+    }
+    bufferlist bl;
+    int r = ec_impl->decode_concat(chunks, &bl);
+    // Check the return code first so that a failed decode is reported as
+    // a failed decode rather than as a length mismatch.
+    assert(r == 0);
+    assert(bl.length() == sinfo.get_stripe_width());
+    out->claim_append(bl);
+  }
+  return 0;
+}
+
+/**
+ * Decode a set of equally sized chunk buffers into selected output chunks.
+ *
+ * Every buffer in to_decode is walked in slices of sinfo.get_chunk_size()
+ * bytes. For each slice position ec_impl->decode() is asked for the chunk
+ * indices that are keys of out, and each result is appended to the
+ * bufferlist that out maps that index to.
+ *
+ * @param sinfo      stripe geometry (chunk size)
+ * @param ec_impl    erasure code implementation used for decode()
+ * @param to_decode  chunk index -> chunk data; must be non-empty, all
+ *                   buffers the same length, and that length a multiple
+ *                   of the chunk size
+ * @param out        chunk index -> destination buffer; every pointer must
+ *                   be non-null and every destination must start empty
+ * @return 0 always; precondition violations and decode failures trip an
+ *         assert
+ */
+int ECUtil::decode(
+  const stripe_info_t &sinfo,
+  ErasureCodeInterfaceRef &ec_impl,
+  map<int, bufferlist> &to_decode,
+  map<int, bufferlist*> &out) {
+
+  // The emptiness check has to come before begin() is dereferenced:
+  // reading through begin() of an empty map is undefined behaviour.
+  assert(to_decode.size());
+
+  uint64_t total_chunk_size = to_decode.begin()->second.length();
+
+  assert(total_chunk_size % sinfo.get_chunk_size() == 0);
+
+  for (map<int, bufferlist>::iterator i = to_decode.begin();
+       i != to_decode.end();
+       ++i) {
+    assert(i->second.length() == total_chunk_size);
+  }
+
+  if (total_chunk_size == 0)
+    return 0;
+
+  // The keys of out are exactly the chunk indices requested from decode()
+  set<int> need;
+  for (map<int, bufferlist*>::iterator i = out.begin();
+       i != out.end();
+       ++i) {
+    assert(i->second);
+    assert(i->second->length() == 0);
+    need.insert(i->first);
+  }
+
+  for (uint64_t i = 0; i < total_chunk_size; i += sinfo.get_chunk_size()) {
+    // Slice one chunk-sized piece at offset i out of every input buffer
+    map<int, bufferlist> chunks;
+    for (map<int, bufferlist>::iterator j = to_decode.begin();
+         j != to_decode.end();
+         ++j) {
+      chunks[j->first].substr_of(j->second, i, sinfo.get_chunk_size());
+    }
+    map<int, bufferlist> out_bls;
+    int r = ec_impl->decode(need, chunks, &out_bls);
+    assert(r == 0);
+    for (map<int, bufferlist*>::iterator j = out.begin();
+         j != out.end();
+         ++j) {
+      assert(out_bls.count(j->first));
+      assert(out_bls[j->first].length() == sinfo.get_chunk_size());
+      j->second->claim_append(out_bls[j->first]);
+    }
+  }
+  // Each destination must have grown to the same length as the inputs
+  for (map<int, bufferlist*>::iterator i = out.begin();
+       i != out.end();
+       ++i) {
+    assert(i->second->length() == total_chunk_size);
+  }
+  return 0;
+}
+
+/**
+ * Encode a stripe-aligned buffer into per-chunk buffers.
+ *
+ * The input is cut into pieces of sinfo.get_stripe_width() bytes. Each
+ * piece is passed to ec_impl->encode() and every chunk it returns is
+ * appended to the entry of *out with the same index.
+ *
+ * @param sinfo    stripe geometry (stripe width and chunk size)
+ * @param ec_impl  erasure code implementation used for encode()
+ * @param in       data to encode; its length must be a multiple of the
+ *                 stripe width
+ * @param want     chunk indices passed through to ec_impl->encode()
+ * @param out      receives chunk index -> encoded data; must be non-null
+ *                 and empty
+ * @return 0 always; precondition violations and encode failures trip an
+ *         assert
+ */
+int ECUtil::encode(
+  const stripe_info_t &sinfo,
+  ErasureCodeInterfaceRef &ec_impl,
+  bufferlist &in,
+  const set<int> &want,
+  map<int, bufferlist> *out) {
+
+  uint64_t logical_size = in.length();
+
+  assert(logical_size % sinfo.get_stripe_width() == 0);
+  assert(out);
+  assert(out->empty());
+
+  if (logical_size == 0)
+    return 0;
+
+  for (uint64_t i = 0; i < logical_size; i += sinfo.get_stripe_width()) {
+    map<int, bufferlist> encoded;
+    bufferlist buf;
+    buf.substr_of(in, i, sinfo.get_stripe_width());
+    int r = ec_impl->encode(want, buf, &encoded);
+    assert(r == 0);
+    // Named j so that it does not shadow the stripe offset i above
+    for (map<int, bufferlist>::iterator j = encoded.begin();
+         j != encoded.end();
+         ++j) {
+      assert(j->second.length() == sinfo.get_chunk_size());
+      (*out)[j->first].claim_append(j->second);
+    }
+  }
+
+  // Every output buffer must map back to exactly the input length
+  for (map<int, bufferlist>::iterator i = out->begin();
+       i != out->end();
+       ++i) {
+    assert(i->second.length() % sinfo.get_chunk_size() == 0);
+    assert(
+      sinfo.aligned_chunk_offset_to_logical_offset(i->second.length()) ==
+      logical_size);
+  }
+  return 0;
+}
+
+// Serialize this HashInfo into bl: total_chunk_size first, then
+// cumulative_shard_hashes, framed by ENCODE_START/ENCODE_FINISH.
+// NOTE(review): the two 1s are presumably the struct version and the
+// compat version -- confirm against include/encoding.h.
+void ECUtil::HashInfo::encode(bufferlist &bl) const
+{
+ ENCODE_START(1, 1, bl);
+ ::encode(total_chunk_size, bl);
+ ::encode(cumulative_shard_hashes, bl);
+ ENCODE_FINISH(bl);
+}
+
+// Deserialize this HashInfo from bl, reading the fields in the same order
+// HashInfo::encode writes them: total_chunk_size, then
+// cumulative_shard_hashes.
+void ECUtil::HashInfo::decode(bufferlist::iterator &bl)
+{
+ DECODE_START(1, bl);
+ ::decode(total_chunk_size, bl);
+ ::decode(cumulative_shard_hashes, bl);
+ DECODE_FINISH(bl);
+}
+
+/**
+ * Dump this HashInfo through a Formatter.
+ *
+ * Emits total_chunk_size, then one {shard, hash} entry per element of
+ * cumulative_shard_hashes. The entries all carry the same name ("hash"),
+ * so they are wrapped in an array section; inside an object section they
+ * would be repeated keys of a single object.
+ *
+ * @param f formatter to write to
+ */
+void ECUtil::HashInfo::dump(Formatter *f) const
+{
+  f->dump_unsigned("total_chunk_size", total_chunk_size);
+  f->open_array_section("cumulative_shard_hashes");
+  for (unsigned i = 0; i != cumulative_shard_hashes.size(); ++i) {
+    f->open_object_section("hash");
+    f->dump_unsigned("shard", i);
+    f->dump_unsigned("hash", cumulative_shard_hashes[i]);
+    f->close_section();
+  }
+  f->close_section();
+}
+
+// Append two sample HashInfo objects to o: a three-shard instance that has
+// had 20 zero bytes per shard appended twice, and an untouched four-shard
+// instance. The caller receives the raw pointers.
+void ECUtil::HashInfo::generate_test_instances(list<HashInfo*>& o)
+{
+  HashInfo *filled = new HashInfo(3);
+  o.push_back(filled);
+
+  bufferlist zeros;
+  zeros.append_zero(20);
+  map<int, bufferlist> per_shard;
+  for (int shard = 0; shard < 3; ++shard)
+    per_shard[shard] = zeros;
+
+  // Second append starts at the size left behind by the first one
+  filled->append(0, per_shard);
+  filled->append(20, per_shard);
+
+  o.push_back(new HashInfo(4));
+}
+
+// Key string matched by ECUtil::is_hinfo_key_string() and returned by
+// ECUtil::get_hinfo_key().
+const string HINFO_KEY = "hinfo_key";
+
+// True when key is exactly the HINFO_KEY string.
+bool ECUtil::is_hinfo_key_string(const string &key)
+{
+  const bool matches = (key.compare(HINFO_KEY) == 0);
+  return matches;
+}
+
+// Returns a reference to the file-scope HINFO_KEY string.
+const string &ECUtil::get_hinfo_key()
+{
+ return HINFO_KEY;
+}
diff --git a/src/osd/ECUtil.h b/src/osd/ECUtil.h
new file mode 100644
index 00000000000..4952ed8be34
--- /dev/null
+++ b/src/osd/ECUtil.h
@@ -0,0 +1,154 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2013 Inktank Storage, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef ECUTIL_H
+#define ECUTIL_H
+
+#include <map>
+#include <set>
+
+#include <memory>
+#include "erasure-code/ErasureCodeInterface.h"
+#include "include/buffer.h"
+#include "include/assert.h"
+#include "include/encoding.h"
+#include "common/Formatter.h"
+
+namespace ECUtil {
+
+// Chunk layout constants.
+// NOTE(review): units are presumably bytes -- confirm against the users
+// of these constants.
+const uint64_t CHUNK_ALIGNMENT = 64;
+const uint64_t CHUNK_INFO = 8;
+const uint64_t CHUNK_PADDING = 8;
+// Overhead is the info field plus the padding (8 + 8 = 16)
+const uint64_t CHUNK_OVERHEAD = CHUNK_INFO + CHUNK_PADDING;
+
+/**
+ * Stripe geometry and the offset arithmetic that follows from it.
+ *
+ * A stripe of stripe_width logical bytes corresponds to chunk_size bytes
+ * at the same position in each chunk, where
+ * chunk_size = stripe_width / stripe_size. The helpers convert between
+ * logical offsets (multiples of stripe_width when "aligned") and chunk
+ * offsets (multiples of chunk_size).
+ */
+class stripe_info_t {
+  const uint64_t stripe_size;
+  const uint64_t stripe_width;
+  const uint64_t chunk_size;
+public:
+  /**
+   * @param stripe_size   divisor of stripe_width that yields the chunk
+   *                      size; must be non-zero
+   * @param stripe_width  logical bytes per stripe; must be a multiple of
+   *                      stripe_size
+   */
+  stripe_info_t(uint64_t stripe_size, uint64_t stripe_width)
+    : stripe_size(stripe_size), stripe_width(stripe_width),
+      // Member initializers run before the asserts in the body, so guard
+      // the division here and let the assert below report a zero divisor.
+      chunk_size(stripe_size ? stripe_width / stripe_size : 0) {
+    assert(stripe_size != 0);
+    assert(stripe_width % stripe_size == 0);
+  }
+  /// Logical bytes per stripe
+  uint64_t get_stripe_width() const {
+    return stripe_width;
+  }
+  /// Bytes each chunk holds per stripe
+  uint64_t get_chunk_size() const {
+    return chunk_size;
+  }
+  /// Chunk offset of the stripe containing logical offset (rounds down)
+  uint64_t logical_to_prev_chunk_offset(uint64_t offset) const {
+    return (offset / stripe_width) * chunk_size;
+  }
+  /// Chunk offset of the first stripe boundary at or after logical offset
+  uint64_t logical_to_next_chunk_offset(uint64_t offset) const {
+    // Round up without forming offset + stripe_width - 1, which would
+    // wrap for offsets close to the top of the uint64_t range.
+    uint64_t stripes = offset / stripe_width;
+    if (offset % stripe_width)
+      ++stripes;
+    return stripes * chunk_size;
+  }
+  /// Logical offset rounded down to a stripe boundary
+  uint64_t logical_to_prev_stripe_offset(uint64_t offset) const {
+    return offset - (offset % stripe_width);
+  }
+  /// Logical offset rounded up to a stripe boundary
+  uint64_t logical_to_next_stripe_offset(uint64_t offset) const {
+    return offset % stripe_width ?
+      offset - (offset % stripe_width) + stripe_width :
+      offset;
+  }
+  /// Chunk offset for a logical offset that is already stripe aligned
+  uint64_t aligned_logical_offset_to_chunk_offset(uint64_t offset) const {
+    assert(offset % stripe_width == 0);
+    return (offset / stripe_width) * chunk_size;
+  }
+  /// Logical offset for a chunk offset that is already chunk aligned
+  uint64_t aligned_chunk_offset_to_logical_offset(uint64_t offset) const {
+    assert(offset % chunk_size == 0);
+    return (offset / chunk_size) * stripe_width;
+  }
+  /// Convert a stripe-aligned (offset, length) pair to chunk units;
+  /// both members go through the same aligned conversion
+  pair<uint64_t, uint64_t> aligned_offset_len_to_chunk(
+    pair<uint64_t, uint64_t> in) const {
+    return make_pair(
+      aligned_logical_offset_to_chunk_offset(in.first),
+      aligned_logical_offset_to_chunk_offset(in.second));
+  }
+  /// Widen an (offset, length) pair to stripe boundaries: the offset is
+  /// rounded down, and the length is rounded up after adding the bytes
+  /// the offset moved back by
+  pair<uint64_t, uint64_t> offset_len_to_stripe_bounds(
+    pair<uint64_t, uint64_t> in) const {
+    uint64_t off = logical_to_prev_stripe_offset(in.first);
+    uint64_t len = logical_to_next_stripe_offset(
+      (in.first - off) + in.second);
+    return make_pair(off, len);
+  }
+};
+
+/// Decode the equally sized chunk buffers in to_decode, one chunk-sized
+/// slice at a time, and append the concatenated result to *out.
+/// out must be non-null and empty. Returns 0; violations trip an assert.
+int decode(
+ const stripe_info_t &sinfo,
+ ErasureCodeInterfaceRef &ec_impl,
+ map<int, bufferlist> &to_decode,
+ bufferlist *out);
+
+/// Decode the equally sized chunk buffers in to_decode, one chunk-sized
+/// slice at a time, producing only the chunk indices that are keys of out
+/// and appending each to the buffer out maps it to. Every destination
+/// must be non-null and empty. Returns 0; violations trip an assert.
+int decode(
+ const stripe_info_t &sinfo,
+ ErasureCodeInterfaceRef &ec_impl,
+ map<int, bufferlist> &to_decode,
+ map<int, bufferlist*> &out);
+
+/// Encode in, whose length must be a multiple of the stripe width, one
+/// stripe at a time, appending each resulting chunk to the entry of *out
+/// with the same index. out must be non-null and empty. Returns 0;
+/// violations trip an assert.
+int encode(
+ const stripe_info_t &sinfo,
+ ErasureCodeInterfaceRef &ec_impl,
+ bufferlist &in,
+ const set<int> &want,
+ map<int, bufferlist> *out);
+
+/**
+ * Running size and per-shard crc32c of appended data.
+ *
+ * total_chunk_size is the number of bytes appended to each shard so far.
+ * cumulative_shard_hashes[i] is the crc32c of shard i, updated on every
+ * append by passing the previous value into bufferlist::crc32c().
+ */
+class HashInfo {
+  uint64_t total_chunk_size;
+  vector<uint32_t> cumulative_shard_hashes;
+public:
+  /// Empty instance with no shards
+  HashInfo() : total_chunk_size(0) {}
+  /// Instance with num_chunks shards, each hash starting at -1
+  HashInfo(unsigned num_chunks)
+    : total_chunk_size(0),
+      cumulative_shard_hashes(num_chunks, -1) {}
+  /**
+   * Fold one equally sized buffer per shard into the running hashes.
+   *
+   * @param old_size   must equal the current total_chunk_size
+   * @param to_append  shard index -> data; must have one entry per shard,
+   *                   all of the same length
+   */
+  void append(uint64_t old_size, map<int, bufferlist> &to_append) {
+    assert(to_append.size() == cumulative_shard_hashes.size());
+    assert(old_size == total_chunk_size);
+    // A shardless HashInfo accepts an empty map; there is nothing to add
+    // and begin() must not be dereferenced.
+    if (to_append.empty())
+      return;
+    uint64_t size_to_append = to_append.begin()->second.length();
+    for (map<int, bufferlist>::iterator i = to_append.begin();
+         i != to_append.end();
+         ++i) {
+      assert(size_to_append == i->second.length());
+      assert((unsigned)i->first < cumulative_shard_hashes.size());
+      uint32_t new_hash = i->second.crc32c(cumulative_shard_hashes[i->first]);
+      cumulative_shard_hashes[i->first] = new_hash;
+    }
+    total_chunk_size += size_to_append;
+  }
+  /// Reset to the state the sized constructor produces for the same
+  /// number of shards: size 0 and every hash back at -1. Resetting to
+  /// any other value would make later hashes differ from those of a
+  /// freshly constructed object fed the same data.
+  void clear() {
+    total_chunk_size = 0;
+    cumulative_shard_hashes = vector<uint32_t>(
+      cumulative_shard_hashes.size(),
+      -1);
+  }
+  void encode(bufferlist &bl) const;
+  void decode(bufferlist::iterator &bl);
+  void dump(Formatter *f) const;
+  static void generate_test_instances(list<HashInfo*>& o);
+  /// Running hash of the given shard; shard must be a valid index
+  uint32_t get_chunk_hash(int shard) const {
+    assert((unsigned)shard < cumulative_shard_hashes.size());
+    return cumulative_shard_hashes[shard];
+  }
+  /// Bytes appended per shard so far
+  uint64_t get_total_chunk_size() const {
+    return total_chunk_size;
+  }
+};
+/// Shared-pointer handle to a HashInfo
+typedef std::tr1::shared_ptr<HashInfo> HashInfoRef;
+
+/// True when key equals the string returned by get_hinfo_key()
+bool is_hinfo_key_string(const string &key);
+/// The hinfo key string.
+/// NOTE(review): presumably the key under which a HashInfo is stored --
+/// confirm against the callers.
+const string &get_hinfo_key();
+
+};
+WRITE_CLASS_ENCODER(ECUtil::HashInfo)
+#endif
diff --git a/src/osd/Makefile.am b/src/osd/Makefile.am
index ecaa9d8e989..6d571731d73 100644
--- a/src/osd/Makefile.am
+++ b/src/osd/Makefile.am
@@ -16,6 +16,7 @@ libosd_la_SOURCES = \
common/TrackedOp.cc \
osd/SnapMapper.cc \
osd/osd_types.cc \
+ osd/ECUtil.cc \
objclass/class_api.cc
libosd_la_LIBADD = $(LIBOSDC) $(LIBOS) $(LIBERASURE_CODE)
noinst_LTLIBRARIES += libosd.la
@@ -37,6 +38,7 @@ noinst_HEADERS += \
osd/ReplicatedBackend.h \
osd/TierAgentState.h \
osd/ECBackend.h \
+ osd/ECUtil.h \
osd/ECMsgTypes.h \
osd/Watch.h \
osd/osd_types.h
diff --git a/src/test/encoding/types.h b/src/test/encoding/types.h
index ba577e1935d..678bb627869 100644
--- a/src/test/encoding/types.h
+++ b/src/test/encoding/types.h
@@ -82,6 +82,9 @@ TYPE(PullOp)
TYPE(PushOp)
TYPE(PushReplyOp)
+#include "osd/ECUtil.h"
+TYPE(ECUtil::HashInfo)
+
#include "osd/ECMsgTypes.h"
TYPE(ECSubWrite)
TYPE(ECSubWriteReply)
diff --git a/src/test/osd/TestECBackend.cc b/src/test/osd/TestECBackend.cc
index 98530030aec..11bb53b555b 100644
--- a/src/test/osd/TestECBackend.cc
+++ b/src/test/osd/TestECBackend.cc
@@ -18,3 +18,43 @@
#include <signal.h>
#include "osd/ECBackend.h"
#include "gtest/gtest.h"
+
+// Exercises the offset conversions of ECUtil::stripe_info_t with a
+// 4096-byte stripe split four ways (1024-byte chunks).
+TEST(ECUtil, stripe_info_t)
+{
+  const uint64_t swidth = 4096;
+  const uint64_t ssize = 4;
+
+  ECUtil::stripe_info_t s(ssize, swidth);
+  ASSERT_EQ(s.get_stripe_width(), swidth);
+  ASSERT_EQ(s.get_chunk_size(), swidth / ssize);
+
+  ASSERT_EQ(s.logical_to_next_chunk_offset(0), 0);
+  ASSERT_EQ(s.logical_to_next_chunk_offset(1), s.get_chunk_size());
+  ASSERT_EQ(s.logical_to_next_chunk_offset(swidth - 1),
+            s.get_chunk_size());
+
+  ASSERT_EQ(s.logical_to_prev_chunk_offset(0), 0);
+  ASSERT_EQ(s.logical_to_prev_chunk_offset(swidth), s.get_chunk_size());
+  ASSERT_EQ(s.logical_to_prev_chunk_offset((swidth * 2) - 1),
+            s.get_chunk_size());
+
+  ASSERT_EQ(s.logical_to_next_stripe_offset(0), 0);
+  ASSERT_EQ(s.logical_to_next_stripe_offset(swidth - 1),
+            s.get_stripe_width());
+
+  // The first of these used to be a verbatim duplicate of the second;
+  // check the zero offset instead so the lower boundary is covered.
+  ASSERT_EQ(s.logical_to_prev_stripe_offset(0), 0);
+  ASSERT_EQ(s.logical_to_prev_stripe_offset(swidth), s.get_stripe_width());
+  ASSERT_EQ(s.logical_to_prev_stripe_offset((swidth * 2) - 1),
+            s.get_stripe_width());
+
+  ASSERT_EQ(s.aligned_logical_offset_to_chunk_offset(2*swidth),
+            2*s.get_chunk_size());
+  ASSERT_EQ(s.aligned_chunk_offset_to_logical_offset(2*s.get_chunk_size()),
+            2*s.get_stripe_width());
+
+  ASSERT_EQ(s.aligned_offset_len_to_chunk(make_pair(swidth, 10*swidth)),
+            make_pair(s.get_chunk_size(), 10*s.get_chunk_size()));
+
+  // A 20-byte range straddling the first stripe boundary widens to the
+  // first two stripes
+  ASSERT_EQ(s.offset_len_to_stripe_bounds(make_pair(swidth-10, (uint64_t)20)),
+            make_pair((uint64_t)0, 2*swidth));
+}
+