diff options
author | Jamie Pryde <jamiepry@uk.ibm.com> | 2024-07-09 21:05:58 +0200 |
---|---|---|
committer | Jamie Pryde <jamiepry@uk.ibm.com> | 2024-07-18 16:04:36 +0200 |
commit | 63f63662483149e44a7f4582d625f6030ffb40fc (patch) | |
tree | c2a560861061f90f1544084ac9e8ec6eb95b227e | |
parent | Merge pull request #58530 from zdover23/wip-doc-2024-07-11-radosgw-s3-authent... (diff) | |
download | ceph-63f63662483149e44a7f4582d625f6030ffb40fc.tar.xz ceph-63f63662483149e44a7f4582d625f6030ffb40fc.zip |
erasure-code/isa: Use isa/raid's xor_gen() instead of the region_xor() optimisation
Signed-off-by: Jamie Pryde <jamiepry@uk.ibm.com>
-rw-r--r-- | src/erasure-code/isa/CMakeLists.txt | 20 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsa.cc | 106 | ||||
-rw-r--r-- | src/erasure-code/isa/ErasureCodeIsa.h | 2 | ||||
-rw-r--r-- | src/erasure-code/isa/xor_op.cc | 237 | ||||
-rw-r--r-- | src/erasure-code/isa/xor_op.h | 97 | ||||
-rw-r--r-- | src/test/erasure-code/TestErasureCodeIsa.cc | 1 |
6 files changed, 81 insertions, 382 deletions
diff --git a/src/erasure-code/isa/CMakeLists.txt b/src/erasure-code/isa/CMakeLists.txt index 2486692b843..2ca398ffcb1 100644 --- a/src/erasure-code/isa/CMakeLists.txt +++ b/src/erasure-code/isa/CMakeLists.txt @@ -54,10 +54,19 @@ if(HAVE_NASM_X64_AVX2) ${isal_src_dir}/erasure_code/gf_4vect_mad_avx512.asm ${isal_src_dir}/erasure_code/gf_vect_dot_prod_avx512.asm ${isal_src_dir}/erasure_code/gf_vect_mad_avx512.asm + ${isal_src_dir}/raid/raid_base.c + ${isal_src_dir}/raid/raid_multibinary.asm + ${isal_src_dir}/raid/xor_check_sse.asm + ${isal_src_dir}/raid/xor_gen_sse.asm + ${isal_src_dir}/raid/xor_gen_avx.asm + ${isal_src_dir}/raid/xor_gen_avx512.asm + ${isal_src_dir}/raid/pq_check_sse.asm + ${isal_src_dir}/raid/pq_gen_sse.asm + ${isal_src_dir}/raid/pq_gen_avx.asm + ${isal_src_dir}/raid/pq_gen_avx2.asm ErasureCodeIsa.cc ErasureCodeIsaTableCache.cc ErasureCodePluginIsa.cc - xor_op.cc ) elseif(HAVE_ARMV8_SIMD) set(isa_srcs @@ -77,13 +86,20 @@ elseif(HAVE_ARMV8_SIMD) ${isal_src_dir}/erasure_code/aarch64/gf_vect_mad_neon.S ${isal_src_dir}/erasure_code/aarch64/gf_vect_mul_neon.S ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S + ${isal_src_dir}/raid/raid_base.c + ${isal_src_dir}/raid/aarch64/raid_aarch64_dispatcher.c + ${isal_src_dir}/raid/aarch64/raid_multibinary_arm.S + ${isal_src_dir}/raid/aarch64/xor_check_neon.S + ${isal_src_dir}/raid/aarch64/xor_gen_neon.S + ${isal_src_dir}/raid/aarch64/pq_check_neon.S + ${isal_src_dir}/raid/aarch64/pq_gen_neon.S ErasureCodeIsa.cc ErasureCodeIsaTableCache.cc ErasureCodePluginIsa.cc - xor_op.cc ) set_source_files_properties( ${isal_src_dir}/erasure_code/aarch64/ec_multibinary_arm.S + ${isal_src_dir}/raid/aarch64/raid_multibinary_arm.S PROPERTIES COMPILE_FLAGS "-D__ASSEMBLY__" ) endif() diff --git a/src/erasure-code/isa/ErasureCodeIsa.cc b/src/erasure-code/isa/ErasureCodeIsa.cc index 305667e72e9..1548139756b 100644 --- a/src/erasure-code/isa/ErasureCodeIsa.cc +++ b/src/erasure-code/isa/ErasureCodeIsa.cc @@ -18,7 +18,6 @@ // ----------------------------------------------------------------------------- #include "common/debug.h" #include "ErasureCodeIsa.h" -#include "xor_op.h" #include "include/ceph_assert.h" using namespace std; using namespace ceph; @@ -26,6 +25,7 @@ using namespace ceph; // ----------------------------------------------------------------------------- extern "C" { #include "isa-l/include/erasure_code.h" +#include "isa-l/include/raid.h" } // ----------------------------------------------------------------------------- #define dout_context g_ceph_context @@ -121,10 +121,9 @@ ErasureCodeIsaDefault::isa_encode(char **data, char **coding, int blocksize) { - if (m == 1) // single parity stripe - region_xor((unsigned char**) data, (unsigned char*) coding[0], k, blocksize); + xor_gen(k+m, blocksize, (void**) data); else ec_encode_data(blocksize, k, m, encode_tbls, (unsigned char**) data, (unsigned char**) coding); @@ -157,61 +156,81 @@ ErasureCodeIsaDefault::isa_decode(int *erasures, int nerrs = 0; int i, r, s; + unsigned char *recover_source[k]; + unsigned char *recover_target[m]; + unsigned char *recover_buf[k+1]; + // count the errors for (int l = 0; erasures[l] != -1; l++) { nerrs++; } - unsigned char *recover_source[k]; - unsigned char *recover_target[m]; - - memset(recover_source, 0, sizeof (recover_source)); - memset(recover_target, 0, sizeof (recover_target)); + if (nerrs > m) + return -1; - // --------------------------------------------- - // Assign source and target buffers - // --------------------------------------------- - for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) { - if (!erasure_contains(erasures, i)) { - if (r < k) { + // ----------------------------------- + // Assign source and target buffers. + // ----------------------------------- + if ((m == 1) || + ((matrixtype == kVandermonde) && (nerrs == 1) && (erasures[0] < (k + 1)))) { + // We need a single buffer to use the xor_gen() optimisation. + // The last index must point to the erasure, and index that contained + // the erasure must point to the parity. + memset(recover_buf, 0, sizeof (recover_buf)); + bool parity_set = false; + for (i = 0; i < (k + 1); i++) { + if (erasure_contains(erasures, i)) { + if (i < k) { + recover_buf[i] = (unsigned char*) coding[0]; + recover_buf[k] = (unsigned char*) data[i]; + parity_set = true; + } else { + recover_buf[i] = (unsigned char*) coding[0]; + } + } else { if (i < k) { - recover_source[r] = (unsigned char*) data[i]; + recover_buf[i] = (unsigned char*) data[i]; } else { - recover_source[r] = (unsigned char*) coding[i - k]; + if (!parity_set) { + recover_buf[i] = (unsigned char*) coding[0]; + } } - r++; } - } else { - if (s < m) { - if (i < k) { - recover_target[s] = (unsigned char*) data[i]; - } else { - recover_target[s] = (unsigned char*) coding[i - k]; + } + } + else { + // We need source and target buffers to use ec_encode_data(). + // The erasure must be moved to the target buffer. + memset(recover_source, 0, sizeof (recover_source)); + memset(recover_target, 0, sizeof (recover_target)); + for (i = 0, s = 0, r = 0; ((r < k) || (s < nerrs)) && (i < (k + m)); i++) { + if (!erasure_contains(erasures, i)) { + if (r < k) { + if (i < k) { + recover_source[r] = (unsigned char*) data[i]; + } else { + recover_source[r] = (unsigned char*) coding[i - k]; + } + r++; + } + } else { + if (s < m) { + if (i < k) { + recover_target[s] = (unsigned char*) data[i]; + } else { + recover_target[s] = (unsigned char*) coding[i - k]; + } + s++; } - s++; } } } - if (m == 1) { + if ((m == 1) || + ((matrixtype == kVandermonde) && (nerrs == 1) && (erasures[0] < (k + 1)))) { // single parity decoding - ceph_assert(1 == nerrs); - dout(20) << "isa_decode: reconstruct using region xor [" << - erasures[0] << "]" << dendl; - region_xor(recover_source, recover_target[0], k, blocksize); - return 0; - } - - - if ((matrixtype == kVandermonde) && - (nerrs == 1) && - (erasures[0] < (k + 1))) { - // use xor decoding if a data chunk is missing or the first coding chunk - dout(20) << "isa_decode: reconstruct using region xor [" << - erasures[0] << "]" << dendl; - ceph_assert(1 == s); - ceph_assert(k == r); - region_xor(recover_source, recover_target[0], k, blocksize); + dout(20) << "isa_decode: reconstruct using xor_gen [" << erasures[0] << "]" << dendl; + xor_gen(k+1, blocksize, (void **) recover_buf); return 0; } @@ -221,9 +240,6 @@ ErasureCodeIsaDefault::isa_decode(int *erasures, int decode_index[k]; - if (nerrs > m) - return -1; - std::string erasure_signature; // describes a matrix configuration for caching // --------------------------------------------- diff --git a/src/erasure-code/isa/ErasureCodeIsa.h b/src/erasure-code/isa/ErasureCodeIsa.h index 06c51bbc907..85f1cd9cb46 100644 --- a/src/erasure-code/isa/ErasureCodeIsa.h +++ b/src/erasure-code/isa/ErasureCodeIsa.h @@ -30,6 +30,8 @@ #include "ErasureCodeIsaTableCache.h" // ----------------------------------------------------------------------------- +#define EC_ISA_ADDRESS_ALIGNMENT 32u + class ErasureCodeIsa : public ceph::ErasureCode { public: diff --git a/src/erasure-code/isa/xor_op.cc b/src/erasure-code/isa/xor_op.cc deleted file mode 100644 index 33f133539d5..00000000000 --- a/src/erasure-code/isa/xor_op.cc +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 CERN (Switzerland) - * * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - */ - -// ----------------------------------------------------------------------------- -#include "xor_op.h" -#include <stdio.h> -#include <string.h> -#include "arch/intel.h" -#include "arch/arm.h" - -#if defined(__aarch64__) && defined(__ARM_NEON) - #include <arm_neon.h> -#endif - -#include "include/ceph_assert.h" - -// ----------------------------------------------------------------------------- - - -// ----------------------------------------------------------------------------- - -void -// ----------------------------------------------------------------------------- -byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew) -// ----------------------------------------------------------------------------- -{ - while (cw < ew) - *dw++ ^= *cw++; -} - -// ----------------------------------------------------------------------------- - -void -// ----------------------------------------------------------------------------- -vector_xor(vector_op_t* cw, - vector_op_t* dw, - vector_op_t* ew) -// ----------------------------------------------------------------------------- -{ - ceph_assert(is_aligned(cw, EC_ISA_VECTOR_OP_WORDSIZE)); - ceph_assert(is_aligned(dw, EC_ISA_VECTOR_OP_WORDSIZE)); - ceph_assert(is_aligned(ew, EC_ISA_VECTOR_OP_WORDSIZE)); - while (cw < ew) { - *dw++ ^= *cw++; - } -} - - -// ----------------------------------------------------------------------------- - -void -// ----------------------------------------------------------------------------- -region_xor(unsigned char** src, - unsigned char* parity, - int src_size, - unsigned size) -{ - if (!size) { - // nothing to do - return; - } - - if (!src_size) { - // nothing to do - return; - } - - if (src_size == 1) { - // just copy source to parity - memcpy(parity, src[0], size); - return; - } - - unsigned size_left = size; - - // ---------------------------------------------------------- - // region or vector XOR operations require aligned addresses - // ---------------------------------------------------------- - - bool src_aligned = true; - for (int i = 0; i < src_size; i++) { - src_aligned &= is_aligned(src[i], EC_ISA_VECTOR_OP_WORDSIZE); - } - - if (src_aligned && - is_aligned(parity, EC_ISA_VECTOR_OP_WORDSIZE)) { - -#ifdef __x86_64__ - if (ceph_arch_intel_sse2) { - // ----------------------------- - // use SSE2 region xor function - // ----------------------------- - unsigned region_size = - (size / EC_ISA_VECTOR_SSE2_WORDSIZE) * EC_ISA_VECTOR_SSE2_WORDSIZE; - - size_left -= region_size; - // 64-byte region xor - region_sse2_xor((char**) src, (char*) parity, src_size, region_size); - } else -#elif defined (__aarch64__) && defined(__ARM_NEON) - if (ceph_arch_neon) { - // ----------------------------- - // use NEON region xor function - // ----------------------------- - unsigned region_size = - (size / EC_ISA_VECTOR_NEON_WORDSIZE) * EC_ISA_VECTOR_NEON_WORDSIZE; - size_left -= region_size; - region_neon_xor((char**) src, (char *) parity, src_size, region_size); - } else -#endif - { - // -------------------------------------------- - // use region xor based on vector xor operation - // -------------------------------------------- - unsigned vector_words = size / EC_ISA_VECTOR_OP_WORDSIZE; - unsigned vector_size = vector_words * EC_ISA_VECTOR_OP_WORDSIZE; - memcpy(parity, src[0], vector_size); - - size_left -= vector_size; - vector_op_t* p_vec = (vector_op_t*) parity; - for (int i = 1; i < src_size; i++) { - vector_op_t* s_vec = (vector_op_t*) src[i]; - vector_op_t* e_vec = s_vec + vector_words; - vector_xor(s_vec, p_vec, e_vec); - } - } - } - - if (size_left) { - // -------------------------------------------------- - // xor the not aligned part with byte-wise region xor - // -------------------------------------------------- - memcpy(parity + size - size_left, src[0] + size - size_left, size_left); - for (int i = 1; i < src_size; i++) { - byte_xor(src[i] + size - size_left, parity + size - size_left, src[i] + size); - } - } -} - -// ----------------------------------------------------------------------------- - -void -// ----------------------------------------------------------------------------- -region_sse2_xor(char** src, - char* parity, - int src_size, - unsigned size) -// ----------------------------------------------------------------------------- -{ -#ifdef __x86_64__ - ceph_assert(!(size % EC_ISA_VECTOR_SSE2_WORDSIZE)); - unsigned char* p; - int d, l; - unsigned i; - unsigned char* vbuf[256]; - - for (int v = 0; v < src_size; v++) { - vbuf[v] = (unsigned char*) src[v]; - } - - l = src_size; - p = (unsigned char*) parity; - - for (i = 0; i < size; i += EC_ISA_VECTOR_SSE2_WORDSIZE) { - asm volatile("movdqa %0,%%xmm0" : : "m" (vbuf[0][i])); - asm volatile("movdqa %0,%%xmm1" : : "m" (vbuf[0][i + 16])); - asm volatile("movdqa %0,%%xmm2" : : "m" (vbuf[0][i + 32])); - asm volatile("movdqa %0,%%xmm3" : : "m" (vbuf[0][i + 48])); - - for (d = 1; d < l; d++) { - asm volatile("movdqa %0,%%xmm4" : : "m" (vbuf[d][i])); - asm volatile("movdqa %0,%%xmm5" : : "m" (vbuf[d][i + 16])); - asm volatile("movdqa %0,%%xmm6" : : "m" (vbuf[d][i + 32])); - asm volatile("movdqa %0,%%xmm7" : : "m" (vbuf[d][i + 48])); - asm volatile("pxor %xmm4,%xmm0"); - asm volatile("pxor %xmm5,%xmm1"); - asm volatile("pxor %xmm6,%xmm2"); - asm volatile("pxor %xmm7,%xmm3"); - } - asm volatile("movntdq %%xmm0,%0" : "=m" (p[i])); - asm volatile("movntdq %%xmm1,%0" : "=m" (p[i + 16])); - asm volatile("movntdq %%xmm2,%0" : "=m" (p[i + 32])); - asm volatile("movntdq %%xmm3,%0" : "=m" (p[i + 48])); - } - - asm volatile("sfence" : : : "memory"); -#endif // __x86_64__ - return; -} - -#if defined(__aarch64__) && defined(__ARM_NEON) -void -// ----------------------------------------------------------------------------- -region_neon_xor(char **src, - char *parity, - int src_size, - unsigned size) -// ----------------------------------------------------------------------------- -{ - ceph_assert(!(size % EC_ISA_VECTOR_NEON_WORDSIZE)); - unsigned char *p = (unsigned char *)parity; - unsigned char *vbuf[256] = { NULL }; - for (int v = 0; v < src_size; v++) { - vbuf[v] = (unsigned char *)src[v]; - } - - // ---------------------------------------------------------------------------------------- - // NEON load instructions can load 128bits of data each time, and there are 2 load channels - // ---------------------------------------------------------------------------------------- - for (unsigned i = 0; i < size; i += EC_ISA_VECTOR_NEON_WORDSIZE) { - uint64x2_t d0_1 = vld1q_u64((uint64_t *)(&(vbuf[0][i]))); - uint64x2_t d0_2 = vld1q_u64((uint64_t *)(&(vbuf[0][i + 16]))); - - for (int d = 1; d < src_size; d++) { - uint64x2_t di_1 = vld1q_u64((uint64_t *)(&(vbuf[d][i]))); - uint64x2_t di_2 = vld1q_u64((uint64_t *)(&(vbuf[d][i + 16]))); - - d0_1 = veorq_u64(d0_1, di_1); - d0_2 = veorq_u64(d0_2, di_2); - } - - vst1q_u64((uint64_t *)p, d0_1); - vst1q_u64((uint64_t *)(p + 16), d0_2); - p += EC_ISA_VECTOR_NEON_WORDSIZE; - } - return; -} -#endif // __aarch64__ && __ARM_NEON diff --git a/src/erasure-code/isa/xor_op.h b/src/erasure-code/isa/xor_op.h deleted file mode 100644 index 86b1645b616..00000000000 --- a/src/erasure-code/isa/xor_op.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Ceph - scalable distributed file system - * - * Copyright (C) 2014 CERN (Switzerland) - * \ - * Author: Andreas-Joachim Peters <Andreas.Joachim.Peters@cern.ch> \ - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - */ - -#ifndef EC_ISA_XOR_OP_H -#define EC_ISA_XOR_OP_H - -// ----------------------------------------------------------------------------- -#include <assert.h> -#include <stdint.h> -// ----------------------------------------------------------------------------- - -// ------------------------------------------------------------------------- -// declaration of 64/128-bit vector operations depending on availability -// ------------------------------------------------------------------------- -// ------------------------------------------------------------------------- - -#define EC_ISA_ADDRESS_ALIGNMENT 32u -#define EC_ISA_VECTOR_SSE2_WORDSIZE 64u -#define EC_ISA_VECTOR_NEON_WORDSIZE 32u -#if __GNUC__ > 4 || \ - ( (__GNUC__ == 4) && (__GNUC_MINOR__ >= 4) ) ||\ - (__clang__ == 1 ) -#ifdef EC_ISA_VECTOR_OP_DEBUG -#pragma message "* using 128-bit vector operations in " __FILE__ -#endif - -// ------------------------------------------------------------------------- -// use 128-bit pointer -// ------------------------------------------------------------------------- -typedef long vector_op_t __attribute__((vector_size(16))); -#define EC_ISA_VECTOR_OP_WORDSIZE 16 -#else -// ------------------------------------------------------------------------- -// use 64-bit pointer -// ------------------------------------------------------------------------- -typedef unsigned long long vector_op_t; -#define EC_ISA_VECTOR_OP_WORDSIZE 8 -#endif - - -// ------------------------------------------------------------------------- -// check if a pointer is aligend to byte_count -// ------------------------------------------------------------------------- -#define is_aligned(POINTER, BYTE_COUNT) \ - (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0) - -// ------------------------------------------------------------------------- -// compute byte-wise XOR of cw and dw block, ew contains the end address of cw -// ------------------------------------------------------------------------- -void -byte_xor(unsigned char* cw, unsigned char* dw, unsigned char* ew); - -// ------------------------------------------------------------------------- -// compute word-wise XOR of cw and dw block, ew contains the end address of cw -// ------------------------------------------------------------------------- -void -vector_xor(vector_op_t* cw, vector_op_t* dw, vector_op_t* ew); - -// ------------------------------------------------------------------------- -// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-] -// ------------------------------------------------------------------------- -void -region_xor(unsigned char** src, unsigned char* parity, int src_size, unsigned size); - -// ------------------------------------------------------------------------- -// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-] -// using SSE2 64-byte operations -// ------------------------------------------------------------------------- -void -region_sse2_xor(char** src /* array of 64-byte aligned source pointer to xor */, - char* parity /* 64-byte aligned output pointer containing the parity */, - int src_size /* size of the source pointer array */, - unsigned size /* size of the region to xor */); - -#if defined(__aarch64__) && defined(__ARM_NEON) -// ------------------------------------------------------------------------- -// compute region XOR like parity = src[0] ^ src[1] ... ^ src[src_size-1] -// using NEON 32-byte operations -// ------------------------------------------------------------------------- -void -region_neon_xor(char** src /* array of 64-byte aligned source pointer to xor */, - char* parity /* 32-byte aligned output pointer containing the parity */, - int src_size /* size of the source pointer array */, - unsigned size /* size of the region to xor */); -#endif // __aarch64__ && __ARM_NEON -#endif // EC_ISA_XOR_OP_H diff --git a/src/test/erasure-code/TestErasureCodeIsa.cc b/src/test/erasure-code/TestErasureCodeIsa.cc index bbd4441fc72..5235c10caba 100644 --- a/src/test/erasure-code/TestErasureCodeIsa.cc +++ b/src/test/erasure-code/TestErasureCodeIsa.cc @@ -21,7 +21,6 @@ #include "crush/CrushWrapper.h" #include "include/stringify.h" #include "erasure-code/isa/ErasureCodeIsa.h" -#include "erasure-code/isa/xor_op.h" #include "global/global_context.h" #include "common/config.h" #include "gtest/gtest.h" |