diff options
Diffstat (limited to 'src/erasure-code/isa/isa-l/include/erasure_code.h')
-rw-r--r-- | src/erasure-code/isa/isa-l/include/erasure_code.h | 370 |
1 files changed, 322 insertions, 48 deletions
diff --git a/src/erasure-code/isa/isa-l/include/erasure_code.h b/src/erasure-code/isa/isa-l/include/erasure_code.h index 0f3b6db0825..53e480f0193 100644 --- a/src/erasure-code/isa/isa-l/include/erasure_code.h +++ b/src/erasure-code/isa/isa-l/include/erasure_code.h @@ -1,5 +1,5 @@ /********************************************************************** - Copyright(c) 2011-2014 Intel Corporation All rights reserved. + Copyright(c) 2011-2015 Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -74,73 +74,128 @@ extern "C" { void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls); /** - * @brief Generate or decode erasure codes on blocks of data. + * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. * * Given a list of source data blocks, generate one or multiple blocks of * encoded data as specified by a matrix of GF(2^8) coefficients. When given a * suitable set of coefficients, this function will perform the fast generation * or decoding of Reed-Solomon type erasure codes. * - * @requires SSE4.1 + * This function determines what instruction sets are enabled and + * selects the appropriate version at runtime. + * * @param len Length of each block of data (vector) of source or dest data. * @param k The number of vector sources or rows in the generator matrix * for coding. * @param rows The number of output vectors to concurrently encode/decode. * @param gftbls Pointer to array of input tables generated from coding - * coefficients in ec_init_tables(). Must be of size 32*k*rows + * coefficients in ec_init_tables(). Must be of size 32*k*rows * @param data Array of pointers to source input buffers. * @param coding Array of pointers to coded output buffers. 
* @returns none */ -void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding); +void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); +/** + * @brief Generate or decode erasure codes on blocks of data. + * + * Arch specific version of ec_encode_data() with same parameters. + * @requires SSE4.1 + */ +void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); /** - * @brief Generate or decode erasure codes on blocks of data, runs appropriate version. + * @brief Generate or decode erasure codes on blocks of data. * - * Given a list of source data blocks, generate one or multiple blocks of - * encoded data as specified by a matrix of GF(2^8) coefficients. When given a - * suitable set of coefficients, this function will perform the fast generation - * or decoding of Reed-Solomon type erasure codes. + * Arch specific version of ec_encode_data() with same parameters. + * @requires AVX + */ +void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); + +/** + * @brief Generate or decode erasure codes on blocks of data. * - * This function determines what instruction sets are enabled and - * selects the appropriate version at runtime. + * Arch specific version of ec_encode_data() with same parameters. + * @requires AVX2 + */ +void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, + unsigned char **coding); + +/** + * @brief Generate or decode erasure codes on blocks of data, runs baseline version. + * + * Baseline version of ec_encode_data() with same parameters. 
+ */ +void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, + unsigned char **dest); + +/** + * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version. + * + * Given one source data block, update one or multiple blocks of encoded data as + * specified by a matrix of GF(2^8) coefficients. When given a suitable set of + * coefficients, this function will perform the fast generation or decoding of + * Reed-Solomon type erasure codes from one input source at a time. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. * * @param len Length of each block of data (vector) of source or dest data. * @param k The number of vector sources or rows in the generator matrix * for coding. * @param rows The number of output vectors to concurrently encode/decode. - * @param gftbls Pointer to array of input tables generated from coding + * @param vec_i The vector index corresponding to the single input source. + * @param g_tbls Pointer to array of input tables generated from coding * coefficients in ec_init_tables(). Must be of size 32*k*rows - * @param data Array of pointers to source input buffers. + * @param data Pointer to single input source used to update output parity. * @param coding Array of pointers to coded output buffers. * @returns none */ +void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); -void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding); +/** + * @brief Generate update for encode or decode of erasure codes from single source. + * + * Arch specific version of ec_encode_data_update() with same parameters. 
+ * @requires SSE4.1 + */ +void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); /** - * @brief Generate or decode erasure codes on blocks of data, runs baseline version. + * @brief Generate update for encode or decode of erasure codes from single source. * - * Given a list of source data blocks, generate one or multiple blocks of - * encoded data as specified by a matrix of GF(2^8) coefficients. When given a - * suitable set of coefficients, this function will perform the fast generation - * or decoding of Reed-Solomon type erasure codes. + * Arch specific version of ec_encode_data_update() with same parameters. + * @requires AVX + */ + +void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); + +/** + * @brief Generate update for encode or decode of erasure codes from single source. * - * @param len Length of each block of data (vector) of source or dest data. - * @param srcs The number of vector sources or rows in the generator matrix - * for coding. - * @param dests The number of output vectors to concurrently encode/decode. - * @param v Pointer to array of input tables generated from coding - * coefficients in ec_init_tables(). Must be of size 32*k*rows - * @param src Array of pointers to source input buffers. - * @param dest Array of pointers to coded output buffers. - * @returns none + * Arch specific version of ec_encode_data_update() with same parameters. + * @requires AVX2 + */ + +void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls, + unsigned char *data, unsigned char **coding); + +/** + * @brief Generate update for encode or decode of erasure codes from single source. + * + * Baseline version of ec_encode_data_update(). 
*/ -void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, unsigned char **dest); +void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v, + unsigned char *data, unsigned char **dest); /** @@ -150,8 +205,8 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne * set of coefficients to produce each byte of the output. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 32*vlen byte constant array based on the input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based @@ -171,8 +226,8 @@ void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * set of coefficients to produce each byte of the output. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 32*vlen byte constant array based on the input coefficients. - * * @requires AVX + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based @@ -192,8 +247,8 @@ void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * set of coefficients to produce each byte of the output. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 32*vlen byte constant array based on the input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must be >= 32. * @param vlen Number of vector sources. * @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based @@ -214,8 +269,8 @@ void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. 
Function requires pre-calculation of a * 2*32*vlen byte constant array based on the two sets of input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants @@ -236,8 +291,8 @@ void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 2*32*vlen byte constant array based on the two sets of input coefficients. - * * @requires AVX + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants @@ -258,8 +313,8 @@ void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 2*32*vlen byte constant array based on the two sets of input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must be >= 32. * @param vlen Number of vector sources. * @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants @@ -280,8 +335,8 @@ void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 3*32*vlen byte constant array based on the three sets of input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. 
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants @@ -302,8 +357,8 @@ void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 3*32*vlen byte constant array based on the three sets of input coefficients. - * * @requires AVX + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants @@ -324,8 +379,8 @@ void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 3*32*vlen byte constant array based on the three sets of input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must be >= 32. * @param vlen Number of vector sources. * @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants @@ -346,8 +401,8 @@ void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 4*32*vlen byte constant array based on the four sets of input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants @@ -368,8 +423,8 @@ void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 4*32*vlen byte constant array based on the four sets of input coefficients. 
- * * @requires AVX + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants @@ -390,8 +445,8 @@ void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 4*32*vlen byte constant array based on the four sets of input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must be >= 32. * @param vlen Number of vector sources. * @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants @@ -412,8 +467,8 @@ void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 5*32*vlen byte constant array based on the five sets of input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants @@ -434,8 +489,8 @@ void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 5*32*vlen byte constant array based on the five sets of input coefficients. - * * @requires AVX + * * @param len Length of each vector in bytes. Must >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants @@ -456,8 +511,8 @@ void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. 
Function requires pre-calculation of a * 5*32*vlen byte constant array based on the five sets of input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must >= 32. * @param vlen Number of vector sources. * @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants @@ -478,8 +533,8 @@ void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 6*32*vlen byte constant array based on the six sets of input coefficients. - * * @requires SSE4.1 + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants @@ -500,8 +555,8 @@ void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 6*32*vlen byte constant array based on the six sets of input coefficients. - * * @requires AVX + * * @param len Length of each vector in bytes. Must be >= 16. * @param vlen Number of vector sources. * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants @@ -522,8 +577,8 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls, * sets of coefficients to produce each byte of the outputs. Can be used for * erasure coding encode and decode. Function requires pre-calculation of a * 6*32*vlen byte constant array based on the six sets of input coefficients. - * * @requires AVX2 + * * @param len Length of each vector in bytes. Must be >= 32. * @param vlen Number of vector sources. 
 * @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants @@ -582,6 +637,224 @@ void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls, void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls, unsigned char **src, unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, runs appropriate version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constant and add to destination array. Can be used for erasure coding encode + * and decode update when only one source is available at a time. Function + * requires pre-calculation of a 32*vec byte constant array based on the input + * coefficients. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Pointer to destination data array. + * @returns none + */ + +void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, arch specific version. + * + * Arch specific version of gf_vect_mad() with same parameters. + * @requires SSE4.1 + */ + +void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); +/** + * @brief GF(2^8) vector multiply accumulate, arch specific version. + * + * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires AVX + */ + +void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, arch specific version. + * + * Arch specific version of gf_vect_mad() with same parameters. + * @requires AVX2 + */ + +void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply accumulate, baseline version. + * + * Baseline version of gf_vect_mad() with same parameters. + */ + +void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src, + unsigned char *dest); + +/** + * @brief GF(2^8) vector multiply with 2 accumulate. SSE version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires SSE4.1 + * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. + * @returns none + */ + +void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse(). 
+ * @requires AVX + */ +void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +/** + * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse(). + * @requires AVX2 + */ +void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 3 accumulate. SSE version. + * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires SSE4.1 + * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. + * @returns none + */ + +void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse(). + * @requires AVX + */ +void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse(). + * @requires AVX2 + */ +void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 4 accumulate. SSE version. 
+ * + * Does a GF(2^8) multiply across each byte of input source with expanded + * constants and add to destination arrays. Can be used for erasure coding + * encode and decode update when only one source is available at a + * time. Function requires pre-calculation of a 32*vec byte constant array based + * on the input coefficients. + * @requires SSE4.1 + * + * @param len Length of each vector in bytes. Must be >= 32. + * @param vec The number of vector sources or rows in the generator matrix + * for coding. + * @param vec_i The vector index corresponding to the single input source. + * @param gftbls Pointer to array of input tables generated from coding + * coefficients in ec_init_tables(). Must be of size 32*vec. + * @param src Pointer to source input array. + * @param dest Array of pointers to destination input/outputs. + * @returns none + */ + +void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse(). + * @requires AVX + */ +void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +/** + * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse(). + * @requires AVX2 + */ +void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 5 accumulate. SSE version. + * @requires SSE4.1 + */ +void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 5 accumulate. AVX version. + * @requires AVX + */ +void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +/** + * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version. 
+ * @requires AVX2 + */ +void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 6 accumulate. SSE version. + * @requires SSE4.1 + */ +void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); +/** + * @brief GF(2^8) vector multiply with 6 accumulate. AVX version. + * @requires AVX + */ +void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + +/** + * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version. + * @requires AVX2 + */ +void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src, + unsigned char **dest); + + /********************************************************************** * The remaining are lib support functions used in GF(2^8) operations. */ @@ -650,6 +923,7 @@ void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k); int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n); + /*************************************************************/ #ifdef __cplusplus |