summary | refs | log | tree | commit | diff | stats
path: root/src/erasure-code/isa/isa-l/include/erasure_code.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/erasure-code/isa/isa-l/include/erasure_code.h')
-rw-r--r-- src/erasure-code/isa/isa-l/include/erasure_code.h 370
1 files changed, 322 insertions, 48 deletions
diff --git a/src/erasure-code/isa/isa-l/include/erasure_code.h b/src/erasure-code/isa/isa-l/include/erasure_code.h
index 0f3b6db0825..53e480f0193 100644
--- a/src/erasure-code/isa/isa-l/include/erasure_code.h
+++ b/src/erasure-code/isa/isa-l/include/erasure_code.h
@@ -1,5 +1,5 @@
/**********************************************************************
- Copyright(c) 2011-2014 Intel Corporation All rights reserved.
+ Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -74,73 +74,128 @@ extern "C" {
void ec_init_tables(int k, int rows, unsigned char* a, unsigned char* gftbls);
/**
- * @brief Generate or decode erasure codes on blocks of data.
+ * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
*
* Given a list of source data blocks, generate one or multiple blocks of
* encoded data as specified by a matrix of GF(2^8) coefficients. When given a
* suitable set of coefficients, this function will perform the fast generation
* or decoding of Reed-Solomon type erasure codes.
*
- * @requires SSE4.1
+ * This function determines what instruction sets are enabled and
+ * selects the appropriate version at runtime.
+ *
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
* @param gftbls Pointer to array of input tables generated from coding
- * coefficients in ec_init_tables(). Must be of size 32*k*rows
+ * coefficients in ec_init_tables(). Must be of size 32*k*rows
* @param data Array of pointers to source input buffers.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
-void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
+void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
+ *
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires SSE4.1
+ */
+void ec_encode_data_sse(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
/**
- * @brief Generate or decode erasure codes on blocks of data, runs appropriate version.
+ * @brief Generate or decode erasure codes on blocks of data.
*
- * Given a list of source data blocks, generate one or multiple blocks of
- * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
- * suitable set of coefficients, this function will perform the fast generation
- * or decoding of Reed-Solomon type erasure codes.
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX
+ */
+void ec_encode_data_avx(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data.
*
- * This function determines what instruction sets are enabled and
- * selects the appropriate version at runtime.
+ * Arch specific version of ec_encode_data() with same parameters.
+ * @requires AVX2
+ */
+void ec_encode_data_avx2(int len, int k, int rows, unsigned char *gftbls, unsigned char **data,
+ unsigned char **coding);
+
+/**
+ * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
+ *
+ * Baseline version of ec_encode_data() with same parameters.
+ */
+void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src,
+ unsigned char **dest);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source, runs appropriate version.
+ *
+ * Given one source data block, update one or multiple blocks of encoded data as
+ * specified by a matrix of GF(2^8) coefficients. When given a suitable set of
+ * coefficients, this function will perform the fast generation or decoding of
+ * Reed-Solomon type erasure codes from one input source at a time.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
*
* @param len Length of each block of data (vector) of source or dest data.
* @param k The number of vector sources or rows in the generator matrix
* for coding.
* @param rows The number of output vectors to concurrently encode/decode.
- * @param gftbls Pointer to array of input tables generated from coding
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param g_tbls Pointer to array of input tables generated from coding
* coefficients in ec_init_tables(). Must be of size 32*k*rows
- * @param data Array of pointers to source input buffers.
+ * @param data Pointer to single input source used to update output parity.
* @param coding Array of pointers to coded output buffers.
* @returns none
*/
+void ec_encode_data_update(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
-void ec_encode_data(int len, int k, int rows, unsigned char *gftbls, unsigned char **data, unsigned char **coding);
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires SSE4.1
+ */
+void ec_encode_data_update_sse(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
/**
- * @brief Generate or decode erasure codes on blocks of data, runs baseline version.
+ * @brief Generate update for encode or decode of erasure codes from single source.
*
- * Given a list of source data blocks, generate one or multiple blocks of
- * encoded data as specified by a matrix of GF(2^8) coefficients. When given a
- * suitable set of coefficients, this function will perform the fast generation
- * or decoding of Reed-Solomon type erasure codes.
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires AVX
+ */
+
+void ec_encode_data_update_avx(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
*
- * @param len Length of each block of data (vector) of source or dest data.
- * @param srcs The number of vector sources or rows in the generator matrix
- * for coding.
- * @param dests The number of output vectors to concurrently encode/decode.
- * @param v Pointer to array of input tables generated from coding
- * coefficients in ec_init_tables(). Must be of size 32*k*rows
- * @param src Array of pointers to source input buffers.
- * @param dest Array of pointers to coded output buffers.
- * @returns none
+ * Arch specific version of ec_encode_data_update() with same parameters.
+ * @requires AVX2
+ */
+
+void ec_encode_data_update_avx2(int len, int k, int rows, int vec_i, unsigned char *g_tbls,
+ unsigned char *data, unsigned char **coding);
+
+/**
+ * @brief Generate update for encode or decode of erasure codes from single source.
+ *
+ * Baseline version of ec_encode_data_update().
*/
-void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigned char **src, unsigned char **dest);
+void ec_encode_data_update_base(int len, int k, int rows, int vec_i, unsigned char *v,
+ unsigned char *data, unsigned char **dest);
/**
@@ -150,8 +205,8 @@ void ec_encode_data_base(int len, int srcs, int dests, unsigned char *v, unsigne
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
@@ -171,8 +226,8 @@ void gf_vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
@@ -192,8 +247,8 @@ void gf_vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* set of coefficients to produce each byte of the output. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 32*vlen byte constant array based on the input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 32*vlen byte array of pre-calculated constants based
@@ -214,8 +269,8 @@ void gf_vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
@@ -236,8 +291,8 @@ void gf_2vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
@@ -258,8 +313,8 @@ void gf_2vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 2*32*vlen byte constant array based on the two sets of input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 2*32*vlen byte array of pre-calculated constants
@@ -280,8 +335,8 @@ void gf_2vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
@@ -302,8 +357,8 @@ void gf_3vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
@@ -324,8 +379,8 @@ void gf_3vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 3*32*vlen byte constant array based on the three sets of input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 3*32*vlen byte array of pre-calculated constants
@@ -346,8 +401,8 @@ void gf_3vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
@@ -368,8 +423,8 @@ void gf_4vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
@@ -390,8 +445,8 @@ void gf_4vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 4*32*vlen byte constant array based on the four sets of input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 4*32*vlen byte array of pre-calculated constants
@@ -412,8 +467,8 @@ void gf_4vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
@@ -434,8 +489,8 @@ void gf_5vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
@@ -456,8 +511,8 @@ void gf_5vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 5*32*vlen byte constant array based on the five sets of input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 5*32*vlen byte array of pre-calculated constants
@@ -478,8 +533,8 @@ void gf_5vect_dot_prod_avx2(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
- *
* @requires SSE4.1
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
@@ -500,8 +555,8 @@ void gf_6vect_dot_prod_sse(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
- *
* @requires AVX
+ *
* @param len Length of each vector in bytes. Must be >= 16.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
@@ -522,8 +577,8 @@ void gf_6vect_dot_prod_avx(int len, int vlen, unsigned char *gftbls,
* sets of coefficients to produce each byte of the outputs. Can be used for
* erasure coding encode and decode. Function requires pre-calculation of a
* 6*32*vlen byte constant array based on the six sets of input coefficients.
- *
* @requires AVX2
+ *
* @param len Length of each vector in bytes. Must be >= 32.
* @param vlen Number of vector sources.
* @param gftbls Pointer to 6*32*vlen byte array of pre-calculated constants
@@ -582,6 +637,224 @@ void gf_vect_dot_prod_base(int len, int vlen, unsigned char *gftbls,
void gf_vect_dot_prod(int len, int vlen, unsigned char *gftbls,
unsigned char **src, unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, runs appropriate version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constant and add to destination array. Can be used for erasure coding encode
+ * and decode update when only one source is available at a time. Function
+ * requires pre-calculation of a 32*vec byte constant array based on the input
+ * coefficients.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Pointer to destination data array.
+ * @returns none
+ */
+
+void gf_vect_mad(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires SSE4.1
+ */
+
+void gf_vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires AVX
+ */
+
+void gf_vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, arch specific version.
+ *
+ * Arch specific version of gf_vect_mad() with same parameters.
+ * @requires AVX2
+ */
+
+void gf_vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply accumulate, baseline version.
+ *
+ * Baseline version of gf_vect_mad() with same parameters.
+ */
+
+void gf_vect_mad_base(int len, int vec, int vec_i, unsigned char *v, unsigned char *src,
+ unsigned char *dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_2vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. AVX version of gf_2vect_mad_sse().
+ * @requires AVX
+ */
+void gf_2vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 2 accumulate. AVX2 version of gf_2vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_2vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_3vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. AVX version of gf_3vect_mad_sse().
+ * @requires AVX
+ */
+void gf_3vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 3 accumulate. AVX2 version of gf_3vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_3vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. SSE version.
+ *
+ * Does a GF(2^8) multiply across each byte of input source with expanded
+ * constants and add to destination arrays. Can be used for erasure coding
+ * encode and decode update when only one source is available at a
+ * time. Function requires pre-calculation of a 32*vec byte constant array based
+ * on the input coefficients.
+ * @requires SSE4.1
+ *
+ * @param len Length of each vector in bytes. Must be >= 32.
+ * @param vec The number of vector sources or rows in the generator matrix
+ * for coding.
+ * @param vec_i The vector index corresponding to the single input source.
+ * @param gftbls Pointer to array of input tables generated from coding
+ * coefficients in ec_init_tables(). Must be of size 32*vec.
+ * @param src Pointer to source input array.
+ * @param dest Array of pointers to destination input/outputs.
+ * @returns none
+ */
+
+void gf_4vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. AVX version of gf_4vect_mad_sse().
+ * @requires AVX
+ */
+void gf_4vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 4 accumulate. AVX2 version of gf_4vect_mad_sse().
+ * @requires AVX2
+ */
+void gf_4vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. SSE version.
+ * @requires SSE4.1
+ */
+void gf_5vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. AVX version.
+ * @requires AVX
+ */
+void gf_5vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 5 accumulate. AVX2 version.
+ * @requires AVX2
+ */
+void gf_5vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. SSE version.
+ * @requires SSE4.1
+ */
+void gf_6vect_mad_sse(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. AVX version.
+ * @requires AVX
+ */
+void gf_6vect_mad_avx(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+/**
+ * @brief GF(2^8) vector multiply with 6 accumulate. AVX2 version.
+ * @requires AVX2
+ */
+void gf_6vect_mad_avx2(int len, int vec, int vec_i, unsigned char *gftbls, unsigned char *src,
+ unsigned char **dest);
+
+
/**********************************************************************
* The remaining are lib support functions used in GF(2^8) operations.
*/
@@ -650,6 +923,7 @@ void gf_gen_cauchy1_matrix(unsigned char *a, int m, int k);
int gf_invert_matrix(unsigned char *in, unsigned char *out, const int n);
+
/*************************************************************/
#ifdef __cplusplus