summaryrefslogtreecommitdiffstats
path: root/crypto/arm_arch.h
diff options
context:
space:
mode:
authorXiaokangQian <xiaokang.qian@arm.com>2021-06-09 08:35:46 +0200
committerPauli <pauli@openssl.org>2022-01-25 04:30:00 +0100
commit954f45ba4c504570206ff5bed811e512cf92dc8e (patch)
tree6d2521f79615afd4c8b35cb2c6794a57aded5602 /crypto/arm_arch.h
parentAES-GCM performance optimization with stitched method for p9+ ppc64le (diff)
downloadopenssl-954f45ba4c504570206ff5bed811e512cf92dc8e.tar.xz
openssl-954f45ba4c504570206ff5bed811e512cf92dc8e.zip
Optimize AES-GCM for uarchs with unroll and new instructions
Increase the block numbers to 8 for every iteration. Increase the hash table capacity. Make use of EOR3 instruction to improve the performance. This can improve performance 25-40% on out-of-order microarchitectures with a large number of fast execution units, such as Neoverse V1. We also see 20-30% performance improvements on other architectures such as the M1. Assembly code reviewed by Tom Cosgrove (ARM). Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> Reviewed-by: Paul Dale <pauli@openssl.org> (Merged from https://github.com/openssl/openssl/pull/15916)
Diffstat (limited to 'crypto/arm_arch.h')
-rw-r--r--crypto/arm_arch.h6
1 files changed, 6 insertions, 0 deletions
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 291620ebc9..33acbd99c0 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -81,6 +81,8 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
# define ARMV8_RNG (1<<8)
# define ARMV8_SM3 (1<<9)
# define ARMV8_SM4 (1<<10)
+# define ARMV8_SHA3 (1<<11)
+# define ARMV8_UNROLL8_EOR3 (1<<12)
/*
* MIDR_EL1 system register
@@ -96,6 +98,7 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
# define ARM_CPU_PART_CORTEX_A72 0xD08
# define ARM_CPU_PART_N1 0xD0C
+# define ARM_CPU_PART_V1 0xD40
# define MIDR_PARTNUM_SHIFT 4
# define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT)
@@ -182,4 +185,7 @@ extern unsigned int OPENSSL_armv8_rsa_neonized;
# endif /* defined __ASSEMBLER__ */
+# define IS_CPU_SUPPORT_UNROLL8_EOR3() \
+ (OPENSSL_armcap_P & ARMV8_UNROLL8_EOR3)
+
#endif