summaryrefslogtreecommitdiffstats
path: root/crypto/md5/asm/md5-x86_64.pl
diff options
context:
space:
mode:
authorJonathan Swinney <jswinney@amazon.com>2024-10-18 18:55:07 +0200
committerTomas Mraz <tomas@openssl.org>2025-01-06 11:43:36 +0100
commitebe34f9a62630b45a825bc07a2e9cf52731e836e (patch)
treed67734e0db11499a2caba29ed031b2c066b2c59b /crypto/md5/asm/md5-x86_64.pl
parentopenssl-pkeyutl.pod.in: update from SHA-1 to SHA256, fixing default values an... (diff)
downloadopenssl-ebe34f9a62630b45a825bc07a2e9cf52731e836e.tar.xz
openssl-ebe34f9a62630b45a825bc07a2e9cf52731e836e.zip
Optimize x86/aarch64 MD5 implementation
As suggested in https://github.com/animetosho/md5-optimisation?tab=readme-ov-file#dependency-shortcut-in-g-function, we can delay the dependency on 'x' by recognizing that ((x & z) | (y & ~z)) is equivalent to ((x & z) + (y & ~z)) in this scenario: each bit of z selects exactly one of the two AND terms, so they never have a set bit in the same position and the addition produces no carries. We can therefore perform those additions independently, leaving our dependency on x to the final addition. This speeds it up around 5% on both platforms. Signed-off-by: Oli Gillespie <ogillesp@amazon.com> Reviewed-by: Paul Dale <ppzgs1@gmail.com> Reviewed-by: Hugo Landau <hlandau@devever.net> (Merged from https://github.com/openssl/openssl/pull/25737)
Diffstat (limited to '')
-rwxr-xr-xcrypto/md5/asm/md5-x86_64.pl5
1 files changed, 2 insertions, 3 deletions
diff --git a/crypto/md5/asm/md5-x86_64.pl b/crypto/md5/asm/md5-x86_64.pl
index 6625fb7d08..aa2880a9cd 100755
--- a/crypto/md5/asm/md5-x86_64.pl
+++ b/crypto/md5/asm/md5-x86_64.pl
@@ -41,7 +41,6 @@ EOF
# %r10d = X[k_next]
# %r11d = z' (copy of z for the next step)
# %r12d = z' (copy of z for the next step)
-# Each round2_step() takes about 5.4 clocks (11 instructions, 2.0 IPC)
sub round2_step
{
my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
@@ -53,9 +52,9 @@ sub round2_step
lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
and $y, %r11d /* y & (not z) */
mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
- or %r11d, %r12d /* (y & (not z)) | (x & z) */
+ add %r11d, $dst /* dst += (y & (not z)) */
mov $y, %r11d /* (NEXT STEP) z' = $y */
- add %r12d, $dst /* dst += ... */
+ add %r12d, $dst /* dst += (x & z) */
mov $y, %r12d /* (NEXT STEP) z' = $y */
rol \$$s, $dst /* dst <<< s */
add $x, $dst /* dst += x */