author     Ulf Möller <ulf@openssl.org>   1999-05-13 15:16:42 +0200
committer  Ulf Möller <ulf@openssl.org>   1999-05-13 15:16:42 +0200
commit     bd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c (patch)
tree       e64a4b13276c3663c3ad9f6f4235909ecadc8852 /crypto/md5
parent     VMS support. (diff)
Reorganize and speed up MD5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/md5')
-rw-r--r--  crypto/md5/Makefile.ssl       |   10
-rw-r--r--  crypto/md5/asm/md5-586.pl     |    3
-rw-r--r--  crypto/md5/asm/md5-sparcv9.S  | 1035
-rw-r--r--  crypto/md5/md5.h              |   36
-rw-r--r--  crypto/md5/md5_dgst.c         |  365
-rw-r--r--  crypto/md5/md5_locl.h         |  153
-rw-r--r--  crypto/md5/md5_one.c          |    3
7 files changed, 1269 insertions, 336 deletions
diff --git a/crypto/md5/Makefile.ssl b/crypto/md5/Makefile.ssl
index e27cfca0b0..f8eaf628b3 100644
--- a/crypto/md5/Makefile.ssl
+++ b/crypto/md5/Makefile.ssl
@@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp
asm/mx86unix.cpp: asm/md5-586.pl
(cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp)
+# works for both SC and gcc
+asm/md5-sparcv8plus.o: asm/md5-sparcv9.S
+ $(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o
+
+asm/md5-sparcv9.o: asm/md5-sparcv9.S
+ $(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o
+
+
files:
$(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
@@ -103,5 +111,5 @@ clean:
# DO NOT DELETE THIS LINE -- make depend depends on it.
md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h
-md5_dgst.o: md5_locl.h
+md5_dgst.o: ../md32_common.h md5_locl.h
md5_one.o: ../../include/openssl/md5.h md5_locl.h
diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl
index 0249e100e1..5fc6a205ce 100644
--- a/crypto/md5/asm/md5-586.pl
+++ b/crypto/md5/asm/md5-586.pl
@@ -29,7 +29,7 @@ $X="esi";
0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3
);
-&md5_block("md5_block_x86");
+&md5_block("md5_block_asm_host_order");
&asm_finish();
sub Np
@@ -183,6 +183,7 @@ sub md5_block
&mov($X, &wparam(1)); # esi
&mov($C, &wparam(2));
&push("ebp");
+ &shl($C, 6);
&push("ebx");
&add($C, $X); # offset we end at
&sub($C, 64);
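
[Editor's note] The renamed entry point and the added &shl($C, 6) reflect the calling convention used throughout this commit: md5_locl.h further down declares md5_block_host_order(MD5_CTX *c, const MD5_LONG *p, int num) and maps it to this assembly routine on x86, so the third argument is now a count of 64-byte blocks rather than a byte length. A minimal C sketch, not part of the commit, of the loop bound the prologue computes:

#include <stddef.h>

/* Sketch only: address of the last 64-byte block, given a block count.
 * Mirrors "shl $C,6; add $C,$X; sub $C,64" in the prologue above. */
static const unsigned char *last_block(const unsigned char *data, int num)
	{
	return data + ((size_t)num << 6) - 64;	/* data + num*64 - 64 */
	}
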
diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S
new file mode 100644
index 0000000000..955b3680b5
--- /dev/null
+++ b/crypto/md5/asm/md5-sparcv9.S
@@ -0,0 +1,1035 @@
+.ident "md5-sparcv9.S, Version 1.0"
+.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>"
+.file "md5-sparcv9.S"
+
+/*
+ * ====================================================================
+ * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>.
+ *
+ * Rights for redistribution and usage in source and binary forms are
+ * granted as long as above copyright notices are retained. Warranty
+ * of any kind is (of course:-) disclaimed.
+ * ====================================================================
+ */
+
+/*
+ * This is my modest contribution to OpenSSL project (see
+ * http://www.openssl.org/ for more information about it) and is an
+ * assembler implementation of MD5 block hash function. I've hand-coded
+ * this for the sole reason to reach UltraSPARC-specific "load in
+ * little-endian byte order" instruction. This gives up to 15%
+ * performance improvement for cases when input message is aligned at
+ * 32 bits boundary. The module was tested under both 32 *and* 64 bit
+ * kernels. For updates see http://fy.chalmers.se/~appro/hpe/.
+ *
+ * To compile with SC4.x/SC5.x:
+ *
+ * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * and with gcc:
+ *
+ * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \
+ * -c md5-sparcv9.S
+ *
+ * or if above fails (it does if you have gas):
+ *
+ *	gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5-sparcv9.S | \
+ * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o
+ */
+
+#define A %o0
+#define B %o1
+#define C %o2
+#define D %o3
+#define T1 %o4
+#define T2 %o5
+
+#define R0 %l0
+#define R1 %l1
+#define R2 %l2
+#define R3 %l3
+#define R4 %l4
+#define R5 %l5
+#define R6 %l6
+#define R7 %l7
+#define R8 %i3
+#define R9 %i4
+#define R10 %i5
+#define R11 %g1
+#define R12 %g2
+#define R13 %g3
+#define RX %g4
+
+#define Aptr %i0+0
+#define Bptr %i0+4
+#define Cptr %i0+8
+#define Dptr %i0+12
+
+#define Aval R5 /* those not used at the end of the last round */
+#define Bval R6
+#define Cval R7
+#define Dval R8
+
+#if defined(MD5_BLOCK_DATA_ORDER)
+# if defined(ULTRASPARC)
+# define LOAD lda
+# define X(i) [%i1+i*4]%asi
+# define md5_block md5_block_asm_data_order_aligned
+# define ASI_PRIMARY_LITTLE 0x88
+# else
+# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!"
+# endif
+#else
+# define LOAD ld
+# define X(i) [%i1+i*4]
+# define md5_block md5_block_asm_host_order
+#endif
+
+.section ".text",#alloc,#execinstr
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+ /* They've said -xarch=v9 at command line */
+ .register %g2,#scratch
+ .register %g3,#scratch
+# define FRAME -192
+#else
+# define FRAME -96
+#endif
+
+.align 32
+
+.global md5_block
+md5_block:
+ save %sp,FRAME,%sp
+
+ ld [Dptr],D
+#ifdef ASI_PRIMARY_LITTLE
+ mov %asi,%o7 ! How dare I? Well, I just do:-)
+#else
+ nop
+#endif
+ ld [Cptr],C
+#ifdef ASI_PRIMARY_LITTLE
+ mov ASI_PRIMARY_LITTLE,%asi
+#else
+ nop
+#endif
+ ld [Bptr],B
+ nop
+ ld [Aptr],A
+ nop
+ LOAD X(0),R0
+ nop
+ ba .Lmd5_block_loop
+ nop
+
+.align 32
+.Lmd5_block_loop:
+
+!!!!!!!!Round 0
+
+ xor C,D,T1
+ sethi %hi(0xd76aa478),T2
+ and T1,B,T1
+ or T2,%lo(0xd76aa478),T2 !=
+ xor T1,D,T1
+ add T1,R0,T1
+ LOAD X(1),R1
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xe8c7b756),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xe8c7b756),T2
+ xor T1,C,T1
+ LOAD X(2),R2
+ add T1,R1,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0x242070db),T2 !=
+ and T1,D,T1
+ or T2,%lo(0x242070db),T2
+ xor T1,B,T1
+ add T1,R2,T1 !=
+ LOAD X(3),R3
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xc1bdceee),T2
+ and T1,C,T1
+ or T2,%lo(0xc1bdceee),T2
+ xor T1,A,T1 !=
+ add T1,R3,T1
+ LOAD X(4),R4
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0xf57c0faf),T2
+ and T1,B,T1
+ or T2,%lo(0xf57c0faf),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ LOAD X(5),R5
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x4787c62a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x4787c62a),T2
+ xor T1,C,T1
+ LOAD X(6),R6
+ add T1,R5,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa8304613),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa8304613),T2
+ xor T1,B,T1
+ add T1,R6,T1 !=
+ LOAD X(7),R7
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0xfd469501),T2
+ and T1,C,T1
+ or T2,%lo(0xfd469501),T2
+ xor T1,A,T1 !=
+ add T1,R7,T1
+ LOAD X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x698098d8),T2
+ and T1,B,T1
+ or T2,%lo(0x698098d8),T2 !=
+ xor T1,D,T1
+ add T1,R8,T1
+ LOAD X(9),R9
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0x8b44f7af),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8b44f7af),T2
+ xor T1,C,T1
+ LOAD X(10),R10
+ add T1,R9,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xffff5bb1),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xffff5bb1),T2
+ xor T1,B,T1
+ add T1,R10,T1 !=
+ LOAD X(11),R11
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x895cd7be),T2
+ and T1,C,T1
+ or T2,%lo(0x895cd7be),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ LOAD X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,22,T2
+ srl B,32-22,B
+ or B,T2,B
+ xor C,D,T1 !=
+ add B,C,B
+
+ sethi %hi(0x6b901122),T2
+ and T1,B,T1
+ or T2,%lo(0x6b901122),T2 !=
+ xor T1,D,T1
+ add T1,R12,T1
+ LOAD X(13),R13
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,7,T2
+ srl A,32-7,A
+ or A,T2,A !=
+ xor B,C,T1
+ add A,B,A
+
+ sethi %hi(0xfd987193),T2
+ and T1,A,T1 !=
+ or T2,%lo(0xfd987193),T2
+ xor T1,C,T1
+ LOAD X(14),RX
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,12,T2
+ srl D,32-12,D !=
+ or D,T2,D
+ xor A,B,T1
+ add D,A,D
+
+ sethi %hi(0xa679438e),T2 !=
+ and T1,D,T1
+ or T2,%lo(0xa679438e),T2
+ xor T1,B,T1
+ add T1,RX,T1 !=
+ LOAD X(15),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,17,T2 !=
+ srl C,32-17,C
+ or C,T2,C
+ xor D,A,T1
+ add C,D,C !=
+
+ sethi %hi(0x49b40821),T2
+ and T1,C,T1
+ or T2,%lo(0x49b40821),T2
+ xor T1,A,T1 !=
+ add T1,RX,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,22,T2 !=
+ srl B,32-22,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 1
+
+ xor B,C,T1 !=
+ sethi %hi(0xf61e2562),T2
+ and T1,D,T1
+ or T2,%lo(0xf61e2562),T2
+ xor T1,C,T1 !=
+ add T1,R1,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0xc040b340),T2
+ and T1,C,T1
+ or T2,%lo(0xc040b340),T2
+ xor T1,B,T1 !=
+ add T1,R6,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,9,T2 !=
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x265e5a51),T2
+ and T1,B,T1
+ or T2,%lo(0x265e5a51),T2
+ xor T1,A,T1 !=
+ add T1,R11,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add C,T1,C
+ sll C,14,T2 !=
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xe9b6c7aa),T2
+ and T1,A,T1
+ or T2,%lo(0xe9b6c7aa),T2
+ xor T1,D,T1 !=
+ add T1,R0,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1
+ add B,T1,B
+ sll B,20,T2 !=
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1 !=
+ sethi %hi(0xd62f105d),T2
+ and T1,D,T1
+ or T2,%lo(0xd62f105d),T2
+ xor T1,C,T1 !=
+ add T1,R5,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add A,T1,A
+ sll A,5,T2 !=
+ srl A,32-5,A
+ or A,T2,A
+ add A,B,A
+
+ xor A,B,T1 !=
+ sethi %hi(0x02441453),T2
+ and T1,C,T1
+ or T2,%lo(0x02441453),T2
+ xor T1,B,T1 !=
+ add T1,R10,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd8a1e681),T2
+ and T1,B,T1
+ or T2,%lo(0xd8a1e681),T2 !=
+ xor T1,A,T1
+ add T1,RX,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xe7d3fbc8),T2
+ and T1,A,T1
+ or T2,%lo(0xe7d3fbc8),T2 !=
+ xor T1,D,T1
+ add T1,R4,T1
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B
+ add B,C,B !=
+
+ xor B,C,T1
+ sethi %hi(0x21e1cde6),T2
+ and T1,D,T1
+ or T2,%lo(0x21e1cde6),T2 !=
+ xor T1,C,T1
+ add T1,R9,T1
+ LOAD X(14),RX
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xc33707d6),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xc33707d6),T2
+ xor T1,B,T1
+ add T1,RX,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0xf4d50d87),T2
+ and T1,B,T1 !=
+ or T2,%lo(0xf4d50d87),T2
+ xor T1,A,T1
+ add T1,R3,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x455a14ed),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x455a14ed),T2
+ xor T1,D,T1
+ add T1,R8,T1
+ !pre-LOADed X(13),R13
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa9e3e905),T2
+ and T1,D,T1 !=
+ or T2,%lo(0xa9e3e905),T2
+ xor T1,C,T1
+ add T1,R13,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,5,T2
+ srl A,32-5,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xfcefa3f8),T2
+ and T1,C,T1 !=
+ or T2,%lo(0xfcefa3f8),T2
+ xor T1,B,T1
+ add T1,R2,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,9,T2
+ srl D,32-9,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x676f02d9),T2
+ and T1,B,T1 !=
+ or T2,%lo(0x676f02d9),T2
+ xor T1,A,T1
+ add T1,R7,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,14,T2
+ srl C,32-14,C
+ or C,T2,C !=
+ add C,D,C
+
+ xor C,D,T1
+ sethi %hi(0x8d2a4c8a),T2
+ and T1,A,T1 !=
+ or T2,%lo(0x8d2a4c8a),T2
+ xor T1,D,T1
+ add T1,R12,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add B,T1,B
+ sll B,20,T2
+ srl B,32-20,B
+ or B,T2,B !=
+ add B,C,B
+
+!!!!!!!!Round 2
+
+ xor B,C,T1
+ sethi %hi(0xfffa3942),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfffa3942),T2
+ add T1,R5,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A
+ add A,B,A !=
+
+ xor A,B,T1
+ sethi %hi(0x8771f681),T2
+ xor T1,C,T1
+ or T2,%lo(0x8771f681),T2 !=
+ add T1,R8,T1
+ !pre-LOADed X(11),R11
+ add T1,T2,T1
+ add D,T1,D
+ sll D,11,T2 !=
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D
+
+ xor D,A,T1 !=
+ sethi %hi(0x6d9d6122),T2
+ xor T1,B,T1
+ or T2,%lo(0x6d9d6122),T2
+ add T1,R11,T1 !=
+ LOAD X(14),RX
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xfde5380c),T2
+ xor T1,A,T1
+ or T2,%lo(0xfde5380c),T2
+ add T1,RX,T1 !=
+ !pre-LOADed X(1),R1
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xa4beea44),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xa4beea44),T2
+ add T1,R1,T1
+ !pre-LOADed X(4),R4
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0x4bdecfa9),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0x4bdecfa9),T2
+ add T1,R4,T1
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xf6bb4b60),T2
+ xor T1,B,T1
+ or T2,%lo(0xf6bb4b60),T2 !=
+ add T1,R7,T1
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0xbebfbc70),T2
+ xor T1,A,T1
+ or T2,%lo(0xbebfbc70),T2
+ add T1,R10,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0x289b7ec6),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x289b7ec6),T2
+ add T1,R13,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xeaa127fa),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xeaa127fa),T2
+ add T1,R0,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add D,T1,D !=
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D
+ add D,A,D !=
+
+ xor D,A,T1
+ sethi %hi(0xd4ef3085),T2
+ xor T1,B,T1
+ or T2,%lo(0xd4ef3085),T2 !=
+ add T1,R3,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add C,T1,C
+ sll C,16,T2 !=
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C
+
+ xor C,D,T1 !=
+ sethi %hi(0x04881d05),T2
+ xor T1,A,T1
+ or T2,%lo(0x04881d05),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(9),R9
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2
+ srl B,32-23,B !=
+ or B,T2,B
+ add B,C,B
+
+ xor B,C,T1
+ sethi %hi(0xd9d4d039),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xd9d4d039),T2
+ add T1,R9,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1 !=
+ add A,T1,A
+ sll A,4,T2
+ srl A,32-4,A
+ or A,T2,A !=
+ add A,B,A
+
+ xor A,B,T1
+ sethi %hi(0xe6db99e5),T2
+ xor T1,C,T1 !=
+ or T2,%lo(0xe6db99e5),T2
+ add T1,R12,T1
+ LOAD X(15),RX
+ add T1,T2,T1 !=
+ add D,T1,D
+ sll D,11,T2
+ srl D,32-11,D
+ or D,T2,D !=
+ add D,A,D
+
+ xor D,A,T1
+ sethi %hi(0x1fa27cf8),T2
+ xor T1,B,T1 !=
+ or T2,%lo(0x1fa27cf8),T2
+ add T1,RX,T1
+ !pre-LOADed X(2),R2
+ add T1,T2,T1
+ add C,T1,C !=
+ sll C,16,T2
+ srl C,32-16,C
+ or C,T2,C
+ add C,D,C !=
+
+ xor C,D,T1
+ sethi %hi(0xc4ac5665),T2
+ xor T1,A,T1
+ or T2,%lo(0xc4ac5665),T2 !=
+ add T1,R2,T1
+ !pre-LOADed X(0),R0
+ add T1,T2,T1
+ add B,T1,B
+ sll B,23,T2 !=
+ srl B,32-23,B
+ or B,T2,B
+ add B,C,B
+
+!!!!!!!!Round 3
+
+ orn B,D,T1 !=
+ sethi %hi(0xf4292244),T2
+ xor T1,C,T1
+ or T2,%lo(0xf4292244),T2
+ add T1,R0,T1 !=
+ !pre-LOADed X(7),R7
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0x432aff97),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0x432aff97),T2
+ LOAD X(14),RX
+ add T1,R7,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xab9423a7),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xab9423a7),T2
+ add T1,RX,T1
+ !pre-LOADed X(5),R5
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xfc93a039),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0xfc93a039),T2
+ add T1,R5,T1
+ !pre-LOADed X(12),R12
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x655b59c3),T2
+ xor T1,C,T1
+ or T2,%lo(0x655b59c3),T2 !=
+ add T1,R12,T1
+ !pre-LOADed X(3),R3
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2 !=
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1 !=
+ sethi %hi(0x8f0ccc92),T2
+ xor T1,B,T1
+ or T2,%lo(0x8f0ccc92),T2
+ add T1,R3,T1 !=
+ !pre-LOADed X(10),R10
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0xffeff47d),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0xffeff47d),T2
+ add T1,R10,T1
+ !pre-LOADed X(1),R1
+ add T1,T2,T1 !=
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C
+ or C,T2,C !=
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x85845dd1),T2
+ xor T1,D,T1 !=
+ or T2,%lo(0x85845dd1),T2
+ add T1,R1,T1
+ !pre-LOADed X(8),R8
+ add T1,T2,T1
+ add B,T1,B !=
+ sll B,21,T2
+ srl B,32-21,B
+ or B,T2,B
+ add B,C,B !=
+
+ orn B,D,T1
+ sethi %hi(0x6fa87e4f),T2
+ xor T1,C,T1
+ or T2,%lo(0x6fa87e4f),T2 !=
+ add T1,R8,T1
+ LOAD X(15),RX
+ add T1,T2,T1
+ add A,T1,A !=
+ sll A,6,T2
+ srl A,32-6,A
+ or A,T2,A
+ add A,B,A !=
+
+ orn A,C,T1
+ sethi %hi(0xfe2ce6e0),T2
+ xor T1,B,T1
+ or T2,%lo(0xfe2ce6e0),T2 !=
+ add T1,RX,T1
+ !pre-LOADed X(6),R6
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2 !=
+ srl D,32-10,D
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1 !=
+ sethi %hi(0xa3014314),T2
+ xor T1,A,T1
+ or T2,%lo(0xa3014314),T2
+ add T1,R6,T1 !=
+ !pre-LOADed X(13),R13
+ add T1,T2,T1
+ add C,T1,C
+ sll C,15,T2
+ srl C,32-15,C !=
+ or C,T2,C
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0x4e0811a1),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0x4e0811a1),T2
+ !pre-LOADed X(4),R4
+ ld [Aptr],Aval
+ add T1,R13,T1 !=
+ add T1,T2,T1
+ add B,T1,B
+ sll B,21,T2
+ srl B,32-21,B !=
+ or B,T2,B
+ add B,C,B
+
+ orn B,D,T1
+ sethi %hi(0xf7537e82),T2 !=
+ xor T1,C,T1
+ or T2,%lo(0xf7537e82),T2
+ !pre-LOADed X(11),R11
+ ld [Dptr],Dval
+ add T1,R4,T1 !=
+ add T1,T2,T1
+ add A,T1,A
+ sll A,6,T2
+ srl A,32-6,A !=
+ or A,T2,A
+ add A,B,A
+
+ orn A,C,T1
+ sethi %hi(0xbd3af235),T2 !=
+ xor T1,B,T1
+ or T2,%lo(0xbd3af235),T2
+ !pre-LOADed X(2),R2
+ ld [Cptr],Cval
+ add T1,R11,T1 !=
+ add T1,T2,T1
+ add D,T1,D
+ sll D,10,T2
+ srl D,32-10,D !=
+ or D,T2,D
+ add D,A,D
+
+ orn D,B,T1
+ sethi %hi(0x2ad7d2bb),T2 !=
+ xor T1,A,T1
+ or T2,%lo(0x2ad7d2bb),T2
+ !pre-LOADed X(9),R9
+ ld [Bptr],Bval
+ add T1,R2,T1 !=
+ add Aval,A,Aval
+ add T1,T2,T1
+ st Aval,[Aptr]
+ add C,T1,C !=
+ sll C,15,T2
+ add Dval,D,Dval
+ srl C,32-15,C
+ or C,T2,C !=
+ st Dval,[Dptr]
+ add C,D,C
+
+ orn C,A,T1
+ sethi %hi(0xeb86d391),T2 !=
+ xor T1,D,T1
+ or T2,%lo(0xeb86d391),T2
+ add T1,R9,T1
+ !pre-LOADed X(0),R0
+ mov Aval,A !=
+ add T1,T2,T1
+ mov Dval,D
+ add B,T1,B
+ sll B,21,T2 !=
+ add Cval,C,Cval
+ srl B,32-21,B
+ st Cval,[Cptr]
+ or B,T2,B !=
+ add B,C,B
+
+ deccc %i2
+ mov Cval,C
+ add B,Bval,B !=
+ inc 64,%i1
+ nop
+ st B,[Bptr]
+ nop !=
+
+#ifdef ULTRASPARC
+ bg,a,pt %icc,.Lmd5_block_loop
+#else
+ bg,a .Lmd5_block_loop
+#endif
+ LOAD X(0),R0
+
+#ifdef ASI_PRIMARY_LITTLE
+ mov %o7,%asi
+#endif
+ ret
+ restore %g0,0,%o0
+
+.type md5_block,#function
+.size md5_block,(.-md5_block)
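
[Editor's note] For orientation, each unrolled group above is one MD5 step: the sethi/%lo pair materializes a 32-bit round constant, the sll/srl/or triple is the rotate, and the xor/and/xor sequence is the round-0 boolean function. A minimal C sketch of a single round-0 step (standard MD5, not code from this commit):

#include <stdint.h>

/* Rotate-left on 32-bit words; open-coded above as sll/srl/or. */
#define ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))

/* One round-0 step: a = b + ROTL(a + F(b,c,d) + X[k] + T, s), where
 * F(b,c,d) = (b & c) | (~b & d), computed above as ((c ^ d) & b) ^ d. */
static uint32_t md5_step_r0(uint32_t a, uint32_t b, uint32_t c, uint32_t d,
                            uint32_t xk, int s, uint32_t t)
	{
	a += (((c ^ d) & b) ^ d) + xk + t;
	return b + ROTL32(a, s);
	}
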
diff --git a/crypto/md5/md5.h b/crypto/md5/md5.h
index 6e97fe1e4f..148fb963d4 100644
--- a/crypto/md5/md5.h
+++ b/crypto/md5/md5.h
@@ -67,23 +67,43 @@ extern "C" {
#error MD5 is disabled.
#endif
+/*
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ * ! MD5_LONG has to be at least 32 bits wide. If it's wider, then !
+ * ! MD5_LONG_LOG2 has to be defined along. !
+ * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ */
+
+#if defined(WIN16) || defined(__LP32__)
+#define MD5_LONG unsigned long
+#elif defined(_CRAY) || defined(__ILP64__)
+#define MD5_LONG unsigned long
+#define MD5_LONG_LOG2 3
+/*
+ * _CRAY note. I could declare short, but I have no idea what impact
+ * it has on performance on non-T3E machines. I could declare
+ * int, but at least on C90 sizeof(int) can be chosen at compile time.
+ * So I've chosen long...
+ * <appro@fy.chalmers.se>
+ */
+#else
+#define MD5_LONG unsigned int
+#endif
+
#define MD5_CBLOCK 64
-#define MD5_LBLOCK 16
-#define MD5_BLOCK 16
-#define MD5_LAST_BLOCK 56
-#define MD5_LENGTH_BLOCK 8
+#define MD5_LBLOCK (MD5_CBLOCK/4)
#define MD5_DIGEST_LENGTH 16
typedef struct MD5state_st
{
- unsigned long A,B,C,D;
- unsigned long Nl,Nh;
- unsigned long data[MD5_LBLOCK];
+ MD5_LONG A,B,C,D;
+ MD5_LONG Nl,Nh;
+ MD5_LONG data[MD5_LBLOCK];
int num;
} MD5_CTX;
void MD5_Init(MD5_CTX *c);
-void MD5_Update(MD5_CTX *c, const void *data, unsigned long len);
+void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len);
void MD5_Final(unsigned char *md, MD5_CTX *c);
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md);
void MD5_Transform(MD5_CTX *c, unsigned char *b);
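
[Editor's note] The header keeps the familiar three-call interface; a minimal usage sketch against these declarations (the "abc" digest is the RFC 1321 test vector, 900150983cd24fb0d6963f7d28e17f72):

#include <stdio.h>
#include <openssl/md5.h>

int main(void)
	{
	MD5_CTX ctx;
	unsigned char md[MD5_DIGEST_LENGTH];
	int i;

	MD5_Init(&ctx);
	MD5_Update(&ctx, (const unsigned char *)"abc", 3);
	MD5_Final(md, &ctx);

	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
		printf("%02x", md[i]);
	putchar('\n');
	return 0;
	}
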
diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c
index 2671b00172..830e04d02a 100644
--- a/crypto/md5/md5_dgst.c
+++ b/crypto/md5/md5_dgst.c
@@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;
#define INIT_DATA_C (unsigned long)0x98badcfeL
#define INIT_DATA_D (unsigned long)0x10325476L
-# ifdef MD5_ASM
- void md5_block_x86(MD5_CTX *c, unsigned long *p,int num);
-# define md5_block md5_block_x86
-# else
- static void md5_block(MD5_CTX *c, unsigned long *p,int num);
-# endif
void MD5_Init(MD5_CTX *c)
{
c->A=INIT_DATA_A;
@@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c)
c->num=0;
}
-void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len)
+#ifndef md5_block_host_order
+void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num)
{
- register const unsigned char *data=_data;
- register ULONG *p;
- int sw,sc;
- ULONG l;
-
- if (len == 0) return;
-
- l=(c->Nl+(len<<3))&0xffffffffL;
- /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to
- * Wei Dai <weidai@eskimo.com> for pointing it out. */
- if (l < c->Nl) /* overflow */
- c->Nh++;
- c->Nh+=(len>>29);
- c->Nl=l;
-
- if (c->num != 0)
- {
- p=c->data;
- sw=c->num>>2;
- sc=c->num&0x03;
-
- if ((c->num+len) >= MD5_CBLOCK)
- {
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw<MD5_LBLOCK; sw++)
- {
- c2l(data,l);
- p[sw]=l;
- }
- len-=(MD5_CBLOCK-c->num);
-
- md5_block(c,p,64);
- c->num=0;
- /* drop through and do the rest */
- }
- else
- {
- int ew,ec;
-
- c->num+=(int)len;
- if ((sc+len) < 4) /* ugly, add char's to a word */
- {
- l= p[sw];
- p_c2l_p(data,l,sc,len);
- p[sw]=l;
- }
- else
- {
- ew=(c->num>>2);
- ec=(c->num&0x03);
- l= p[sw];
- p_c2l(data,l,sc);
- p[sw++]=l;
- for (; sw < ew; sw++)
- { c2l(data,l); p[sw]=l; }
- if (ec)
- {
- c2l_p(data,l,ec);
- p[sw]=l;
- }
- }
- return;
- }
- }
- /* we now can process the input data in blocks of MD5_CBLOCK
- * chars and save the leftovers to c->data. */
-#ifdef L_ENDIAN
- if ((((unsigned long)data)%sizeof(ULONG)) == 0)
- {
- sw=(int)len/MD5_CBLOCK;
- if (sw > 0)
- {
- sw*=MD5_CBLOCK;
- md5_block(c,(ULONG *)data,sw);
- data+=sw;
- len-=sw;
- }
- }
-#endif
- p=c->data;
- while (len >= MD5_CBLOCK)
- {
-#if defined(L_ENDIAN) || defined(B_ENDIAN)
- if (p != (unsigned long *)data)
- memcpy(p,data,MD5_CBLOCK);
- data+=MD5_CBLOCK;
-#ifdef B_ENDIAN
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- Endian_Reverse32(p[0]);
- Endian_Reverse32(p[1]);
- Endian_Reverse32(p[2]);
- Endian_Reverse32(p[3]);
- p+=4;
- }
-#endif
-#else
- for (sw=(MD5_LBLOCK/4); sw; sw--)
- {
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- c2l(data,l); *(p++)=l;
- }
-#endif
- p=c->data;
- md5_block(c,p,64);
- len-=MD5_CBLOCK;
- }
- sc=(int)len;
- c->num=sc;
- if (sc)
- {
- sw=sc>>2; /* words to copy */
-#ifdef L_ENDIAN
- p[sw]=0;
- memcpy(p,data,sc);
-#else
- sc&=0x03;
- for ( ; sw; sw--)
- { c2l(data,l); *(p++)=l; }
- c2l_p(data,l,sc);
- *p=l;
-#endif
- }
- }
-
-void MD5_Transform(MD5_CTX *c, unsigned char *b)
- {
- ULONG p[16];
-#if !defined(L_ENDIAN)
- ULONG *q;
- int i;
-#endif
-
-#if defined(B_ENDIAN) || defined(L_ENDIAN)
- memcpy(p,b,64);
-#ifdef B_ENDIAN
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- Endian_Reverse32(q[0]);
- Endian_Reverse32(q[1]);
- Endian_Reverse32(q[2]);
- Endian_Reverse32(q[3]);
- q+=4;
- }
-#endif
-#else
- q=p;
- for (i=(MD5_LBLOCK/4); i; i--)
- {
- ULONG l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- c2l(b,l); *(q++)=l;
- }
-#endif
- md5_block(c,p,64);
- }
-
-#ifndef MD5_ASM
-
-static void md5_block(MD5_CTX *c, register ULONG *X, int num)
- {
- register ULONG A,B,C,D;
+ register unsigned long A,B,C,D;
+ /*
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG. Doing so results in slight performance
+ * boost on LP64 architectures. The catch is we don't
+ * really care if 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows as we *save* only 32 LSBs in
+ * *either* case. Now declaring 'em long excuses the compiler
+ * from keeping 32 MSBs zeroed resulting in 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest it should say that this *prevents*
+ * performance degradation.
+ *
+ * <appro@fy.chalmers.se>
+ */
A=c->A;
B=c->B;
C=c->C;
D=c->D;
- for (;;)
+
+ for (;num--;X+=HASH_LBLOCK)
{
/* Round 0 */
R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
@@ -334,74 +176,127 @@ static void md5_block(MD5_CTX *c, register ULONG *X, int num)
R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
R3(B,C,D,A,X[ 9],21,0xeb86d391L);
- A+=c->A&0xffffffffL;
- B+=c->B&0xffffffffL;
- c->A=A;
- c->B=B;
- C+=c->C&0xffffffffL;
- D+=c->D&0xffffffffL;
- c->C=C;
- c->D=D;
- X+=16;
- num-=64;
- if (num <= 0) break;
+ A = c->A += A;
+ B = c->B += B;
+ C = c->C += C;
+ D = c->D += D;
}
}
#endif
-void MD5_Final(unsigned char *md, MD5_CTX *c)
+#ifndef md5_block_data_order
+void md5_block_data_order (MD5_CTX *c, const unsigned char *data, int num)
{
- register int i,j;
- register ULONG l;
- register ULONG *p;
- static unsigned char end[4]={0x80,0x00,0x00,0x00};
- unsigned char *cp=end;
+ register unsigned long A,B,C,D,l;
+ /*
+ * In case you wonder why A-D are declared as long and not
+ * as MD5_LONG. Doing so results in slight performance
+ * boost on LP64 architectures. The catch is we don't
+ * really care if 32 MSBs of a 64-bit register get polluted
+ * with eventual overflows as we *save* only 32 LSBs in
+ * *either* case. Now declaring 'em long excuses the compiler
+ * from keeping 32 MSBs zeroed resulting in 13% performance
+ * improvement under SPARC Solaris7/64 and 5% under AlphaLinux.
+ * Well, to be honest it should say that this *prevents*
+ * performance degradation.
+ *
+ * <appro@fy.chalmers.se>
+ */
+ MD5_LONG X[MD5_LBLOCK];
+ /*
+ * In case you wonder why I don't use c->data for this:
+ * RISCs usually have a handful of registers, and if X is
+ * declared as an automatic array a good optimizing compiler
+ * will accommodate at least part of it in the register bank
+ * instead of memory.
+ *
+ * <appro@fy.chalmers.se>
+ */
- /* c->num should definitly have room for at least one more byte. */
- p=c->data;
- j=c->num;
- i=j>>2;
+ A=c->A;
+ B=c->B;
+ C=c->C;
+ D=c->D;
- /* purify often complains about the following line as an
- * Uninitialized Memory Read. While this can be true, the
- * following p_c2l macro will reset l when that case is true.
- * This is because j&0x03 contains the number of 'valid' bytes
- * already in p[i]. If and only if j&0x03 == 0, the UMR will
- * occur but this is also the only time p_c2l will do
- * l= *(cp++) instead of l|= *(cp++)
- * Many thanks to Alex Tang <altitude@cic.net> for pickup this
- * 'potential bug' */
-#ifdef PURIFY
- if ((j&0x03) == 0) p[i]=0;
-#endif
- l=p[i];
- p_c2l(cp,l,j&0x03);
- p[i]=l;
- i++;
- /* i is the next 'undefined word' */
- if (c->num >= MD5_LAST_BLOCK)
+ for (;num--;)
{
- for (; i<MD5_LBLOCK; i++)
- p[i]=0;
- md5_block(c,p,64);
- i=0;
- }
- for (; i<(MD5_LBLOCK-2); i++)
- p[i]=0;
- p[MD5_LBLOCK-2]=c->Nl;
- p[MD5_LBLOCK-1]=c->Nh;
- md5_block(c,p,64);
- cp=md;
- l=c->A; l2c(l,cp);
- l=c->B; l2c(l,cp);
- l=c->C; l2c(l,cp);
- l=c->D; l2c(l,cp);
+ HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l;
+ /* Round 0 */
+ R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l;
+ R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l;
+ R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l;
+ R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l;
+ R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l;
+ R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l;
+ R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l;
+ R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l;
+ R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l;
+ R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l;
+ R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l;
+ R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l;
+ R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l;
+ R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l;
+ R0(C,D,A,B,X[14],17,0xa679438eL);
+ R0(B,C,D,A,X[15],22,0x49b40821L);
+ /* Round 1 */
+ R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
+ R1(D,A,B,C,X[ 6], 9,0xc040b340L);
+ R1(C,D,A,B,X[11],14,0x265e5a51L);
+ R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
+ R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
+ R1(D,A,B,C,X[10], 9,0x02441453L);
+ R1(C,D,A,B,X[15],14,0xd8a1e681L);
+ R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
+ R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
+ R1(D,A,B,C,X[14], 9,0xc33707d6L);
+ R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
+ R1(B,C,D,A,X[ 8],20,0x455a14edL);
+ R1(A,B,C,D,X[13], 5,0xa9e3e905L);
+ R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
+ R1(C,D,A,B,X[ 7],14,0x676f02d9L);
+ R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
+ /* Round 2 */
+ R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
+ R2(D,A,B,C,X[ 8],11,0x8771f681L);
+ R2(C,D,A,B,X[11],16,0x6d9d6122L);
+ R2(B,C,D,A,X[14],23,0xfde5380cL);
+ R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
+ R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
+ R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
+ R2(B,C,D,A,X[10],23,0xbebfbc70L);
+ R2(A,B,C,D,X[13], 4,0x289b7ec6L);
+ R2(D,A,B,C,X[ 0],11,0xeaa127faL);
+ R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
+ R2(B,C,D,A,X[ 6],23,0x04881d05L);
+ R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
+ R2(D,A,B,C,X[12],11,0xe6db99e5L);
+ R2(C,D,A,B,X[15],16,0x1fa27cf8L);
+ R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
+ /* Round 3 */
+ R3(A,B,C,D,X[ 0], 6,0xf4292244L);
+ R3(D,A,B,C,X[ 7],10,0x432aff97L);
+ R3(C,D,A,B,X[14],15,0xab9423a7L);
+ R3(B,C,D,A,X[ 5],21,0xfc93a039L);
+ R3(A,B,C,D,X[12], 6,0x655b59c3L);
+ R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
+ R3(C,D,A,B,X[10],15,0xffeff47dL);
+ R3(B,C,D,A,X[ 1],21,0x85845dd1L);
+ R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
+ R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
+ R3(C,D,A,B,X[ 6],15,0xa3014314L);
+ R3(B,C,D,A,X[13],21,0x4e0811a1L);
+ R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
+ R3(D,A,B,C,X[11],10,0xbd3af235L);
+ R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
+ R3(B,C,D,A,X[ 9],21,0xeb86d391L);
- /* clear stuff, md5_block may be leaving some stuff on the stack
- * but I'm not worried :-) */
- c->num=0;
-/* memset((char *)&c,0,sizeof(c));*/
+ A = c->A += A;
+ B = c->B += B;
+ C = c->C += C;
+ D = c->D += D;
+ }
}
+#endif
#ifdef undef
int printit(unsigned long *l)
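
[Editor's note] The new md5_block_data_order above interleaves the round-0 steps with HOST_c2l loads supplied by the shared ../md32_common.h template (not part of this hunk). A sketch of the assumed shape of that load for DATA_ORDER_IS_LITTLE_ENDIAN, playing the same role as the c2l macro removed from md5_locl.h below: consume four bytes and assemble them least-significant byte first. HOST_c2l_sketch is a hypothetical name used only here.

#define HOST_c2l_sketch(c,l)	(l =((MD5_LONG)(*((c)++)))     , \
				 l|=((MD5_LONG)(*((c)++)))<< 8 , \
				 l|=((MD5_LONG)(*((c)++)))<<16 , \
				 l|=((MD5_LONG)(*((c)++)))<<24 )
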
diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h
index fe7397a495..56708ba8ea 100644
--- a/crypto/md5/md5_locl.h
+++ b/crypto/md5/md5_locl.h
@@ -56,98 +56,79 @@
* [including the GNU Public Licence.]
*/
-/* On sparc, this actually slows things down :-( */
-#if defined(sun)
-#undef B_ENDIAN
-#endif
-
#include <stdlib.h>
#include <string.h>
#include <openssl/md5.h>
-#define ULONG unsigned long
-#define UCHAR unsigned char
-#define UINT unsigned int
-
-#undef c2l
-#define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \
- l|=(((unsigned long)(*((c)++)))<< 8), \
- l|=(((unsigned long)(*((c)++)))<<16), \
- l|=(((unsigned long)(*((c)++)))<<24))
+#ifndef MD5_LONG_LOG2
+#define MD5_LONG_LOG2 2 /* default to 32 bits */
+#endif
-#undef p_c2l
-#define p_c2l(c,l,n) { \
- switch (n) { \
- case 0: l =((unsigned long)(*((c)++))); \
- case 1: l|=((unsigned long)(*((c)++)))<< 8; \
- case 2: l|=((unsigned long)(*((c)++)))<<16; \
- case 3: l|=((unsigned long)(*((c)++)))<<24; \
- } \
- }
+#ifdef MD5_ASM
+# if defined(__i386) || defined(WIN32)
+# define md5_block_host_order md5_block_asm_host_order
+# elif defined(__sparc) && defined(ULTRASPARC)
+ void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num);
+# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+# endif
+#endif
-/* NOTE the pointer is not incremented at the end of this */
-#undef c2l_p
-#define c2l_p(c,l,n) { \
- l=0; \
- (c)+=n; \
- switch (n) { \
- case 3: l =((unsigned long)(*(--(c))))<<16; \
- case 2: l|=((unsigned long)(*(--(c))))<< 8; \
- case 1: l|=((unsigned long)(*(--(c)))) ; \
- } \
- }
+void md5_block_host_order (MD5_CTX *c, const MD5_LONG *p,int num);
+void md5_block_data_order (MD5_CTX *c, const unsigned char *p,int num);
-#undef p_c2l_p
-#define p_c2l_p(c,l,sc,len) { \
- switch (sc) \
- { \
- case 0: l =((unsigned long)(*((c)++))); \
- if (--len == 0) break; \
- case 1: l|=((unsigned long)(*((c)++)))<< 8; \
- if (--len == 0) break; \
- case 2: l|=((unsigned long)(*((c)++)))<<16; \
- } \
- }
+#if defined(__i386)
+/*
+ * *_block_host_order is expected to handle aligned data while
+ * *_block_data_order - unaligned. As algorithm and host (x86)
+ * are in this case of the same "endianness", these two are
+ * otherwise indistinguishable. But normally you don't want to
+ * call the same function because unaligned access in places
+ * where alignment is expected is usually a "Bad Thing". Indeed,
+ * on RISCs you get punished with BUS ERROR signal or *severe*
+ * performance degradation. Intel CPUs are in turn perfectly
+ * capable of loading unaligned data without such drastic side
+ * effect. Yes, they say it's slower than aligned load, but no
+ * exception is generated and therefore performance degradation
+ * is *incomparable* with RISCs. What we should weigh here is
+ * costs of unaligned access against costs of aligning data.
+ * According to my measurements allowing unaligned access results
+ * in ~9% performance improvement on Pentium II operating at
+ * 266MHz. I won't be surprised if the difference will be higher
+ * on faster systems:-)
+ *
+ * <appro@fy.chalmers.se>
+ */
+#define md5_block_data_order md5_block_host_order
+#endif
-#undef l2c
-#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \
- *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
- *((c)++)=(unsigned char)(((l)>>16)&0xff), \
- *((c)++)=(unsigned char)(((l)>>24)&0xff))
+#define DATA_ORDER_IS_LITTLE_ENDIAN
+
+#define HASH_LONG MD5_LONG
+#define HASH_LONG_LOG2 MD5_LONG_LOG2
+#define HASH_CTX MD5_CTX
+#define HASH_CBLOCK MD5_CBLOCK
+#define HASH_LBLOCK MD5_LBLOCK
+#define HASH_UPDATE MD5_Update
+#define HASH_TRANSFORM MD5_Transform
+#define HASH_FINAL MD5_Final
+#define HASH_BLOCK_HOST_ORDER md5_block_host_order
+#if defined(B_ENDIAN) || defined(md5_block_data_order)
+#define HASH_BLOCK_DATA_ORDER md5_block_data_order
+/*
+ * Little-endians (Intel and Alpha) feel better without this.
+ * It looks like memcpy does better job than generic
+ * md5_block_data_order on copying-n-aligning input data.
+ * But franlky speaking I didn't expect such result on Alpha.
+ * On the other hand I've got this with egcs-1.0.2 and if
+ * program is compiled with another (better?) compiler it
+ * might turn out other way around.
+ *
+ * <appro@fy.chalmers.se>
+ */
+#endif
-/* NOTE - c is not incremented as per l2c */
-#undef l2cn
-#define l2cn(l1,l2,c,n) { \
- c+=n; \
- switch (n) { \
- case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \
- case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \
- case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \
- case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \
- case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \
- case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \
- case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \
- case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \
- } \
- }
+#include "../md32_common.h"
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#if defined(WIN32)
-/* 5 instructions with rotate instruction, else 9 */
-#define Endian_Reverse32(a) \
- { \
- unsigned long l=(a); \
- (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \
- }
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define Endian_Reverse32(a) \
- { \
- unsigned long l=(a); \
- l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \
- (a)=ROTATE(l,16L); \
- }
-#endif
/*
#define F(x,y,z) (((x) & (y)) | ((~(x)) & (z)))
#define G(x,y,z) (((x) & (z)) | ((y) & (~(z))))
@@ -162,14 +143,6 @@
#define H(b,c,d) ((b) ^ (c) ^ (d))
#define I(b,c,d) (((~(d)) | (b)) ^ (c))
-#undef ROTATE
-#if defined(WIN32)
-#define ROTATE(a,n) _lrotl(a,n)
-#else
-#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
-#endif
-
-
#define R0(a,b,c,d,k,s,t) { \
a+=((k)+(t)+F((b),(c),(d))); \
a=ROTATE(a,s); \
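
[Editor's note] A note on the round macros: the SPARC code earlier in this diff computes F as ((c ^ d) & b) ^ d and G as ((b ^ c) & d) ^ c, one operation cheaper than the textbook forms kept in the commented-out block above. A small standalone self-check, not part of the commit, that the reduced and textbook forms agree:

#include <assert.h>
#include <stdint.h>

int main(void)
	{
	static const uint32_t v[4] = { 0x00000000u, 0xffffffffu,
				       0xd76aa478u, 0x0f0f33ccu };
	int i, j, k;

	for (i = 0; i < 4; i++)
		for (j = 0; j < 4; j++)
			for (k = 0; k < 4; k++)
				{
				uint32_t b = v[i], c = v[j], d = v[k];
				/* F: ((c^d)&b)^d == (b&c)|(~b&d) */
				assert((((c ^ d) & b) ^ d) == ((b & c) | (~b & d)));
				/* G: ((b^c)&d)^c == (b&d)|(c&~d) */
				assert((((b ^ c) & d) ^ c) == ((b & d) | (c & ~d)));
				}
	return 0;
	}
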
diff --git a/crypto/md5/md5_one.c b/crypto/md5/md5_one.c
index c761401160..c98721f4d8 100644
--- a/crypto/md5/md5_one.c
+++ b/crypto/md5/md5_one.c
@@ -57,7 +57,8 @@
*/
#include <stdio.h>
-#include "md5_locl.h"
+#include <string.h>
+#include <openssl/md5.h>
unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md)
{
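
[Editor's note] For completeness, a usage sketch of this one-shot entry point (the body is cut off in this hunk; presumably it simply chains MD5_Init/MD5_Update/MD5_Final):

#include <openssl/md5.h>

int main(void)
	{
	unsigned char md[MD5_DIGEST_LENGTH];

	/* Same digest as the streaming example above. */
	MD5((unsigned char *)"abc", 3, md);
	return 0;
	}
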