diff options
author | Ulf Möller <ulf@openssl.org> | 1999-05-13 15:16:42 +0200 |
---|---|---|
committer | Ulf Möller <ulf@openssl.org> | 1999-05-13 15:16:42 +0200 |
commit | bd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c (patch) | |
tree | e64a4b13276c3663c3ad9f6f4235909ecadc8852 /crypto/md5 | |
parent | VMS support. (diff) | |
download | openssl-bd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c.tar.xz openssl-bd3576d2ddedb0492f5bd3c1e47c15778e4fbe3c.zip |
Reorganize and speed up MD5.
Submitted by: Andy Polyakov <appro@fy.chalmers.se>
Diffstat (limited to 'crypto/md5')
-rw-r--r-- | crypto/md5/Makefile.ssl | 10 | ||||
-rw-r--r-- | crypto/md5/asm/md5-586.pl | 3 | ||||
-rw-r--r-- | crypto/md5/asm/md5-sparcv9.S | 1035 | ||||
-rw-r--r-- | crypto/md5/md5.h | 36 | ||||
-rw-r--r-- | crypto/md5/md5_dgst.c | 365 | ||||
-rw-r--r-- | crypto/md5/md5_locl.h | 153 | ||||
-rw-r--r-- | crypto/md5/md5_one.c | 3 |
7 files changed, 1269 insertions, 336 deletions
diff --git a/crypto/md5/Makefile.ssl b/crypto/md5/Makefile.ssl index e27cfca0b0..f8eaf628b3 100644 --- a/crypto/md5/Makefile.ssl +++ b/crypto/md5/Makefile.ssl @@ -66,6 +66,14 @@ asm/mx86bsdi.o: asm/mx86unix.cpp asm/mx86unix.cpp: asm/md5-586.pl (cd asm; $(PERL) md5-586.pl cpp >mx86unix.cpp) +# works for both SC and gcc +asm/md5-sparcv8plus.o: asm/md5-sparcv9.S + $(CPP) -DULTRASPARC -DMD5_BLOCK_DATA_ORDER asm/md5-sparcv9.S | as -xarch=v8plus /dev/fd/0 -o asm/md5-sparcv8plus.o + +asm/md5-sparcv9.o: asm/md5-sparcv9.S + $(CC) -xarch=v9 -DULTRASPARC -DMD5_BLOCK_DATA_ORDER -c asm/md5-sparcv9.S -o asm/md5-sparcv9.o + + files: $(PERL) $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO @@ -103,5 +111,5 @@ clean: # DO NOT DELETE THIS LINE -- make depend depends on it. md5_dgst.o: ../../include/openssl/md5.h ../../include/openssl/opensslv.h -md5_dgst.o: md5_locl.h +md5_dgst.o: ../md32_common.h md5_locl.h md5_one.o: ../../include/openssl/md5.h md5_locl.h diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl index 0249e100e1..5fc6a205ce 100644 --- a/crypto/md5/asm/md5-586.pl +++ b/crypto/md5/asm/md5-586.pl @@ -29,7 +29,7 @@ $X="esi"; 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, # R3 ); -&md5_block("md5_block_x86"); +&md5_block("md5_block_asm_host_order"); &asm_finish(); sub Np @@ -183,6 +183,7 @@ sub md5_block &mov($X, &wparam(1)); # esi &mov($C, &wparam(2)); &push("ebp"); + &shl($C, 6); &push("ebx"); &add($C, $X); # offset we end at &sub($C, 64); diff --git a/crypto/md5/asm/md5-sparcv9.S b/crypto/md5/asm/md5-sparcv9.S new file mode 100644 index 0000000000..955b3680b5 --- /dev/null +++ b/crypto/md5/asm/md5-sparcv9.S @@ -0,0 +1,1035 @@ +.ident "md5-sparcv9.S, Version 1.0" +.ident "SPARC V9 ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" +.file "md5-sparcv9.S" + +/* + * ==================================================================== + * Copyright (c) 1999 Andy Polyakov <appro@fy.chalmers.se>. + * + * Rights for redistribution and usage in source and binary forms are + * granted as long as above copyright notices are retained. Warranty + * of any kind is (of course:-) disclaimed. + * ==================================================================== + */ + +/* + * This is my modest contribution to OpenSSL project (see + * http://www.openssl.org/ for more information about it) and is an + * assembler implementation of MD5 block hash function. I've hand-coded + * this for the sole reason to reach UltraSPARC-specific "load in + * little-endian byte order" instruction. This gives up to 15% + * performance improvement for cases when input message is aligned at + * 32 bits boundary. The module was tested under both 32 *and* 64 bit + * kernels. For updates see http://fy.chalmers.se/~appro/hpe/. + * + * To compile with SC4.x/SC5.x: + * + * cc -xarch=v[9|8plus] -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \ + * -c md5-sparcv9.S + * + * and with gcc: + * + * gcc -mcpu=ultrasparc -DULTRASPARC -DMD5_BLOCK_DATA_ORDER \ + * -c md5-sparcv9.S + * + * or if above fails (it does if you have gas): + * + * gcc -E -DULTRASPARC -DMD5_BLOCK_DATA_ORDER md5_block.sparc.S | \ + * as -xarch=v8plus /dev/fd/0 -o md5-sparcv9.o + */ + +#define A %o0 +#define B %o1 +#define C %o2 +#define D %o3 +#define T1 %o4 +#define T2 %o5 + +#define R0 %l0 +#define R1 %l1 +#define R2 %l2 +#define R3 %l3 +#define R4 %l4 +#define R5 %l5 +#define R6 %l6 +#define R7 %l7 +#define R8 %i3 +#define R9 %i4 +#define R10 %i5 +#define R11 %g1 +#define R12 %g2 +#define R13 %g3 +#define RX %g4 + +#define Aptr %i0+0 +#define Bptr %i0+4 +#define Cptr %i0+8 +#define Dptr %i0+12 + +#define Aval R5 /* those not used at the end of the last round */ +#define Bval R6 +#define Cval R7 +#define Dval R8 + +#if defined(MD5_BLOCK_DATA_ORDER) +# if defined(ULTRASPARC) +# define LOAD lda +# define X(i) [%i1+i*4]%asi +# define md5_block md5_block_asm_data_order_aligned +# define ASI_PRIMARY_LITTLE 0x88 +# else +# error "MD5_BLOCK_DATA_ORDER is supported only on UltraSPARC!" +# endif +#else +# define LOAD ld +# define X(i) [%i1+i*4] +# define md5_block md5_block_asm_host_order +#endif + +.section ".text",#alloc,#execinstr +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME -192 +#else +# define FRAME -96 +#endif + +.align 32 + +.global md5_block +md5_block: + save %sp,FRAME,%sp + + ld [Dptr],D +#ifdef ASI_PRIMARY_LITTLE + mov %asi,%o7 ! How dare I? Well, I just do:-) +#else + nop +#endif + ld [Cptr],C +#ifdef ASI_PRIMARY_LITTLE + mov ASI_PRIMARY_LITTLE,%asi +#else + nop +#endif + ld [Bptr],B + nop + ld [Aptr],A + nop + LOAD X(0),R0 + nop + ba .Lmd5_block_loop + nop + +.align 32 +.Lmd5_block_loop: + +!!!!!!!!Round 0 + + xor C,D,T1 + sethi %hi(0xd76aa478),T2 + and T1,B,T1 + or T2,%lo(0xd76aa478),T2 != + xor T1,D,T1 + add T1,R0,T1 + LOAD X(1),R1 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0xe8c7b756),T2 + and T1,A,T1 != + or T2,%lo(0xe8c7b756),T2 + xor T1,C,T1 + LOAD X(2),R2 + add T1,R1,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0x242070db),T2 != + and T1,D,T1 + or T2,%lo(0x242070db),T2 + xor T1,B,T1 + add T1,R2,T1 != + LOAD X(3),R3 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0xc1bdceee),T2 + and T1,C,T1 + or T2,%lo(0xc1bdceee),T2 + xor T1,A,T1 != + add T1,R3,T1 + LOAD X(4),R4 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0xf57c0faf),T2 + and T1,B,T1 + or T2,%lo(0xf57c0faf),T2 != + xor T1,D,T1 + add T1,R4,T1 + LOAD X(5),R5 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0x4787c62a),T2 + and T1,A,T1 != + or T2,%lo(0x4787c62a),T2 + xor T1,C,T1 + LOAD X(6),R6 + add T1,R5,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xa8304613),T2 != + and T1,D,T1 + or T2,%lo(0xa8304613),T2 + xor T1,B,T1 + add T1,R6,T1 != + LOAD X(7),R7 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0xfd469501),T2 + and T1,C,T1 + or T2,%lo(0xfd469501),T2 + xor T1,A,T1 != + add T1,R7,T1 + LOAD X(8),R8 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0x698098d8),T2 + and T1,B,T1 + or T2,%lo(0x698098d8),T2 != + xor T1,D,T1 + add T1,R8,T1 + LOAD X(9),R9 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0x8b44f7af),T2 + and T1,A,T1 != + or T2,%lo(0x8b44f7af),T2 + xor T1,C,T1 + LOAD X(10),R10 + add T1,R9,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xffff5bb1),T2 != + and T1,D,T1 + or T2,%lo(0xffff5bb1),T2 + xor T1,B,T1 + add T1,R10,T1 != + LOAD X(11),R11 + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0x895cd7be),T2 + and T1,C,T1 + or T2,%lo(0x895cd7be),T2 + xor T1,A,T1 != + add T1,R11,T1 + LOAD X(12),R12 + add T1,T2,T1 + add B,T1,B != + sll B,22,T2 + srl B,32-22,B + or B,T2,B + xor C,D,T1 != + add B,C,B + + sethi %hi(0x6b901122),T2 + and T1,B,T1 + or T2,%lo(0x6b901122),T2 != + xor T1,D,T1 + add T1,R12,T1 + LOAD X(13),R13 + add T1,T2,T1 != + add A,T1,A + sll A,7,T2 + srl A,32-7,A + or A,T2,A != + xor B,C,T1 + add A,B,A + + sethi %hi(0xfd987193),T2 + and T1,A,T1 != + or T2,%lo(0xfd987193),T2 + xor T1,C,T1 + LOAD X(14),RX + add T1,R13,T1 != + add T1,T2,T1 + add D,T1,D + sll D,12,T2 + srl D,32-12,D != + or D,T2,D + xor A,B,T1 + add D,A,D + + sethi %hi(0xa679438e),T2 != + and T1,D,T1 + or T2,%lo(0xa679438e),T2 + xor T1,B,T1 + add T1,RX,T1 != + LOAD X(15),RX + add T1,T2,T1 + add C,T1,C + sll C,17,T2 != + srl C,32-17,C + or C,T2,C + xor D,A,T1 + add C,D,C != + + sethi %hi(0x49b40821),T2 + and T1,C,T1 + or T2,%lo(0x49b40821),T2 + xor T1,A,T1 != + add T1,RX,T1 + !pre-LOADed X(1),R1 + add T1,T2,T1 + add B,T1,B + sll B,22,T2 != + srl B,32-22,B + or B,T2,B + add B,C,B + +!!!!!!!!Round 1 + + xor B,C,T1 != + sethi %hi(0xf61e2562),T2 + and T1,D,T1 + or T2,%lo(0xf61e2562),T2 + xor T1,C,T1 != + add T1,R1,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add A,T1,A + sll A,5,T2 != + srl A,32-5,A + or A,T2,A + add A,B,A + + xor A,B,T1 != + sethi %hi(0xc040b340),T2 + and T1,C,T1 + or T2,%lo(0xc040b340),T2 + xor T1,B,T1 != + add T1,R6,T1 + !pre-LOADed X(11),R11 + add T1,T2,T1 + add D,T1,D + sll D,9,T2 != + srl D,32-9,D + or D,T2,D + add D,A,D + + xor D,A,T1 != + sethi %hi(0x265e5a51),T2 + and T1,B,T1 + or T2,%lo(0x265e5a51),T2 + xor T1,A,T1 != + add T1,R11,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 + add C,T1,C + sll C,14,T2 != + srl C,32-14,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xe9b6c7aa),T2 + and T1,A,T1 + or T2,%lo(0xe9b6c7aa),T2 + xor T1,D,T1 != + add T1,R0,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 + add B,T1,B + sll B,20,T2 != + srl B,32-20,B + or B,T2,B + add B,C,B + + xor B,C,T1 != + sethi %hi(0xd62f105d),T2 + and T1,D,T1 + or T2,%lo(0xd62f105d),T2 + xor T1,C,T1 != + add T1,R5,T1 + !pre-LOADed X(10),R10 + add T1,T2,T1 + add A,T1,A + sll A,5,T2 != + srl A,32-5,A + or A,T2,A + add A,B,A + + xor A,B,T1 != + sethi %hi(0x02441453),T2 + and T1,C,T1 + or T2,%lo(0x02441453),T2 + xor T1,B,T1 != + add T1,R10,T1 + LOAD X(15),RX + add T1,T2,T1 + add D,T1,D != + sll D,9,T2 + srl D,32-9,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xd8a1e681),T2 + and T1,B,T1 + or T2,%lo(0xd8a1e681),T2 != + xor T1,A,T1 + add T1,RX,T1 + !pre-LOADed X(4),R4 + add T1,T2,T1 + add C,T1,C != + sll C,14,T2 + srl C,32-14,C + or C,T2,C + add C,D,C != + + xor C,D,T1 + sethi %hi(0xe7d3fbc8),T2 + and T1,A,T1 + or T2,%lo(0xe7d3fbc8),T2 != + xor T1,D,T1 + add T1,R4,T1 + !pre-LOADed X(9),R9 + add T1,T2,T1 + add B,T1,B != + sll B,20,T2 + srl B,32-20,B + or B,T2,B + add B,C,B != + + xor B,C,T1 + sethi %hi(0x21e1cde6),T2 + and T1,D,T1 + or T2,%lo(0x21e1cde6),T2 != + xor T1,C,T1 + add T1,R9,T1 + LOAD X(14),RX + add T1,T2,T1 != + add A,T1,A + sll A,5,T2 + srl A,32-5,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xc33707d6),T2 + and T1,C,T1 != + or T2,%lo(0xc33707d6),T2 + xor T1,B,T1 + add T1,RX,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 != + add D,T1,D + sll D,9,T2 + srl D,32-9,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0xf4d50d87),T2 + and T1,B,T1 != + or T2,%lo(0xf4d50d87),T2 + xor T1,A,T1 + add T1,R3,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 != + add C,T1,C + sll C,14,T2 + srl C,32-14,C + or C,T2,C != + add C,D,C + + xor C,D,T1 + sethi %hi(0x455a14ed),T2 + and T1,A,T1 != + or T2,%lo(0x455a14ed),T2 + xor T1,D,T1 + add T1,R8,T1 + !pre-LOADed X(13),R13 + add T1,T2,T1 != + add B,T1,B + sll B,20,T2 + srl B,32-20,B + or B,T2,B != + add B,C,B + + xor B,C,T1 + sethi %hi(0xa9e3e905),T2 + and T1,D,T1 != + or T2,%lo(0xa9e3e905),T2 + xor T1,C,T1 + add T1,R13,T1 + !pre-LOADed X(2),R2 + add T1,T2,T1 != + add A,T1,A + sll A,5,T2 + srl A,32-5,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xfcefa3f8),T2 + and T1,C,T1 != + or T2,%lo(0xfcefa3f8),T2 + xor T1,B,T1 + add T1,R2,T1 + !pre-LOADed X(7),R7 + add T1,T2,T1 != + add D,T1,D + sll D,9,T2 + srl D,32-9,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0x676f02d9),T2 + and T1,B,T1 != + or T2,%lo(0x676f02d9),T2 + xor T1,A,T1 + add T1,R7,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 != + add C,T1,C + sll C,14,T2 + srl C,32-14,C + or C,T2,C != + add C,D,C + + xor C,D,T1 + sethi %hi(0x8d2a4c8a),T2 + and T1,A,T1 != + or T2,%lo(0x8d2a4c8a),T2 + xor T1,D,T1 + add T1,R12,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 != + add B,T1,B + sll B,20,T2 + srl B,32-20,B + or B,T2,B != + add B,C,B + +!!!!!!!!Round 2 + + xor B,C,T1 + sethi %hi(0xfffa3942),T2 + xor T1,D,T1 != + or T2,%lo(0xfffa3942),T2 + add T1,R5,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 + add A,T1,A != + sll A,4,T2 + srl A,32-4,A + or A,T2,A + add A,B,A != + + xor A,B,T1 + sethi %hi(0x8771f681),T2 + xor T1,C,T1 + or T2,%lo(0x8771f681),T2 != + add T1,R8,T1 + !pre-LOADed X(11),R11 + add T1,T2,T1 + add D,T1,D + sll D,11,T2 != + srl D,32-11,D + or D,T2,D + add D,A,D + + xor D,A,T1 != + sethi %hi(0x6d9d6122),T2 + xor T1,B,T1 + or T2,%lo(0x6d9d6122),T2 + add T1,R11,T1 != + LOAD X(14),RX + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xfde5380c),T2 + xor T1,A,T1 + or T2,%lo(0xfde5380c),T2 + add T1,RX,T1 != + !pre-LOADed X(1),R1 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0xa4beea44),T2 != + xor T1,D,T1 + or T2,%lo(0xa4beea44),T2 + add T1,R1,T1 + !pre-LOADed X(4),R4 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0x4bdecfa9),T2 + xor T1,C,T1 != + or T2,%lo(0x4bdecfa9),T2 + add T1,R4,T1 + !pre-LOADed X(7),R7 + add T1,T2,T1 + add D,T1,D != + sll D,11,T2 + srl D,32-11,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xf6bb4b60),T2 + xor T1,B,T1 + or T2,%lo(0xf6bb4b60),T2 != + add T1,R7,T1 + !pre-LOADed X(10),R10 + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0xbebfbc70),T2 + xor T1,A,T1 + or T2,%lo(0xbebfbc70),T2 + add T1,R10,T1 != + !pre-LOADed X(13),R13 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0x289b7ec6),T2 != + xor T1,D,T1 + or T2,%lo(0x289b7ec6),T2 + add T1,R13,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xeaa127fa),T2 + xor T1,C,T1 != + or T2,%lo(0xeaa127fa),T2 + add T1,R0,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 + add D,T1,D != + sll D,11,T2 + srl D,32-11,D + or D,T2,D + add D,A,D != + + xor D,A,T1 + sethi %hi(0xd4ef3085),T2 + xor T1,B,T1 + or T2,%lo(0xd4ef3085),T2 != + add T1,R3,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add C,T1,C + sll C,16,T2 != + srl C,32-16,C + or C,T2,C + add C,D,C + + xor C,D,T1 != + sethi %hi(0x04881d05),T2 + xor T1,A,T1 + or T2,%lo(0x04881d05),T2 + add T1,R6,T1 != + !pre-LOADed X(9),R9 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 + srl B,32-23,B != + or B,T2,B + add B,C,B + + xor B,C,T1 + sethi %hi(0xd9d4d039),T2 != + xor T1,D,T1 + or T2,%lo(0xd9d4d039),T2 + add T1,R9,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 != + add A,T1,A + sll A,4,T2 + srl A,32-4,A + or A,T2,A != + add A,B,A + + xor A,B,T1 + sethi %hi(0xe6db99e5),T2 + xor T1,C,T1 != + or T2,%lo(0xe6db99e5),T2 + add T1,R12,T1 + LOAD X(15),RX + add T1,T2,T1 != + add D,T1,D + sll D,11,T2 + srl D,32-11,D + or D,T2,D != + add D,A,D + + xor D,A,T1 + sethi %hi(0x1fa27cf8),T2 + xor T1,B,T1 != + or T2,%lo(0x1fa27cf8),T2 + add T1,RX,T1 + !pre-LOADed X(2),R2 + add T1,T2,T1 + add C,T1,C != + sll C,16,T2 + srl C,32-16,C + or C,T2,C + add C,D,C != + + xor C,D,T1 + sethi %hi(0xc4ac5665),T2 + xor T1,A,T1 + or T2,%lo(0xc4ac5665),T2 != + add T1,R2,T1 + !pre-LOADed X(0),R0 + add T1,T2,T1 + add B,T1,B + sll B,23,T2 != + srl B,32-23,B + or B,T2,B + add B,C,B + +!!!!!!!!Round 3 + + orn B,D,T1 != + sethi %hi(0xf4292244),T2 + xor T1,C,T1 + or T2,%lo(0xf4292244),T2 + add T1,R0,T1 != + !pre-LOADed X(7),R7 + add T1,T2,T1 + add A,T1,A + sll A,6,T2 + srl A,32-6,A != + or A,T2,A + add A,B,A + + orn A,C,T1 + sethi %hi(0x432aff97),T2 != + xor T1,B,T1 + or T2,%lo(0x432aff97),T2 + LOAD X(14),RX + add T1,R7,T1 != + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0xab9423a7),T2 != + xor T1,A,T1 + or T2,%lo(0xab9423a7),T2 + add T1,RX,T1 + !pre-LOADed X(5),R5 + add T1,T2,T1 != + add C,T1,C + sll C,15,T2 + srl C,32-15,C + or C,T2,C != + add C,D,C + + orn C,A,T1 + sethi %hi(0xfc93a039),T2 + xor T1,D,T1 != + or T2,%lo(0xfc93a039),T2 + add T1,R5,T1 + !pre-LOADed X(12),R12 + add T1,T2,T1 + add B,T1,B != + sll B,21,T2 + srl B,32-21,B + or B,T2,B + add B,C,B != + + orn B,D,T1 + sethi %hi(0x655b59c3),T2 + xor T1,C,T1 + or T2,%lo(0x655b59c3),T2 != + add T1,R12,T1 + !pre-LOADed X(3),R3 + add T1,T2,T1 + add A,T1,A + sll A,6,T2 != + srl A,32-6,A + or A,T2,A + add A,B,A + + orn A,C,T1 != + sethi %hi(0x8f0ccc92),T2 + xor T1,B,T1 + or T2,%lo(0x8f0ccc92),T2 + add T1,R3,T1 != + !pre-LOADed X(10),R10 + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0xffeff47d),T2 != + xor T1,A,T1 + or T2,%lo(0xffeff47d),T2 + add T1,R10,T1 + !pre-LOADed X(1),R1 + add T1,T2,T1 != + add C,T1,C + sll C,15,T2 + srl C,32-15,C + or C,T2,C != + add C,D,C + + orn C,A,T1 + sethi %hi(0x85845dd1),T2 + xor T1,D,T1 != + or T2,%lo(0x85845dd1),T2 + add T1,R1,T1 + !pre-LOADed X(8),R8 + add T1,T2,T1 + add B,T1,B != + sll B,21,T2 + srl B,32-21,B + or B,T2,B + add B,C,B != + + orn B,D,T1 + sethi %hi(0x6fa87e4f),T2 + xor T1,C,T1 + or T2,%lo(0x6fa87e4f),T2 != + add T1,R8,T1 + LOAD X(15),RX + add T1,T2,T1 + add A,T1,A != + sll A,6,T2 + srl A,32-6,A + or A,T2,A + add A,B,A != + + orn A,C,T1 + sethi %hi(0xfe2ce6e0),T2 + xor T1,B,T1 + or T2,%lo(0xfe2ce6e0),T2 != + add T1,RX,T1 + !pre-LOADed X(6),R6 + add T1,T2,T1 + add D,T1,D + sll D,10,T2 != + srl D,32-10,D + or D,T2,D + add D,A,D + + orn D,B,T1 != + sethi %hi(0xa3014314),T2 + xor T1,A,T1 + or T2,%lo(0xa3014314),T2 + add T1,R6,T1 != + !pre-LOADed X(13),R13 + add T1,T2,T1 + add C,T1,C + sll C,15,T2 + srl C,32-15,C != + or C,T2,C + add C,D,C + + orn C,A,T1 + sethi %hi(0x4e0811a1),T2 != + xor T1,D,T1 + or T2,%lo(0x4e0811a1),T2 + !pre-LOADed X(4),R4 + ld [Aptr],Aval + add T1,R13,T1 != + add T1,T2,T1 + add B,T1,B + sll B,21,T2 + srl B,32-21,B != + or B,T2,B + add B,C,B + + orn B,D,T1 + sethi %hi(0xf7537e82),T2 != + xor T1,C,T1 + or T2,%lo(0xf7537e82),T2 + !pre-LOADed X(11),R11 + ld [Dptr],Dval + add T1,R4,T1 != + add T1,T2,T1 + add A,T1,A + sll A,6,T2 + srl A,32-6,A != + or A,T2,A + add A,B,A + + orn A,C,T1 + sethi %hi(0xbd3af235),T2 != + xor T1,B,T1 + or T2,%lo(0xbd3af235),T2 + !pre-LOADed X(2),R2 + ld [Cptr],Cval + add T1,R11,T1 != + add T1,T2,T1 + add D,T1,D + sll D,10,T2 + srl D,32-10,D != + or D,T2,D + add D,A,D + + orn D,B,T1 + sethi %hi(0x2ad7d2bb),T2 != + xor T1,A,T1 + or T2,%lo(0x2ad7d2bb),T2 + !pre-LOADed X(9),R9 + ld [Bptr],Bval + add T1,R2,T1 != + add Aval,A,Aval + add T1,T2,T1 + st Aval,[Aptr] + add C,T1,C != + sll C,15,T2 + add Dval,D,Dval + srl C,32-15,C + or C,T2,C != + st Dval,[Dptr] + add C,D,C + + orn C,A,T1 + sethi %hi(0xeb86d391),T2 != + xor T1,D,T1 + or T2,%lo(0xeb86d391),T2 + add T1,R9,T1 + !pre-LOADed X(0),R0 + mov Aval,A != + add T1,T2,T1 + mov Dval,D + add B,T1,B + sll B,21,T2 != + add Cval,C,Cval + srl B,32-21,B + st Cval,[Cptr] + or B,T2,B != + add B,C,B + + deccc %i2 + mov Cval,C + add B,Bval,B != + inc 64,%i1 + nop + st B,[Bptr] + nop != + +#ifdef ULTRASPARC + bg,a,pt %icc,.Lmd5_block_loop +#else + bg,a .Lmd5_block_loop +#endif + LOAD X(0),R0 + +#ifdef ASI_PRIMARY_LITTLE + mov %o7,%asi +#endif + ret + restore %g0,0,%o0 + +.type md5_block,#function +.size md5_block,(.-md5_block) diff --git a/crypto/md5/md5.h b/crypto/md5/md5.h index 6e97fe1e4f..148fb963d4 100644 --- a/crypto/md5/md5.h +++ b/crypto/md5/md5.h @@ -67,23 +67,43 @@ extern "C" { #error MD5 is disabled. #endif +/* + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + * ! MD5_LONG has to be at least 32 bits wide. If it's wider, then ! + * ! MD5_LONG_LOG2 has to be defined along. ! + * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + */ + +#if defined(WIN16) || defined(__LP32__) +#define MD5_LONG unsigned long +#elif defined(_CRAY) || defined(__ILP64__) +#define MD5_LONG unsigned long +#define MD5_LONG_LOG2 3 +/* + * _CRAY note. I could declare short, but I have no idea what impact + * does it have on performance on none-T3E machines. I could declare + * int, but at least on C90 sizeof(int) can be chosen at compile time. + * So I've chosen long... + * <appro@fy.chalmers.se> + */ +#else +#define MD5_LONG unsigned int +#endif + #define MD5_CBLOCK 64 -#define MD5_LBLOCK 16 -#define MD5_BLOCK 16 -#define MD5_LAST_BLOCK 56 -#define MD5_LENGTH_BLOCK 8 +#define MD5_LBLOCK (MD5_CBLOCK/4) #define MD5_DIGEST_LENGTH 16 typedef struct MD5state_st { - unsigned long A,B,C,D; - unsigned long Nl,Nh; - unsigned long data[MD5_LBLOCK]; + MD5_LONG A,B,C,D; + MD5_LONG Nl,Nh; + MD5_LONG data[MD5_LBLOCK]; int num; } MD5_CTX; void MD5_Init(MD5_CTX *c); -void MD5_Update(MD5_CTX *c, const void *data, unsigned long len); +void MD5_Update(MD5_CTX *c, const unsigned char *data, unsigned long len); void MD5_Final(unsigned char *md, MD5_CTX *c); unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md); void MD5_Transform(MD5_CTX *c, unsigned char *b); diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c index 2671b00172..830e04d02a 100644 --- a/crypto/md5/md5_dgst.c +++ b/crypto/md5/md5_dgst.c @@ -70,12 +70,6 @@ char *MD5_version="MD5" OPENSSL_VERSION_PTEXT; #define INIT_DATA_C (unsigned long)0x98badcfeL #define INIT_DATA_D (unsigned long)0x10325476L -# ifdef MD5_ASM - void md5_block_x86(MD5_CTX *c, unsigned long *p,int num); -# define md5_block md5_block_x86 -# else - static void md5_block(MD5_CTX *c, unsigned long *p,int num); -# endif void MD5_Init(MD5_CTX *c) { c->A=INIT_DATA_A; @@ -87,183 +81,31 @@ void MD5_Init(MD5_CTX *c) c->num=0; } -void MD5_Update(MD5_CTX *c, const void *_data, unsigned long len) +#ifndef md5_block_host_order +void md5_block_host_order (MD5_CTX *c, const MD5_LONG *X, int num) { - register const unsigned char *data=_data; - register ULONG *p; - int sw,sc; - ULONG l; - - if (len == 0) return; - - l=(c->Nl+(len<<3))&0xffffffffL; - /* 95-05-24 eay Fixed a bug with the overflow handling, thanks to - * Wei Dai <weidai@eskimo.com> for pointing it out. */ - if (l < c->Nl) /* overflow */ - c->Nh++; - c->Nh+=(len>>29); - c->Nl=l; - - if (c->num != 0) - { - p=c->data; - sw=c->num>>2; - sc=c->num&0x03; - - if ((c->num+len) >= MD5_CBLOCK) - { - l= p[sw]; - p_c2l(data,l,sc); - p[sw++]=l; - for (; sw<MD5_LBLOCK; sw++) - { - c2l(data,l); - p[sw]=l; - } - len-=(MD5_CBLOCK-c->num); - - md5_block(c,p,64); - c->num=0; - /* drop through and do the rest */ - } - else - { - int ew,ec; - - c->num+=(int)len; - if ((sc+len) < 4) /* ugly, add char's to a word */ - { - l= p[sw]; - p_c2l_p(data,l,sc,len); - p[sw]=l; - } - else - { - ew=(c->num>>2); - ec=(c->num&0x03); - l= p[sw]; - p_c2l(data,l,sc); - p[sw++]=l; - for (; sw < ew; sw++) - { c2l(data,l); p[sw]=l; } - if (ec) - { - c2l_p(data,l,ec); - p[sw]=l; - } - } - return; - } - } - /* we now can process the input data in blocks of MD5_CBLOCK - * chars and save the leftovers to c->data. */ -#ifdef L_ENDIAN - if ((((unsigned long)data)%sizeof(ULONG)) == 0) - { - sw=(int)len/MD5_CBLOCK; - if (sw > 0) - { - sw*=MD5_CBLOCK; - md5_block(c,(ULONG *)data,sw); - data+=sw; - len-=sw; - } - } -#endif - p=c->data; - while (len >= MD5_CBLOCK) - { -#if defined(L_ENDIAN) || defined(B_ENDIAN) - if (p != (unsigned long *)data) - memcpy(p,data,MD5_CBLOCK); - data+=MD5_CBLOCK; -#ifdef B_ENDIAN - for (sw=(MD5_LBLOCK/4); sw; sw--) - { - Endian_Reverse32(p[0]); - Endian_Reverse32(p[1]); - Endian_Reverse32(p[2]); - Endian_Reverse32(p[3]); - p+=4; - } -#endif -#else - for (sw=(MD5_LBLOCK/4); sw; sw--) - { - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - c2l(data,l); *(p++)=l; - } -#endif - p=c->data; - md5_block(c,p,64); - len-=MD5_CBLOCK; - } - sc=(int)len; - c->num=sc; - if (sc) - { - sw=sc>>2; /* words to copy */ -#ifdef L_ENDIAN - p[sw]=0; - memcpy(p,data,sc); -#else - sc&=0x03; - for ( ; sw; sw--) - { c2l(data,l); *(p++)=l; } - c2l_p(data,l,sc); - *p=l; -#endif - } - } - -void MD5_Transform(MD5_CTX *c, unsigned char *b) - { - ULONG p[16]; -#if !defined(L_ENDIAN) - ULONG *q; - int i; -#endif - -#if defined(B_ENDIAN) || defined(L_ENDIAN) - memcpy(p,b,64); -#ifdef B_ENDIAN - q=p; - for (i=(MD5_LBLOCK/4); i; i--) - { - Endian_Reverse32(q[0]); - Endian_Reverse32(q[1]); - Endian_Reverse32(q[2]); - Endian_Reverse32(q[3]); - q+=4; - } -#endif -#else - q=p; - for (i=(MD5_LBLOCK/4); i; i--) - { - ULONG l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - c2l(b,l); *(q++)=l; - } -#endif - md5_block(c,p,64); - } - -#ifndef MD5_ASM - -static void md5_block(MD5_CTX *c, register ULONG *X, int num) - { - register ULONG A,B,C,D; + register unsigned long A,B,C,D; + /* + * In case you wonder why A-D are declared as long and not + * as MD5_LONG. Doing so results in slight performance + * boost on LP64 architectures. The catch is we don't + * really care if 32 MSBs of a 64-bit register get polluted + * with eventual overflows as we *save* only 32 LSBs in + * *either* case. Now declaring 'em long excuses the compiler + * from keeping 32 MSBs zeroed resulting in 13% performance + * improvement under SPARC Solaris7/64 and 5% under AlphaLinux. + * Well, to be honest it should say that this *prevents* + * performance degradation. + * + * <appro@fy.chalmers.se> + */ A=c->A; B=c->B; C=c->C; D=c->D; - for (;;) + + for (;num--;X+=HASH_LBLOCK) { /* Round 0 */ R0(A,B,C,D,X[ 0], 7,0xd76aa478L); @@ -334,74 +176,127 @@ static void md5_block(MD5_CTX *c, register ULONG *X, int num) R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL); R3(B,C,D,A,X[ 9],21,0xeb86d391L); - A+=c->A&0xffffffffL; - B+=c->B&0xffffffffL; - c->A=A; - c->B=B; - C+=c->C&0xffffffffL; - D+=c->D&0xffffffffL; - c->C=C; - c->D=D; - X+=16; - num-=64; - if (num <= 0) break; + A = c->A += A; + B = c->B += B; + C = c->C += C; + D = c->D += D; } } #endif -void MD5_Final(unsigned char *md, MD5_CTX *c) +#ifndef md5_block_data_order +void md5_block_data_order (MD5_CTX *c, const unsigned char *data, int num) { - register int i,j; - register ULONG l; - register ULONG *p; - static unsigned char end[4]={0x80,0x00,0x00,0x00}; - unsigned char *cp=end; + register unsigned long A,B,C,D,l; + /* + * In case you wonder why A-D are declared as long and not + * as MD5_LONG. Doing so results in slight performance + * boost on LP64 architectures. The catch is we don't + * really care if 32 MSBs of a 64-bit register get polluted + * with eventual overflows as we *save* only 32 LSBs in + * *either* case. Now declaring 'em long excuses the compiler + * from keeping 32 MSBs zeroed resulting in 13% performance + * improvement under SPARC Solaris7/64 and 5% under AlphaLinux. + * Well, to be honest it should say that this *prevents* + * performance degradation. + * + * <appro@fy.chalmers.se> + */ + MD5_LONG X[MD5_LBLOCK]; + /* + * In case you wonder why don't I use c->data for this. + * RISCs usually have a handful of registers and if X is + * declared as automatic array good optimizing compiler + * shall accomodate at least part of it in register bank + * instead of memory. + * + * <appro@fy.chalmers.se> + */ - /* c->num should definitly have room for at least one more byte. */ - p=c->data; - j=c->num; - i=j>>2; + A=c->A; + B=c->B; + C=c->C; + D=c->D; - /* purify often complains about the following line as an - * Uninitialized Memory Read. While this can be true, the - * following p_c2l macro will reset l when that case is true. - * This is because j&0x03 contains the number of 'valid' bytes - * already in p[i]. If and only if j&0x03 == 0, the UMR will - * occur but this is also the only time p_c2l will do - * l= *(cp++) instead of l|= *(cp++) - * Many thanks to Alex Tang <altitude@cic.net> for pickup this - * 'potential bug' */ -#ifdef PURIFY - if ((j&0x03) == 0) p[i]=0; -#endif - l=p[i]; - p_c2l(cp,l,j&0x03); - p[i]=l; - i++; - /* i is the next 'undefined word' */ - if (c->num >= MD5_LAST_BLOCK) + for (;num--;) { - for (; i<MD5_LBLOCK; i++) - p[i]=0; - md5_block(c,p,64); - i=0; - } - for (; i<(MD5_LBLOCK-2); i++) - p[i]=0; - p[MD5_LBLOCK-2]=c->Nl; - p[MD5_LBLOCK-1]=c->Nh; - md5_block(c,p,64); - cp=md; - l=c->A; l2c(l,cp); - l=c->B; l2c(l,cp); - l=c->C; l2c(l,cp); - l=c->D; l2c(l,cp); + HOST_c2l(data,l); X[ 0]=l; HOST_c2l(data,l); X[ 1]=l; + /* Round 0 */ + R0(A,B,C,D,X[ 0], 7,0xd76aa478L); HOST_c2l(data,l); X[ 2]=l; + R0(D,A,B,C,X[ 1],12,0xe8c7b756L); HOST_c2l(data,l); X[ 3]=l; + R0(C,D,A,B,X[ 2],17,0x242070dbL); HOST_c2l(data,l); X[ 4]=l; + R0(B,C,D,A,X[ 3],22,0xc1bdceeeL); HOST_c2l(data,l); X[ 5]=l; + R0(A,B,C,D,X[ 4], 7,0xf57c0fafL); HOST_c2l(data,l); X[ 6]=l; + R0(D,A,B,C,X[ 5],12,0x4787c62aL); HOST_c2l(data,l); X[ 7]=l; + R0(C,D,A,B,X[ 6],17,0xa8304613L); HOST_c2l(data,l); X[ 8]=l; + R0(B,C,D,A,X[ 7],22,0xfd469501L); HOST_c2l(data,l); X[ 9]=l; + R0(A,B,C,D,X[ 8], 7,0x698098d8L); HOST_c2l(data,l); X[10]=l; + R0(D,A,B,C,X[ 9],12,0x8b44f7afL); HOST_c2l(data,l); X[11]=l; + R0(C,D,A,B,X[10],17,0xffff5bb1L); HOST_c2l(data,l); X[12]=l; + R0(B,C,D,A,X[11],22,0x895cd7beL); HOST_c2l(data,l); X[13]=l; + R0(A,B,C,D,X[12], 7,0x6b901122L); HOST_c2l(data,l); X[14]=l; + R0(D,A,B,C,X[13],12,0xfd987193L); HOST_c2l(data,l); X[15]=l; + R0(C,D,A,B,X[14],17,0xa679438eL); + R0(B,C,D,A,X[15],22,0x49b40821L); + /* Round 1 */ + R1(A,B,C,D,X[ 1], 5,0xf61e2562L); + R1(D,A,B,C,X[ 6], 9,0xc040b340L); + R1(C,D,A,B,X[11],14,0x265e5a51L); + R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL); + R1(A,B,C,D,X[ 5], 5,0xd62f105dL); + R1(D,A,B,C,X[10], 9,0x02441453L); + R1(C,D,A,B,X[15],14,0xd8a1e681L); + R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L); + R1(A,B,C,D,X[ 9], 5,0x21e1cde6L); + R1(D,A,B,C,X[14], 9,0xc33707d6L); + R1(C,D,A,B,X[ 3],14,0xf4d50d87L); + R1(B,C,D,A,X[ 8],20,0x455a14edL); + R1(A,B,C,D,X[13], 5,0xa9e3e905L); + R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L); + R1(C,D,A,B,X[ 7],14,0x676f02d9L); + R1(B,C,D,A,X[12],20,0x8d2a4c8aL); + /* Round 2 */ + R2(A,B,C,D,X[ 5], 4,0xfffa3942L); + R2(D,A,B,C,X[ 8],11,0x8771f681L); + R2(C,D,A,B,X[11],16,0x6d9d6122L); + R2(B,C,D,A,X[14],23,0xfde5380cL); + R2(A,B,C,D,X[ 1], 4,0xa4beea44L); + R2(D,A,B,C,X[ 4],11,0x4bdecfa9L); + R2(C,D,A,B,X[ 7],16,0xf6bb4b60L); + R2(B,C,D,A,X[10],23,0xbebfbc70L); + R2(A,B,C,D,X[13], 4,0x289b7ec6L); + R2(D,A,B,C,X[ 0],11,0xeaa127faL); + R2(C,D,A,B,X[ 3],16,0xd4ef3085L); + R2(B,C,D,A,X[ 6],23,0x04881d05L); + R2(A,B,C,D,X[ 9], 4,0xd9d4d039L); + R2(D,A,B,C,X[12],11,0xe6db99e5L); + R2(C,D,A,B,X[15],16,0x1fa27cf8L); + R2(B,C,D,A,X[ 2],23,0xc4ac5665L); + /* Round 3 */ + R3(A,B,C,D,X[ 0], 6,0xf4292244L); + R3(D,A,B,C,X[ 7],10,0x432aff97L); + R3(C,D,A,B,X[14],15,0xab9423a7L); + R3(B,C,D,A,X[ 5],21,0xfc93a039L); + R3(A,B,C,D,X[12], 6,0x655b59c3L); + R3(D,A,B,C,X[ 3],10,0x8f0ccc92L); + R3(C,D,A,B,X[10],15,0xffeff47dL); + R3(B,C,D,A,X[ 1],21,0x85845dd1L); + R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL); + R3(D,A,B,C,X[15],10,0xfe2ce6e0L); + R3(C,D,A,B,X[ 6],15,0xa3014314L); + R3(B,C,D,A,X[13],21,0x4e0811a1L); + R3(A,B,C,D,X[ 4], 6,0xf7537e82L); + R3(D,A,B,C,X[11],10,0xbd3af235L); + R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL); + R3(B,C,D,A,X[ 9],21,0xeb86d391L); - /* clear stuff, md5_block may be leaving some stuff on the stack - * but I'm not worried :-) */ - c->num=0; -/* memset((char *)&c,0,sizeof(c));*/ + A = c->A += A; + B = c->B += B; + C = c->C += C; + D = c->D += D; + } } +#endif #ifdef undef int printit(unsigned long *l) diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h index fe7397a495..56708ba8ea 100644 --- a/crypto/md5/md5_locl.h +++ b/crypto/md5/md5_locl.h @@ -56,98 +56,79 @@ * [including the GNU Public Licence.] */ -/* On sparc, this actually slows things down :-( */ -#if defined(sun) -#undef B_ENDIAN -#endif - #include <stdlib.h> #include <string.h> #include <openssl/md5.h> -#define ULONG unsigned long -#define UCHAR unsigned char -#define UINT unsigned int - -#undef c2l -#define c2l(c,l) (l = ((unsigned long)(*((c)++))) , \ - l|=(((unsigned long)(*((c)++)))<< 8), \ - l|=(((unsigned long)(*((c)++)))<<16), \ - l|=(((unsigned long)(*((c)++)))<<24)) +#ifndef MD5_LONG_LOG2 +#define MD5_LONG_LOG2 2 /* default to 32 bits */ +#endif -#undef p_c2l -#define p_c2l(c,l,n) { \ - switch (n) { \ - case 0: l =((unsigned long)(*((c)++))); \ - case 1: l|=((unsigned long)(*((c)++)))<< 8; \ - case 2: l|=((unsigned long)(*((c)++)))<<16; \ - case 3: l|=((unsigned long)(*((c)++)))<<24; \ - } \ - } +#ifdef MD5_ASM +# if defined(__i386) || defined(WIN32) +# define md5_block_host_order md5_block_asm_host_order +# elif defined(__sparc) && defined(ULTRASPARC) + void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,int num); +# define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned +# endif +#endif -/* NOTE the pointer is not incremented at the end of this */ -#undef c2l_p -#define c2l_p(c,l,n) { \ - l=0; \ - (c)+=n; \ - switch (n) { \ - case 3: l =((unsigned long)(*(--(c))))<<16; \ - case 2: l|=((unsigned long)(*(--(c))))<< 8; \ - case 1: l|=((unsigned long)(*(--(c)))) ; \ - } \ - } +void md5_block_host_order (MD5_CTX *c, const MD5_LONG *p,int num); +void md5_block_data_order (MD5_CTX *c, const unsigned char *p,int num); -#undef p_c2l_p -#define p_c2l_p(c,l,sc,len) { \ - switch (sc) \ - { \ - case 0: l =((unsigned long)(*((c)++))); \ - if (--len == 0) break; \ - case 1: l|=((unsigned long)(*((c)++)))<< 8; \ - if (--len == 0) break; \ - case 2: l|=((unsigned long)(*((c)++)))<<16; \ - } \ - } +#if defined(__i386) +/* + * *_block_host_order is expected to handle aligned data while + * *_block_data_order - unaligned. As algorithm and host (x86) + * are in this case of the same "endianess" these two are + * otherwise indistinguishable. But normally you don't want to + * call the same function because unaligned access in places + * where alignment is expected is usually a "Bad Thing". Indeed, + * on RISCs you get punished with BUS ERROR signal or *severe* + * performance degradation. Intel CPUs are in turn perfectly + * capable of loading unaligned data without such drastic side + * effect. Yes, they say it's slower than aligned load, but no + * exception is generated and therefore performance degradation + * is *incomparable* with RISCs. What we should weight here is + * costs of unaligned access against costs of aligning data. + * According to my measurements allowing unaligned access results + * in ~9% performance improvement on Pentium II operating at + * 266MHz. I won't be surprised if the difference will be higher + * on faster systems:-) + * + * <appro@fy.chalmers.se> + */ +#define md5_block_data_order md5_block_host_order +#endif -#undef l2c -#define l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ - *((c)++)=(unsigned char)(((l)>> 8)&0xff), \ - *((c)++)=(unsigned char)(((l)>>16)&0xff), \ - *((c)++)=(unsigned char)(((l)>>24)&0xff)) +#define DATA_ORDER_IS_LITTLE_ENDIAN + +#define HASH_LONG MD5_LONG +#define HASH_LONG_LOG2 MD5_LONG_LOG2 +#define HASH_CTX MD5_CTX +#define HASH_CBLOCK MD5_CBLOCK +#define HASH_LBLOCK MD5_LBLOCK +#define HASH_UPDATE MD5_Update +#define HASH_TRANSFORM MD5_Transform +#define HASH_FINAL MD5_Final +#define HASH_BLOCK_HOST_ORDER md5_block_host_order +#if defined(B_ENDIAN) || defined(md5_block_data_order) +#define HASH_BLOCK_DATA_ORDER md5_block_data_order +/* + * Little-endians (Intel and Alpha) feel better without this. + * It looks like memcpy does better job than generic + * md5_block_data_order on copying-n-aligning input data. + * But franlky speaking I didn't expect such result on Alpha. + * On the other hand I've got this with egcs-1.0.2 and if + * program is compiled with another (better?) compiler it + * might turn out other way around. + * + * <appro@fy.chalmers.se> + */ +#endif -/* NOTE - c is not incremented as per l2c */ -#undef l2cn -#define l2cn(l1,l2,c,n) { \ - c+=n; \ - switch (n) { \ - case 8: *(--(c))=(unsigned char)(((l2)>>24)&0xff); \ - case 7: *(--(c))=(unsigned char)(((l2)>>16)&0xff); \ - case 6: *(--(c))=(unsigned char)(((l2)>> 8)&0xff); \ - case 5: *(--(c))=(unsigned char)(((l2) )&0xff); \ - case 4: *(--(c))=(unsigned char)(((l1)>>24)&0xff); \ - case 3: *(--(c))=(unsigned char)(((l1)>>16)&0xff); \ - case 2: *(--(c))=(unsigned char)(((l1)>> 8)&0xff); \ - case 1: *(--(c))=(unsigned char)(((l1) )&0xff); \ - } \ - } +#include "../md32_common.h" -/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */ -#if defined(WIN32) -/* 5 instructions with rotate instruction, else 9 */ -#define Endian_Reverse32(a) \ - { \ - unsigned long l=(a); \ - (a)=((ROTATE(l,8)&0x00FF00FF)|(ROTATE(l,24)&0xFF00FF00)); \ - } -#else -/* 6 instructions with rotate instruction, else 8 */ -#define Endian_Reverse32(a) \ - { \ - unsigned long l=(a); \ - l=(((l&0xFF00FF00)>>8L)|((l&0x00FF00FF)<<8L)); \ - (a)=ROTATE(l,16L); \ - } -#endif /* #define F(x,y,z) (((x) & (y)) | ((~(x)) & (z))) #define G(x,y,z) (((x) & (z)) | ((y) & (~(z)))) @@ -162,14 +143,6 @@ #define H(b,c,d) ((b) ^ (c) ^ (d)) #define I(b,c,d) (((~(d)) | (b)) ^ (c)) -#undef ROTATE -#if defined(WIN32) -#define ROTATE(a,n) _lrotl(a,n) -#else -#define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n)))) -#endif - - #define R0(a,b,c,d,k,s,t) { \ a+=((k)+(t)+F((b),(c),(d))); \ a=ROTATE(a,s); \ diff --git a/crypto/md5/md5_one.c b/crypto/md5/md5_one.c index c761401160..c98721f4d8 100644 --- a/crypto/md5/md5_one.c +++ b/crypto/md5/md5_one.c @@ -57,7 +57,8 @@ */ #include <stdio.h> -#include "md5_locl.h" +#include <string.h> +#include <openssl/md5.h> unsigned char *MD5(unsigned char *d, unsigned long n, unsigned char *md) { |