-rwxr-xr-x | crypto/aes/asm/aesv8-armx.pl          |   6
-rwxr-xr-x | crypto/aes/asm/vpaes-armv8.pl         |  53
-rwxr-xr-x | crypto/bn/asm/armv8-mont.pl           |   1
-rwxr-xr-x | crypto/chacha/asm/chacha-armv8.pl     |  13
-rw-r--r-- | crypto/ec/asm/ecp_nistz256-armv8.pl   | 100
-rw-r--r-- | crypto/ec/asm/ecp_sm2p256-armv8.pl    |  22
-rwxr-xr-x | crypto/modes/asm/aes-gcm-armv8_64.pl  |   1
-rw-r--r-- | crypto/modes/asm/ghashv8-armx.pl      |   1
-rwxr-xr-x | crypto/poly1305/asm/poly1305-armv8.pl |   8
-rwxr-xr-x | crypto/sha/asm/keccak1600-armv8.pl    |  10
-rw-r--r-- | crypto/sha/asm/sha1-armv8.pl          |   6
-rw-r--r-- | crypto/sha/asm/sha512-armv8.pl        |  16
12 files changed, 164 insertions, 73 deletions
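
All twelve files get the same treatment: constant pools move from .text into .rodata, and every reference to them is rewritten from a single adr (or a literal-pool ldr) into an adrp plus :lo12: pair, because adr only has a +/-1 MB PC-relative range and cannot reach data that now lives in a different section. The sketch below is illustrative only and is not part of the patch: the label .Lconst_example and the function load_const_example are made up, and it spells out .section .rodata for a plain GNU assembler build rather than the perlasm shorthand used in the diff.

	// Hypothetical example of the addressing pattern applied throughout this patch.
	.section .rodata
	.align	4
.Lconst_example:			// made-up constant pool
	.quad	0x0123456789abcdef
	.quad	0xfedcba9876543210

	.text
	.globl	load_const_example
	.type	load_const_example,%function
	.align	4
load_const_example:
	adrp	x0,.Lconst_example		// 4 KB page containing the pool
	add	x0,x0,:lo12:.Lconst_example	// + low 12 bits = full address
	ldp	x0,x1,[x0]			// load both quadwords
	// literal-load form, as used for .LRR/.Lpoly in ecp_nistz256-armv8.pl:
	adrp	x2,.Lconst_example
	ldr	x2,[x2,:lo12:.Lconst_example]
	ret
	.size	load_const_example,.-load_const_example

The adrp/add (or adrp/ldr) pair is resolved through page-relative relocations at link time, which is what lets the code in .text and the tables in .rodata end up arbitrarily far apart.
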
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 33a2dd53da..289210b2c6 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -107,12 +107,13 @@ my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
 $code.=<<___;
+.rodata
 .align 5
 .Lrcon:
 .long 0x01,0x01,0x01,0x01
 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
 .long 0x1b,0x1b,0x1b,0x1b
-
+.previous
 .globl ${prefix}_set_encrypt_key
 .type ${prefix}_set_encrypt_key,%function
 .align 5
@@ -139,7 +140,8 @@ $code.=<<___;
 	tst $bits,#0x3f
 	b.ne .Lenc_key_abort

-	adr $ptr,.Lrcon
+	adrp $ptr,.Lrcon
+	add $ptr,$ptr,:lo12:.Lrcon
 	cmp $bits,#192

 	veor $zero,$zero,$zero
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index 49988e9c2b..3b5c907af1 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -55,7 +55,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
 $code.=<<___;
 #include "arm_arch.h"

-.text
+.rodata

 .type _vpaes_consts,%object
 .align 7	// totally strategic alignment
@@ -146,6 +146,9 @@ _vpaes_consts:
 .asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
 .size _vpaes_consts,.-_vpaes_consts
 .align 6
+
+.text
+
 ___

 {
@@ -165,7 +168,8 @@ $code.=<<___;
 .type _vpaes_encrypt_preheat,%function
 .align 4
 _vpaes_encrypt_preheat:
-	adr x10, .Lk_inv
+	adrp x10, .Lk_inv
+	add x10, x10, :lo12:.Lk_inv
 	movi v17.16b, #0x0f
 	ld1 {v18.2d-v19.2d}, [x10],#32	// .Lk_inv
 	ld1 {v20.2d-v23.2d}, [x10],#64	// .Lk_ipt, .Lk_sbo
@@ -193,7 +197,8 @@ _vpaes_encrypt_preheat:
 _vpaes_encrypt_core:
 	mov x9, $key
 	ldr w8, [$key,#240]		// pull rounds
-	adr x11, .Lk_mc_forward+16
+	adrp x11, .Lk_mc_forward+16
+	add x11, x11, :lo12:.Lk_mc_forward+16
 					// vmovdqa .Lk_ipt(%rip), %xmm2	# iptlo
 	ld1 {v16.2d}, [x9], #16		// vmovdqu (%r9), %xmm5		# round0 key
 	and v1.16b, v7.16b, v17.16b	// vpand %xmm9, %xmm0, %xmm1
@@ -280,7 +285,8 @@ vpaes_encrypt:
 _vpaes_encrypt_2x:
 	mov x9, $key
 	ldr w8, [$key,#240]		// pull rounds
-	adr x11, .Lk_mc_forward+16
+	adrp x11, .Lk_mc_forward+16
+	add x11, x11, :lo12:.Lk_mc_forward+16
 					// vmovdqa .Lk_ipt(%rip), %xmm2	# iptlo
 	ld1 {v16.2d}, [x9], #16		// vmovdqu (%r9), %xmm5		# round0 key
 	and v1.16b, v14.16b, v17.16b	// vpand %xmm9, %xmm0, %xmm1
@@ -383,9 +389,11 @@ _vpaes_encrypt_2x:
 .type _vpaes_decrypt_preheat,%function
 .align 4
 _vpaes_decrypt_preheat:
-	adr x10, .Lk_inv
+	adrp x10, .Lk_inv
+	add x10, x10, :lo12:.Lk_inv
 	movi v17.16b, #0x0f
-	adr x11, .Lk_dipt
+	adrp x11, .Lk_dipt
+	add x11, x11, :lo12:.Lk_dipt
 	ld1 {v18.2d-v19.2d}, [x10],#32	// .Lk_inv
 	ld1 {v20.2d-v23.2d}, [x11],#64	// .Lk_dipt, .Lk_dsbo
 	ld1 {v24.2d-v27.2d}, [x11],#64	// .Lk_dsb9, .Lk_dsbd
@@ -407,10 +415,12 @@ _vpaes_decrypt_core:
 					// vmovdqa .Lk_dipt(%rip), %xmm2	# iptlo
 	lsl x11, x8, #4			// mov %rax, %r11; shl \$4, %r11
 	eor x11, x11, #0x30		// xor \$0x30, %r11
-	adr x10, .Lk_sr
+	adrp x10, .Lk_sr
+	add x10, x10, :lo12:.Lk_sr
 	and x11, x11, #0x30		// and \$0x30, %r11
 	add x11, x11, x10
-	adr x10, .Lk_mc_forward+48
+	adrp x10, .Lk_mc_forward+48
+	add x10, x10, :lo12:.Lk_mc_forward+48
 	ld1 {v16.2d}, [x9],#16		// vmovdqu (%r9), %xmm4		# round0 key
 	and v1.16b, v7.16b, v17.16b	// vpand %xmm9, %xmm0, %xmm1
@@ -518,10 +528,12 @@ _vpaes_decrypt_2x:
 					// vmovdqa .Lk_dipt(%rip), %xmm2	# iptlo
 	lsl x11, x8, #4			// mov %rax, %r11; shl \$4, %r11
 	eor x11, x11, #0x30		// xor \$0x30, %r11
-	adr x10, .Lk_sr
+	adrp x10, .Lk_sr
+	add x10, x10, :lo12:.Lk_sr
 	and x11, x11, #0x30		// and \$0x30, %r11
 	add x11, x11, x10
-	adr x10, .Lk_mc_forward+48
+	adrp x10, .Lk_mc_forward+48
+	add x10, x10, :lo12:.Lk_mc_forward+48
 	ld1 {v16.2d}, [x9],#16		// vmovdqu (%r9), %xmm4		# round0 key
 	and v1.16b, v14.16b, v17.16b	// vpand %xmm9, %xmm0, %xmm1
@@ -657,14 +669,18 @@ $code.=<<___;
 .type _vpaes_key_preheat,%function
 .align 4
 _vpaes_key_preheat:
-	adr x10, .Lk_inv
+	adrp x10, .Lk_inv
+	add x10, x10, :lo12:.Lk_inv
 	movi v16.16b, #0x5b		// .Lk_s63
-	adr x11, .Lk_sb1
+	adrp x11, .Lk_sb1
+	add x11, x11, :lo12:.Lk_sb1
 	movi v17.16b, #0x0f		// .Lk_s0F
 	ld1 {v18.2d-v21.2d}, [x10]	// .Lk_inv, .Lk_ipt
-	adr x10, .Lk_dksd
+	adrp x10, .Lk_dksd
+	add x10, x10, :lo12:.Lk_dksd
 	ld1 {v22.2d-v23.2d}, [x11]	// .Lk_sb1
-	adr x11, .Lk_mc_forward
+	adrp x11, .Lk_mc_forward
+	add x11, x11, :lo12:.Lk_mc_forward
 	ld1 {v24.2d-v27.2d}, [x10],#64	// .Lk_dksd, .Lk_dksb
 	ld1 {v28.2d-v31.2d}, [x10],#64	// .Lk_dkse, .Lk_dks9
 	ld1 {v8.2d}, [x10]		// .Lk_rcon
@@ -688,7 +704,8 @@ _vpaes_schedule_core:
 	bl _vpaes_schedule_transform
 	mov v7.16b, v0.16b		// vmovdqa %xmm0, %xmm7

-	adr x10, .Lk_sr			// lea .Lk_sr(%rip),%r10
+	adrp x10, .Lk_sr		// lea .Lk_sr(%rip),%r10
+	add x10, x10, :lo12:.Lk_sr
 	add x8, x8, x10
 	cbnz $dir, .Lschedule_am_decrypting
@@ -814,12 +831,14 @@ _vpaes_schedule_core:
 .align 4
 .Lschedule_mangle_last:
 	// schedule last round key from xmm0
-	adr x11, .Lk_deskew		// lea .Lk_deskew(%rip),%r11	# prepare to deskew
+	adrp x11, .Lk_deskew		// lea .Lk_deskew(%rip),%r11	# prepare to deskew
+	add x11, x11, :lo12:.Lk_deskew
 	cbnz $dir, .Lschedule_mangle_last_dec

 	// encrypting
 	ld1 {v1.2d}, [x8]		// vmovdqa (%r8,%r10),%xmm1
-	adr x11, .Lk_opt		// lea .Lk_opt(%rip), %r11	# prepare to output transform
+	adrp x11, .Lk_opt		// lea .Lk_opt(%rip), %r11	# prepare to output transform
+	add x11, x11, :lo12:.Lk_opt
 	add $out, $out, #32		// add \$32, %rdx
 	tbl v0.16b, {v0.16b}, v1.16b	// vpshufb %xmm1, %xmm0, %xmm0	# output permute
diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl
index 21ab12bdf0..912a7cf2f9 100755
--- a/crypto/bn/asm/armv8-mont.pl
+++ b/crypto/bn/asm/armv8-mont.pl
@@ -1898,6 +1898,7 @@ __bn_mul4x_mont:
 ___
 }
 $code.=<<___;
+.rodata
 .asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 4
 ___
diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index bd5ff88777..3768f7f2e0 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -140,7 +140,7 @@ $code.=<<___;
 .extern ChaCha20_ctr32_sve
 #endif

-.text
+.rodata

 .align 5
 .Lsigma:
@@ -151,6 +151,8 @@ $code.=<<___;
 .long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f
 .asciz "ChaCha20 for ARMv8, CRYPTOGAMS by \@dot-asm"

+.text
+
 .globl ChaCha20_ctr32_dflt
 .type ChaCha20_ctr32_dflt,%function
 .align 5
@@ -170,7 +172,8 @@ ChaCha20_ctr32_dflt:
 	stp x29,x30,[sp,#-96]!
 	add x29,sp,#0

-	adr @x[0],.Lsigma
+	adrp @x[0],.Lsigma
+	add @x[0],@x[0],:lo12:.Lsigma
 	stp x19,x20,[sp,#16]
 	stp x21,x22,[sp,#32]
 	stp x23,x24,[sp,#48]
@@ -473,7 +476,8 @@ ChaCha20_neon:
 	stp x29,x30,[sp,#-96]!
 	add x29,sp,#0

-	adr @x[0],.Lsigma
+	adrp @x[0],.Lsigma
+	add @x[0],@x[0],:lo12:.Lsigma
 	stp x19,x20,[sp,#16]
 	stp x21,x22,[sp,#32]
 	stp x23,x24,[sp,#48]
@@ -884,7 +888,8 @@ ChaCha20_512_neon:
 	stp x29,x30,[sp,#-96]!
 	add x29,sp,#0

-	adr @x[0],.Lsigma
+	adrp @x[0],.Lsigma
+	add @x[0],@x[0],:lo12:.Lsigma
 	stp x19,x20,[sp,#16]
 	stp x21,x22,[sp,#32]
 	stp x23,x24,[sp,#48]
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 6c5d0e8b3c..e9ee7ed920 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -55,7 +55,7 @@ my ($acc6,$acc7)=($ap,$bp);	# used in __ecp_nistz256_sqr_mont
 $code.=<<___;
 #include "arm_arch.h"

-.text
+.rodata
 ___
 ########################################################################
 # Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7
@@ -117,6 +117,8 @@ $code.=<<___;
 .quad 0xccd1c8aaee00bc4f
 .asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"

+.text
+
 // void	ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]);
 .globl ecp_nistz256_to_mont
 .type ecp_nistz256_to_mont,%function
@@ -127,12 +129,16 @@ ecp_nistz256_to_mont:
 	add x29,sp,#0
 	stp x19,x20,[sp,#16]

-	ldr $bi,.LRR			// bp[0]
+	adrp $bi,.LRR
+	ldr $bi,[$bi,:lo12:.LRR]	// bp[0]
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
-	adr $bp,.LRR			// &bp[0]
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
+	adrp $bp,.LRR			// &bp[0]
+	add $bp,$bp,:lo12:.LRR

 	bl __ecp_nistz256_mul_mont
@@ -155,9 +161,12 @@ ecp_nistz256_from_mont:
 	mov $bi,#1			// bp[0]
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
-	adr $bp,.Lone			// &bp[0]
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
+	adrp $bp,.Lone			// &bp[0]
+	add $bp,$bp,:lo12:.Lone

 	bl __ecp_nistz256_mul_mont
@@ -181,8 +190,10 @@ ecp_nistz256_mul_mont:
 	ldr $bi,[$bp]			// bp[0]
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_mul_mont
@@ -204,8 +215,10 @@ ecp_nistz256_sqr_mont:
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_sqr_mont
@@ -229,8 +242,10 @@ ecp_nistz256_add:
 	ldp $t0,$t1,[$bp]
 	ldp $acc2,$acc3,[$ap,#16]
 	ldp $t2,$t3,[$bp,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_add
@@ -250,8 +265,10 @@ ecp_nistz256_div_by_2:
 	ldp $acc0,$acc1,[$ap]
 	ldp $acc2,$acc3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_div_by_2
@@ -271,8 +288,10 @@ ecp_nistz256_mul_by_2:
 	ldp $acc0,$acc1,[$ap]
 	ldp $acc2,$acc3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
 	mov $t0,$acc0
 	mov $t1,$acc1
 	mov $t2,$acc2
@@ -296,8 +315,10 @@ ecp_nistz256_mul_by_3:
 	ldp $acc0,$acc1,[$ap]
 	ldp $acc2,$acc3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
 	mov $t0,$acc0
 	mov $t1,$acc1
 	mov $t2,$acc2
@@ -333,8 +354,10 @@ ecp_nistz256_sub:
 	ldp $acc0,$acc1,[$ap]
 	ldp $acc2,$acc3,[$ap,#16]
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_sub_from
@@ -357,8 +380,10 @@ ecp_nistz256_neg:
 	mov $acc1,xzr
 	mov $acc2,xzr
 	mov $acc3,xzr
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]

 	bl __ecp_nistz256_sub_from
@@ -736,9 +761,11 @@ ecp_nistz256_point_double:
 	mov $rp_real,$rp
 	ldp $acc2,$acc3,[$ap,#48]
 	mov $ap_real,$ap
-	ldr $poly1,.Lpoly+8
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
 	mov $t0,$acc0
-	ldr $poly3,.Lpoly+24
+	ldr $poly3,[$poly3,#24]
 	mov $t1,$acc1
 	ldp $a0,$a1,[$ap_real,#64]	// forward load for p256_sqr_mont
 	mov $t2,$acc2
@@ -897,8 +924,10 @@ ecp_nistz256_point_add:
 	mov $rp_real,$rp
 	mov $ap_real,$ap
 	mov $bp_real,$bp
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
 	orr $t0,$a0,$a1
 	orr $t2,$a2,$a3
 	orr $in2infty,$t0,$t2
@@ -1151,8 +1180,10 @@ ecp_nistz256_point_add_affine:
 	mov $rp_real,$rp
 	mov $ap_real,$ap
 	mov $bp_real,$bp
-	ldr $poly1,.Lpoly+8
-	ldr $poly3,.Lpoly+24
+	adrp $poly3,.Lpoly
+	add $poly3,$poly3,:lo12:.Lpoly
+	ldr $poly1,[$poly3,#8]
+	ldr $poly3,[$poly3,#24]
 	ldp $a0,$a1,[$ap,#64]		// in1_z
 	ldp $a2,$a3,[$ap,#64+16]
@@ -1303,7 +1334,8 @@ $code.=<<___;
 	stp $acc2,$acc3,[$rp_real,#$i+16]
 ___
 $code.=<<___	if ($i == 0);
-	adr $bp_real,.Lone_mont-64
+	adrp $bp_real,.Lone_mont-64
+	add $bp_real,$bp_real,:lo12:.Lone_mont-64
 ___
 }
 $code.=<<___;
@@ -1354,7 +1386,8 @@ ecp_nistz256_ord_mul_mont:
 	stp x21,x22,[sp,#32]
 	stp x23,x24,[sp,#48]

-	adr $ordk,.Lord
+	adrp $ordk,.Lord
+	add $ordk,$ordk,:lo12:.Lord
 	ldr $bi,[$bp]			// bp[0]
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
@@ -1497,7 +1530,8 @@ ecp_nistz256_ord_sqr_mont:
 	stp x21,x22,[sp,#32]
 	stp x23,x24,[sp,#48]

-	adr $ordk,.Lord
+	adrp $ordk,.Lord
+	add $ordk,$ordk,:lo12:.Lord
 	ldp $a0,$a1,[$ap]
 	ldp $a2,$a3,[$ap,#16]
diff --git a/crypto/ec/asm/ecp_sm2p256-armv8.pl b/crypto/ec/asm/ecp_sm2p256-armv8.pl
index 59e0b190e1..e40b01ad20 100644
--- a/crypto/ec/asm/ecp_sm2p256-armv8.pl
+++ b/crypto/ec/asm/ecp_sm2p256-armv8.pl
@@ -42,7 +42,8 @@ $code.=<<___;
 	adc $t4,xzr,xzr

 	// Load polynomial
-	adr x2,$mod
+	adrp x2,$mod
+	add x2,x2,:lo12:$mod
 	ldp $s4,$s5,[x2]
 	ldp $s6,$s7,[x2,#16]
@@ -88,7 +89,8 @@ $code.=<<___;
 	sbc $t4,xzr,xzr

 	// Load polynomial
-	adr x2,$mod
+	adrp x2,$mod
+	add x2,x2,:lo12:$mod
 	ldp $s4,$s5,[x2]
 	ldp $s6,$s7,[x2,#16]
@@ -134,7 +136,8 @@ $code.=<<___;
 	lsr $s3,$s3,#1

 	// Load mod
-	adr x2,$mod
+	adrp x2,$mod
+	add x2,x2,:lo12:$mod
 	ldp $s4,$s5,[x2]
 	ldp $s6,$s7,[x2,#16]
@@ -161,7 +164,7 @@ ___
 $code.=<<___;
 #include "arm_arch.h"
 .arch armv8-a
-.text
+.rodata

 .align 5
 // The polynomial p
@@ -177,6 +180,8 @@ $code.=<<___;
 .Lord_div_2:
 .quad 0xa9ddfa049ceaa092,0xb901efb590e30295,0xffffffffffffffff,0x7fffffff7fffffff

+.text
+
 // void bn_rshift1(BN_ULONG *a);
 .globl bn_rshift1
 .type bn_rshift1,%function
@@ -272,7 +277,8 @@ ecp_sm2p256_mul_by_3:
 	mov $t3,$s3

 	// Sub polynomial
-	adr x2,.Lpoly
+	adrp x2,.Lpoly
+	add x2,x2,:lo12:.Lpoly
 	ldp $s4,$s5,[x2]
 	ldp $s6,$s7,[x2,#16]
 	subs $s0,$s0,$s4
@@ -302,7 +308,8 @@ ecp_sm2p256_mul_by_3:
 	mov $t3,$s3

 	// Sub polynomial
-	adr x2,.Lpoly
+	adrp x2,.Lpoly
+	add x2,x2,:lo12:.Lpoly
 	ldp $s4,$s5,[x2]
 	ldp $s6,$s7,[x2,#16]
 	subs $s0,$s0,$s4
@@ -508,7 +515,8 @@ $code.=<<___;
 	mov $s6,$s2
 	mov $s7,$s3

-	adr $t0,.Lpoly
+	adrp $t0,.Lpoly
+	add $t0,$t0,:lo12:.Lpoly
 	ldp $t1,$t2,[$t0]
 	ldp $t3,$t4,[$t0,#16]
diff --git a/crypto/modes/asm/aes-gcm-armv8_64.pl b/crypto/modes/asm/aes-gcm-armv8_64.pl
index e7b1a17895..312057accd 100755
--- a/crypto/modes/asm/aes-gcm-armv8_64.pl
+++ b/crypto/modes/asm/aes-gcm-armv8_64.pl
@@ -6035,6 +6035,7 @@ ___
 }

 $code.=<<___;
+.rodata
 .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
 #endif
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 6d26ab0fd3..08e8250dca 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -810,6 +810,7 @@ ___
 }

 $code.=<<___;
+.rodata
 .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
 #endif
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index 985347c088..393779b7f2 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -442,7 +442,8 @@ poly1305_blocks_neon:
 	ldr x30,[sp,#8]

 	add $in2,$inp,#32
-	adr $zeros,.Lzeros
+	adrp $zeros,.Lzeros
+	add $zeros,$zeros,:lo12:.Lzeros
 	subs $len,$len,#64
 	csel $in2,$zeros,$in2,lo
@@ -454,7 +455,8 @@ poly1305_blocks_neon:
 .align 4
 .Leven_neon:
 	add $in2,$inp,#32
-	adr $zeros,.Lzeros
+	adrp $zeros,.Lzeros
+	add $zeros,$zeros,:lo12:.Lzeros
 	subs $len,$len,#64
 	csel $in2,$zeros,$in2,lo
@@ -937,6 +939,8 @@ poly1305_emit_neon:
 	ret
 .size poly1305_emit_neon,.-poly1305_emit_neon

+.rodata
+
 .align 5
 .Lzeros:
 .long 0,0,0,0,0,0,0,0
diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl
index 7566a7e3ec..dcc66c5a83 100755
--- a/crypto/sha/asm/keccak1600-armv8.pl
+++ b/crypto/sha/asm/keccak1600-armv8.pl
@@ -82,7 +82,7 @@ my @rhotates = ([ 0, 1, 62, 28, 27 ],
 $code.=<<___;
 #include "arm_arch.h"

-.text
+.rodata

 .align 8	// strategic alignment and padding that allows to use
 		// address value as loop termination condition...
@@ -123,11 +123,14 @@ my @A = map([ "x$_", "x".($_+1), "x".($_+2), "x".($_+3), "x".($_+4) ],
 my @C = map("x$_", (26,27,28,30));

 $code.=<<___;
+.text
+
 .type KeccakF1600_int,%function
 .align 5
 KeccakF1600_int:
 	AARCH64_SIGN_LINK_REGISTER
-	adr $C[2],iotas
+	adrp $C[2],iotas
+	add $C[2],$C[2],:lo12:iotas
 	stp $C[2],x30,[sp,#16]		// 32 bytes on top are mine
 	b .Loop
 .align 4
@@ -556,7 +559,8 @@ $code.=<<___;
 .align 5
 KeccakF1600_ce:
 	mov x9,#24
-	adr x10,iotas
+	adrp x10,iotas
+	add x10,x10,:lo12:iotas
 	b .Loop_ce
 .align 4
 .Loop_ce:
diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl
index 5f23a20c1a..83282fdaac 100644
--- a/crypto/sha/asm/sha1-armv8.pl
+++ b/crypto/sha/asm/sha1-armv8.pl
@@ -259,7 +259,8 @@ sha1_block_armv8:
 	stp x29,x30,[sp,#-16]!
 	add x29,sp,#0

-	adr x4,.Lconst
+	adrp x4,.Lconst
+	add x4,x4,:lo12:.Lconst
 	eor $E,$E,$E
 	ld1.32 {$ABCD},[$ctx],#16
 	ld1.32 {$E}[0],[$ctx]
@@ -319,6 +320,9 @@ $code.=<<___;
 	ldr x29,[sp],#16
 	ret
 .size sha1_block_armv8,.-sha1_block_armv8
+
+.rodata
+
 .align 6
 .Lconst:
 .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index f900882fee..8cd7a150c8 100644
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -235,7 +235,8 @@ $code.=<<___;
 	ldp $E,$F,[$ctx,#4*$SZ]
 	add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)`	// end of input
 	ldp $G,$H,[$ctx,#6*$SZ]
-	adr $Ktbl,.LK$BITS
+	adrp $Ktbl,.LK$BITS
+	add $Ktbl,$Ktbl,:lo12:.LK$BITS
 	stp $ctx,$num,[x29,#96]

 .Loop:
@@ -285,6 +286,8 @@ $code.=<<___;
 	ret
 .size $func,.-$func

+.rodata
+
 .align 6
 .type .LK$BITS,%object
 .LK$BITS:
@@ -355,6 +358,8 @@ $code.=<<___;
 .size .LK$BITS,.-.LK$BITS
 .asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align 2
+
+.text
 ___

 if ($SZ==4) {
@@ -376,7 +381,8 @@ sha256_block_armv8:
 	add x29,sp,#0

 	ld1.32 {$ABCD,$EFGH},[$ctx]
-	adr $Ktbl,.LK256
+	adrp $Ktbl,.LK256
+	add $Ktbl,$Ktbl,:lo12:.LK256

 .Loop_hw:
 	ld1 {@MSG[0]-@MSG[3]},[$inp],#64
@@ -641,7 +647,8 @@ sha256_block_neon:
 	mov x29, sp
 	sub sp,sp,#16*4

-	adr $Ktbl,.LK256
+	adrp $Ktbl,.LK256
+	add $Ktbl,$Ktbl,:lo12:.LK256
 	add $num,$inp,$num,lsl#6	// len to point at the end of inp

 	ld1.8 {@X[0]},[$inp], #16
@@ -755,7 +762,8 @@ sha512_block_armv8:
 	ld1 {@MSG[4]-@MSG[7]},[$inp],#64

 	ld1.64 {@H[0]-@H[3]},[$ctx]	// load context
-	adr $Ktbl,.LK512
+	adrp $Ktbl,.LK512
+	add $Ktbl,$Ktbl,:lo12:.LK512

 	rev64 @MSG[0],@MSG[0]
 	rev64 @MSG[1],@MSG[1]