From c6e65c1f8e627d2ff36134583628d49a7d2cc905 Mon Sep 17 00:00:00 2001 From: Theo Buehler Date: Mon, 15 Apr 2024 01:23:36 +0200 Subject: Make it able to run asm code on OpenBSD (arm64) In order to get asm code running on OpenBSD we must place all constants into .rodata sections. The change to crypto/perlasm/arm-xlate.pl adjusts changes from Theo for additional assembler variants/flavours we use for building OpenSSL. Fixes #23312 Reviewed-by: Hugo Landau Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/24137) --- crypto/aes/asm/aesv8-armx.pl | 6 +- crypto/aes/asm/vpaes-armv8.pl | 53 ++++++++++++------ crypto/bn/asm/armv8-mont.pl | 1 + crypto/chacha/asm/chacha-armv8.pl | 13 +++-- crypto/ec/asm/ecp_nistz256-armv8.pl | 100 +++++++++++++++++++++++----------- crypto/ec/asm/ecp_sm2p256-armv8.pl | 22 +++++--- crypto/modes/asm/aes-gcm-armv8_64.pl | 1 + crypto/modes/asm/ghashv8-armx.pl | 1 + crypto/poly1305/asm/poly1305-armv8.pl | 8 ++- crypto/sha/asm/keccak1600-armv8.pl | 10 +++- crypto/sha/asm/sha1-armv8.pl | 6 +- crypto/sha/asm/sha512-armv8.pl | 16 ++++-- 12 files changed, 164 insertions(+), 73 deletions(-) (limited to 'crypto') diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl index 33a2dd53da..289210b2c6 100755 --- a/crypto/aes/asm/aesv8-armx.pl +++ b/crypto/aes/asm/aesv8-armx.pl @@ -107,12 +107,13 @@ my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)= $code.=<<___; +.rodata .align 5 .Lrcon: .long 0x01,0x01,0x01,0x01 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat .long 0x1b,0x1b,0x1b,0x1b - +.previous .globl ${prefix}_set_encrypt_key .type ${prefix}_set_encrypt_key,%function .align 5 @@ -139,7 +140,8 @@ $code.=<<___; tst $bits,#0x3f b.ne .Lenc_key_abort - adr $ptr,.Lrcon + adrp $ptr,.Lrcon + add $ptr,$ptr,:lo12:.Lrcon cmp $bits,#192 veor $zero,$zero,$zero diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl index 49988e9c2b..3b5c907af1 100755 --- a/crypto/aes/asm/vpaes-armv8.pl +++ 
b/crypto/aes/asm/vpaes-armv8.pl @@ -55,7 +55,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\"" $code.=<<___; #include "arm_arch.h" -.text +.rodata .type _vpaes_consts,%object .align 7 // totally strategic alignment @@ -146,6 +146,9 @@ _vpaes_consts: .asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)" .size _vpaes_consts,.-_vpaes_consts .align 6 + +.text + ___ { @@ -165,7 +168,8 @@ $code.=<<___; .type _vpaes_encrypt_preheat,%function .align 4 _vpaes_encrypt_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv movi v17.16b, #0x0f ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo @@ -193,7 +197,8 @@ _vpaes_encrypt_preheat: _vpaes_encrypt_core: mov x9, $key ldr w8, [$key,#240] // pull rounds - adr x11, .Lk_mc_forward+16 + adrp x11, .Lk_mc_forward+16 + add x11, x11, :lo12:.Lk_mc_forward+16 // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -280,7 +285,8 @@ vpaes_encrypt: _vpaes_encrypt_2x: mov x9, $key ldr w8, [$key,#240] // pull rounds - adr x11, .Lk_mc_forward+16 + adrp x11, .Lk_mc_forward+16 + add x11, x11, :lo12:.Lk_mc_forward+16 // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -383,9 +389,11 @@ _vpaes_encrypt_2x: .type _vpaes_decrypt_preheat,%function .align 4 _vpaes_decrypt_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv movi v17.16b, #0x0f - adr x11, .Lk_dipt + adrp x11, .Lk_dipt + add x11, x11, :lo12:.Lk_dipt ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd @@ -407,10 +415,12 @@ _vpaes_decrypt_core: // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11 eor x11, x11, #0x30 // xor 
\$0x30, %r11 - adr x10, .Lk_sr + adrp x10, .Lk_sr + add x10, x10, :lo12:.Lk_sr and x11, x11, #0x30 // and \$0x30, %r11 add x11, x11, x10 - adr x10, .Lk_mc_forward+48 + adrp x10, .Lk_mc_forward+48 + add x10, x10, :lo12:.Lk_mc_forward+48 ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -518,10 +528,12 @@ _vpaes_decrypt_2x: // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11 eor x11, x11, #0x30 // xor \$0x30, %r11 - adr x10, .Lk_sr + adrp x10, .Lk_sr + add x10, x10, :lo12:.Lk_sr and x11, x11, #0x30 // and \$0x30, %r11 add x11, x11, x10 - adr x10, .Lk_mc_forward+48 + adrp x10, .Lk_mc_forward+48 + add x10, x10, :lo12:.Lk_mc_forward+48 ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 @@ -657,14 +669,18 @@ $code.=<<___; .type _vpaes_key_preheat,%function .align 4 _vpaes_key_preheat: - adr x10, .Lk_inv + adrp x10, .Lk_inv + add x10, x10, :lo12:.Lk_inv movi v16.16b, #0x5b // .Lk_s63 - adr x11, .Lk_sb1 + adrp x11, .Lk_sb1 + add x11, x11, :lo12:.Lk_sb1 movi v17.16b, #0x0f // .Lk_s0F ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt - adr x10, .Lk_dksd + adrp x10, .Lk_dksd + add x10, x10, :lo12:.Lk_dksd ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1 - adr x11, .Lk_mc_forward + adrp x11, .Lk_mc_forward + add x11, x11, :lo12:.Lk_mc_forward ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9 ld1 {v8.2d}, [x10] // .Lk_rcon @@ -688,7 +704,8 @@ _vpaes_schedule_core: bl _vpaes_schedule_transform mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 - adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10 + adrp x10, .Lk_sr // lea .Lk_sr(%rip),%r10 + add x10, x10, :lo12:.Lk_sr add x8, x8, x10 cbnz $dir, .Lschedule_am_decrypting @@ -814,12 +831,14 @@ _vpaes_schedule_core: .align 4 .Lschedule_mangle_last: // schedule last round key from xmm0 - adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # 
prepare to deskew + adrp x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew + add x11, x11, :lo12:.Lk_deskew cbnz $dir, .Lschedule_mangle_last_dec // encrypting ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 - adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform + adrp x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform + add x11, x11, :lo12:.Lk_opt add $out, $out, #32 // add \$32, %rdx tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl index 21ab12bdf0..912a7cf2f9 100755 --- a/crypto/bn/asm/armv8-mont.pl +++ b/crypto/bn/asm/armv8-mont.pl @@ -1898,6 +1898,7 @@ __bn_mul4x_mont: ___ } $code.=<<___; +.rodata .asciz "Montgomery Multiplication for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 4 ___ diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index bd5ff88777..3768f7f2e0 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl @@ -140,7 +140,7 @@ $code.=<<___; .extern ChaCha20_ctr32_sve #endif -.text +.rodata .align 5 .Lsigma: @@ -151,6 +151,8 @@ $code.=<<___; .long 0x02010003,0x06050407,0x0a09080b,0x0e0d0c0f .asciz "ChaCha20 for ARMv8, CRYPTOGAMS by \@dot-asm" +.text + .globl ChaCha20_ctr32_dflt .type ChaCha20_ctr32_dflt,%function .align 5 @@ -170,7 +172,8 @@ ChaCha20_ctr32_dflt: stp x29,x30,[sp,#-96]! add x29,sp,#0 - adr @x[0],.Lsigma + adrp @x[0],.Lsigma + add @x[0],@x[0],:lo12:.Lsigma stp x19,x20,[sp,#16] stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] @@ -473,7 +476,8 @@ ChaCha20_neon: stp x29,x30,[sp,#-96]! add x29,sp,#0 - adr @x[0],.Lsigma + adrp @x[0],.Lsigma + add @x[0],@x[0],:lo12:.Lsigma stp x19,x20,[sp,#16] stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] @@ -884,7 +888,8 @@ ChaCha20_512_neon: stp x29,x30,[sp,#-96]!
add x29,sp,#0 - adr @x[0],.Lsigma + adrp @x[0],.Lsigma + add @x[0],@x[0],:lo12:.Lsigma stp x19,x20,[sp,#16] stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl index 6c5d0e8b3c..e9ee7ed920 100644 --- a/crypto/ec/asm/ecp_nistz256-armv8.pl +++ b/crypto/ec/asm/ecp_nistz256-armv8.pl @@ -55,7 +55,7 @@ my ($acc6,$acc7)=($ap,$bp); # used in __ecp_nistz256_sqr_mont $code.=<<___; #include "arm_arch.h" -.text +.rodata ___ ######################################################################## # Convert ecp_nistz256_table.c to layout expected by ecp_nistz_gather_w7 @@ -117,6 +117,8 @@ $code.=<<___; .quad 0xccd1c8aaee00bc4f .asciz "ECP_NISTZ256 for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" +.text + // void ecp_nistz256_to_mont(BN_ULONG x0[4],const BN_ULONG x1[4]); .globl ecp_nistz256_to_mont .type ecp_nistz256_to_mont,%function @@ -127,12 +129,16 @@ ecp_nistz256_to_mont: add x29,sp,#0 stp x19,x20,[sp,#16] - ldr $bi,.LRR // bp[0] + adrp $bi,.LRR + ldr $bi,[$bi,:lo12:.LRR] // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 - adr $bp,.LRR // &bp[0] + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] + adrp $bp,.LRR // &bp[0] + add $bp,$bp,:lo12:.LRR bl __ecp_nistz256_mul_mont @@ -155,9 +161,12 @@ ecp_nistz256_from_mont: mov $bi,#1 // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 - adr $bp,.Lone // &bp[0] + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] + adrp $bp,.Lone // &bp[0] + add $bp,$bp,:lo12:.Lone bl __ecp_nistz256_mul_mont @@ -181,8 +190,10 @@ ecp_nistz256_mul_mont: ldr $bi,[$bp] // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_mul_mont @@ -204,8 +215,10
@@ ecp_nistz256_sqr_mont: ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_sqr_mont @@ -229,8 +242,10 @@ ecp_nistz256_add: ldp $t0,$t1,[$bp] ldp $acc2,$acc3,[$ap,#16] ldp $t2,$t3,[$bp,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_add @@ -250,8 +265,10 @@ ecp_nistz256_div_by_2: ldp $acc0,$acc1,[$ap] ldp $acc2,$acc3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_div_by_2 @@ -271,8 +288,10 @@ ecp_nistz256_mul_by_2: ldp $acc0,$acc1,[$ap] ldp $acc2,$acc3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] mov $t0,$acc0 mov $t1,$acc1 mov $t2,$acc2 @@ -296,8 +315,10 @@ ecp_nistz256_mul_by_3: ldp $acc0,$acc1,[$ap] ldp $acc2,$acc3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] mov $t0,$acc0 mov $t1,$acc1 mov $t2,$acc2 @@ -333,8 +354,10 @@ ecp_nistz256_sub: ldp $acc0,$acc1,[$ap] ldp $acc2,$acc3,[$ap,#16] - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_sub_from @@ -357,8 +380,10 @@ ecp_nistz256_neg: mov $acc1,xzr mov $acc2,xzr mov $acc3,xzr - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] bl __ecp_nistz256_sub_from @@ -736,9 +761,11 @@ ecp_nistz256_point_double: mov $rp_real,$rp ldp $acc2,$acc3,[$ap,#48] mov $ap_real,$ap - ldr 
$poly1,.Lpoly+8 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] mov $t0,$acc0 - ldr $poly3,.Lpoly+24 + ldr $poly3,[$poly3,#24] mov $t1,$acc1 ldp $a0,$a1,[$ap_real,#64] // forward load for p256_sqr_mont mov $t2,$acc2 @@ -897,8 +924,10 @@ ecp_nistz256_point_add: mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] orr $t0,$a0,$a1 orr $t2,$a2,$a3 orr $in2infty,$t0,$t2 @@ -1151,8 +1180,10 @@ ecp_nistz256_point_add_affine: mov $rp_real,$rp mov $ap_real,$ap mov $bp_real,$bp - ldr $poly1,.Lpoly+8 - ldr $poly3,.Lpoly+24 + adrp $poly3,.Lpoly + add $poly3,$poly3,:lo12:.Lpoly + ldr $poly1,[$poly3,#8] + ldr $poly3,[$poly3,#24] ldp $a0,$a1,[$ap,#64] // in1_z ldp $a2,$a3,[$ap,#64+16] @@ -1303,7 +1334,8 @@ $code.=<<___; stp $acc2,$acc3,[$rp_real,#$i+16] ___ $code.=<<___ if ($i == 0); - adr $bp_real,.Lone_mont-64 + adrp $bp_real,.Lone_mont-64 + add $bp_real,$bp_real,:lo12:.Lone_mont-64 ___ } $code.=<<___; @@ -1354,7 +1386,8 @@ ecp_nistz256_ord_mul_mont: stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr $ordk,.Lord + adrp $ordk,.Lord + add $ordk,$ordk,:lo12:.Lord ldr $bi,[$bp] // bp[0] ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] @@ -1497,7 +1530,8 @@ ecp_nistz256_ord_sqr_mont: stp x21,x22,[sp,#32] stp x23,x24,[sp,#48] - adr $ordk,.Lord + adrp $ordk,.Lord + add $ordk,$ordk,:lo12:.Lord ldp $a0,$a1,[$ap] ldp $a2,$a3,[$ap,#16] diff --git a/crypto/ec/asm/ecp_sm2p256-armv8.pl b/crypto/ec/asm/ecp_sm2p256-armv8.pl index 59e0b190e1..e40b01ad20 100644 --- a/crypto/ec/asm/ecp_sm2p256-armv8.pl +++ b/crypto/ec/asm/ecp_sm2p256-armv8.pl @@ -42,7 +42,8 @@ $code.=<<___; adc $t4,xzr,xzr // Load polynomial - adr x2,$mod + adrp x2,$mod + add x2,x2,:lo12:$mod ldp $s4,$s5,[x2] ldp $s6,$s7,[x2,#16] @@ -88,7 +89,8 @@ $code.=<<___; sbc $t4,xzr,xzr // Load polynomial - adr x2,$mod + adrp x2,$mod + add x2,x2,:lo12:$mod ldp $s4,$s5,[x2] 
ldp $s6,$s7,[x2,#16] @@ -134,7 +136,8 @@ $code.=<<___; lsr $s3,$s3,#1 // Load mod - adr x2,$mod + adrp x2,$mod + add x2,x2,:lo12:$mod ldp $s4,$s5,[x2] ldp $s6,$s7,[x2,#16] @@ -161,7 +164,7 @@ ___ $code.=<<___; #include "arm_arch.h" .arch armv8-a -.text +.rodata .align 5 // The polynomial p @@ -177,6 +180,8 @@ $code.=<<___; .Lord_div_2: .quad 0xa9ddfa049ceaa092,0xb901efb590e30295,0xffffffffffffffff,0x7fffffff7fffffff +.text + // void bn_rshift1(BN_ULONG *a); .globl bn_rshift1 .type bn_rshift1,%function @@ -272,7 +277,8 @@ ecp_sm2p256_mul_by_3: mov $t3,$s3 // Sub polynomial - adr x2,.Lpoly + adrp x2,.Lpoly + add x2,x2,:lo12:.Lpoly ldp $s4,$s5,[x2] ldp $s6,$s7,[x2,#16] subs $s0,$s0,$s4 @@ -302,7 +308,8 @@ ecp_sm2p256_mul_by_3: mov $t3,$s3 // Sub polynomial - adr x2,.Lpoly + adrp x2,.Lpoly + add x2,x2,:lo12:.Lpoly ldp $s4,$s5,[x2] ldp $s6,$s7,[x2,#16] subs $s0,$s0,$s4 @@ -508,7 +515,8 @@ $code.=<<___; mov $s6,$s2 mov $s7,$s3 - adr $t0,.Lpoly + adrp $t0,.Lpoly + add $t0,$t0,:lo12:.Lpoly ldp $t1,$t2,[$t0] ldp $t3,$t4,[$t0,#16] diff --git a/crypto/modes/asm/aes-gcm-armv8_64.pl b/crypto/modes/asm/aes-gcm-armv8_64.pl index e7b1a17895..312057accd 100755 --- a/crypto/modes/asm/aes-gcm-armv8_64.pl +++ b/crypto/modes/asm/aes-gcm-armv8_64.pl @@ -6035,6 +6035,7 @@ ___ } $code.=<<___; +.rodata .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 2 #endif diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl index 6d26ab0fd3..08e8250dca 100644 --- a/crypto/modes/asm/ghashv8-armx.pl +++ b/crypto/modes/asm/ghashv8-armx.pl @@ -810,6 +810,7 @@ ___ } $code.=<<___; +.rodata .asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 2 #endif diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl index 985347c088..393779b7f2 100755 --- a/crypto/poly1305/asm/poly1305-armv8.pl +++ b/crypto/poly1305/asm/poly1305-armv8.pl @@ -442,7 +442,8 @@ poly1305_blocks_neon: ldr x30,[sp,#8] add $in2,$inp,#32 - adr $zeros,.Lzeros + adrp $zeros,.Lzeros + add
$zeros,$zeros,:lo12:.Lzeros subs $len,$len,#64 csel $in2,$zeros,$in2,lo @@ -454,7 +455,8 @@ poly1305_blocks_neon: .align 4 .Leven_neon: add $in2,$inp,#32 - adr $zeros,.Lzeros + adrp $zeros,.Lzeros + add $zeros,$zeros,:lo12:.Lzeros subs $len,$len,#64 csel $in2,$zeros,$in2,lo @@ -937,6 +939,8 @@ poly1305_emit_neon: ret .size poly1305_emit_neon,.-poly1305_emit_neon +.rodata + .align 5 .Lzeros: .long 0,0,0,0,0,0,0,0 diff --git a/crypto/sha/asm/keccak1600-armv8.pl b/crypto/sha/asm/keccak1600-armv8.pl index 7566a7e3ec..dcc66c5a83 100755 --- a/crypto/sha/asm/keccak1600-armv8.pl +++ b/crypto/sha/asm/keccak1600-armv8.pl @@ -82,7 +82,7 @@ my @rhotates = ([ 0, 1, 62, 28, 27 ], $code.=<<___; #include "arm_arch.h" -.text +.rodata .align 8 // strategic alignment and padding that allows to use // address value as loop termination condition... @@ -123,11 +123,14 @@ my @A = map([ "x$_", "x".($_+1), "x".($_+2), "x".($_+3), "x".($_+4) ], my @C = map("x$_", (26,27,28,30)); $code.=<<___; +.text + .type KeccakF1600_int,%function .align 5 KeccakF1600_int: AARCH64_SIGN_LINK_REGISTER - adr $C[2],iotas + adrp $C[2],iotas + add $C[2],$C[2],:lo12:iotas stp $C[2],x30,[sp,#16] // 32 bytes on top are mine b .Loop .align 4 @@ -556,7 +559,8 @@ $code.=<<___; .align 5 KeccakF1600_ce: mov x9,#24 - adr x10,iotas + adrp x10,iotas + add x10,x10,:lo12:iotas b .Loop_ce .align 4 .Loop_ce: diff --git a/crypto/sha/asm/sha1-armv8.pl b/crypto/sha/asm/sha1-armv8.pl index 5f23a20c1a..83282fdaac 100644 --- a/crypto/sha/asm/sha1-armv8.pl +++ b/crypto/sha/asm/sha1-armv8.pl @@ -259,7 +259,8 @@ sha1_block_armv8: stp x29,x30,[sp,#-16]! 
add x29,sp,#0 - adr x4,.Lconst + adrp x4,.Lconst + add x4,x4,:lo12:.Lconst eor $E,$E,$E ld1.32 {$ABCD},[$ctx],#16 ld1.32 {$E}[0],[$ctx] @@ -319,6 +320,9 @@ $code.=<<___; ldr x29,[sp],#16 ret .size sha1_block_armv8,.-sha1_block_armv8 + +.rodata + .align 6 .Lconst: .long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl index f900882fee..8cd7a150c8 100644 --- a/crypto/sha/asm/sha512-armv8.pl +++ b/crypto/sha/asm/sha512-armv8.pl @@ -235,7 +235,8 @@ $code.=<<___; ldp $E,$F,[$ctx,#4*$SZ] add $num,$inp,$num,lsl#`log(16*$SZ)/log(2)` // end of input ldp $G,$H,[$ctx,#6*$SZ] - adr $Ktbl,.LK$BITS + adrp $Ktbl,.LK$BITS + add $Ktbl,$Ktbl,:lo12:.LK$BITS stp $ctx,$num,[x29,#96] .Loop: @@ -285,6 +286,8 @@ $code.=<<___; ret .size $func,.-$func +.rodata + .align 6 .type .LK$BITS,%object .LK$BITS: @@ -355,6 +358,8 @@ $code.=<<___; .size .LK$BITS,.-.LK$BITS .asciz "SHA$BITS block transform for ARMv8, CRYPTOGAMS by <appro\@openssl.org>" .align 2 + +.text ___ if ($SZ==4) { @@ -376,7 +381,8 @@ sha256_block_armv8: add x29,sp,#0 ld1.32 {$ABCD,$EFGH},[$ctx] - adr $Ktbl,.LK256 + adrp $Ktbl,.LK256 + add $Ktbl,$Ktbl,:lo12:.LK256 .Loop_hw: ld1 {@MSG[0]-@MSG[3]},[$inp],#64 @@ -641,7 +647,8 @@ sha256_block_neon: mov x29, sp sub sp,sp,#16*4 - adr $Ktbl,.LK256 + adrp $Ktbl,.LK256 + add $Ktbl,$Ktbl,:lo12:.LK256 add $num,$inp,$num,lsl#6 // len to point at the end of inp ld1.8 {@X[0]},[$inp], #16 @@ -755,7 +762,8 @@ sha512_block_armv8: ld1 {@MSG[4]-@MSG[7]},[$inp],#64 ld1.64 {@H[0]-@H[3]},[$ctx] // load context - adr $Ktbl,.LK512 + adrp $Ktbl,.LK512 + add $Ktbl,$Ktbl,:lo12:.LK512 rev64 @MSG[0],@MSG[0] rev64 @MSG[1],@MSG[1] -- cgit v1.2.3