summaryrefslogtreecommitdiffstats
path: root/crypto/aes/asm/vpaes-armv8.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/aes/asm/vpaes-armv8.pl')
-rwxr-xr-xcrypto/aes/asm/vpaes-armv8.pl53
1 files changed, 36 insertions, 17 deletions
diff --git a/crypto/aes/asm/vpaes-armv8.pl b/crypto/aes/asm/vpaes-armv8.pl
index 49988e9c2b..3b5c907af1 100755
--- a/crypto/aes/asm/vpaes-armv8.pl
+++ b/crypto/aes/asm/vpaes-armv8.pl
@@ -55,7 +55,7 @@ open OUT,"| \"$^X\" $xlate $flavour \"$output\""
$code.=<<___;
#include "arm_arch.h"
-.text
+.rodata
.type _vpaes_consts,%object
.align 7 // totally strategic alignment
@@ -146,6 +146,9 @@ _vpaes_consts:
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
.size _vpaes_consts,.-_vpaes_consts
.align 6
+
+.text
+
___
{
@@ -165,7 +168,8 @@ $code.=<<___;
.type _vpaes_encrypt_preheat,%function
.align 4
_vpaes_encrypt_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, :lo12:.Lk_inv
movi v17.16b, #0x0f
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
@@ -193,7 +197,8 @@ _vpaes_encrypt_preheat:
_vpaes_encrypt_core:
mov x9, $key
ldr w8, [$key,#240] // pull rounds
- adr x11, .Lk_mc_forward+16
+ adrp x11, .Lk_mc_forward+16
+ add x11, x11, :lo12:.Lk_mc_forward+16
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -280,7 +285,8 @@ vpaes_encrypt:
_vpaes_encrypt_2x:
mov x9, $key
ldr w8, [$key,#240] // pull rounds
- adr x11, .Lk_mc_forward+16
+ adrp x11, .Lk_mc_forward+16
+ add x11, x11, :lo12:.Lk_mc_forward+16
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -383,9 +389,11 @@ _vpaes_encrypt_2x:
.type _vpaes_decrypt_preheat,%function
.align 4
_vpaes_decrypt_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, :lo12:.Lk_inv
movi v17.16b, #0x0f
- adr x11, .Lk_dipt
+ adrp x11, .Lk_dipt
+ add x11, x11, :lo12:.Lk_dipt
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
@@ -407,10 +415,12 @@ _vpaes_decrypt_core:
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
eor x11, x11, #0x30 // xor \$0x30, %r11
- adr x10, .Lk_sr
+ adrp x10, .Lk_sr
+ add x10, x10, :lo12:.Lk_sr
and x11, x11, #0x30 // and \$0x30, %r11
add x11, x11, x10
- adr x10, .Lk_mc_forward+48
+ adrp x10, .Lk_mc_forward+48
+ add x10, x10, :lo12:.Lk_mc_forward+48
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -518,10 +528,12 @@ _vpaes_decrypt_2x:
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
eor x11, x11, #0x30 // xor \$0x30, %r11
- adr x10, .Lk_sr
+ adrp x10, .Lk_sr
+ add x10, x10, :lo12:.Lk_sr
and x11, x11, #0x30 // and \$0x30, %r11
add x11, x11, x10
- adr x10, .Lk_mc_forward+48
+ adrp x10, .Lk_mc_forward+48
+ add x10, x10, :lo12:.Lk_mc_forward+48
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
@@ -657,14 +669,18 @@ $code.=<<___;
.type _vpaes_key_preheat,%function
.align 4
_vpaes_key_preheat:
- adr x10, .Lk_inv
+ adrp x10, .Lk_inv
+ add x10, x10, :lo12:.Lk_inv
movi v16.16b, #0x5b // .Lk_s63
- adr x11, .Lk_sb1
+ adrp x11, .Lk_sb1
+ add x11, x11, :lo12:.Lk_sb1
movi v17.16b, #0x0f // .Lk_s0F
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
- adr x10, .Lk_dksd
+ adrp x10, .Lk_dksd
+ add x10, x10, :lo12:.Lk_dksd
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
- adr x11, .Lk_mc_forward
+ adrp x11, .Lk_mc_forward
+ add x11, x11, :lo12:.Lk_mc_forward
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
ld1 {v8.2d}, [x10] // .Lk_rcon
@@ -688,7 +704,8 @@ _vpaes_schedule_core:
bl _vpaes_schedule_transform
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
- adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
+ adrp x10, .Lk_sr // lea .Lk_sr(%rip),%r10
+ add x10, x10, :lo12:.Lk_sr
add x8, x8, x10
cbnz $dir, .Lschedule_am_decrypting
@@ -814,12 +831,14 @@ _vpaes_schedule_core:
.align 4
.Lschedule_mangle_last:
// schedule last round key from xmm0
- adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
+ adrp x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
+ add x11, x11, :lo12:.Lk_deskew
cbnz $dir, .Lschedule_mangle_last_dec
// encrypting
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
- adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
+ adrp x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
+ add x11, x11, :lo12:.Lk_opt
add $out, $out, #32 // add \$32, %rdx
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute