# Copyright (c) (2015,2016,2019) Apple Inc. All rights reserved.
#
# corecrypto is licensed under Apple Inc.’s Internal Use License Agreement (which
# is contained in the License.txt file distributed with corecrypto) and only to
# people who accept that license. IMPORTANT: Any license rights granted to you by
# Apple Inc. (if any) are limited to internal use within your organization only on
# devices and computers you own or control, for the sole purpose of verifying the
# security characteristics and correct functioning of the Apple Software. You may
# not, directly or indirectly, redistribute the Apple Software or any portions thereof.

#if !defined(__arm64__) && defined(__ARM_NEON__)

#define ekey r2
#define eax r4
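
// This file implements "vector permute" AES (vpaes, after Hamburg's CHES
// 2009 design): every S-box and MixColumns step is carried out with vtbl.8
// lookups into 16-byte constants instead of data-dependent memory loads, so
// there is no cache-timing leak. Throughout, q9 holds the 0x0F nibble mask;
// inputs are split into low/high nibbles, pushed through the .Lk_* tables,
// and recombined with veor.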

// Save/restore the NEON registers used below. User-space code only needs to
// preserve the callee-saved q4-q7; kernel builds (BUILDKERNEL) save every
// vector register they touch.
.macro save_all_neon
#if BUILDKERNEL
    vstmdb  sp!, {q12-q15}
    vstmdb  sp!, {q8-q11}
    vstmdb  sp!, {q0-q3}
#endif
    vstmdb  sp!, {q4-q7}
.endm

.macro restore_all_neon
    vldmia  sp!, {q4-q7}
#if BUILDKERNEL
    vldmia  sp!, {q0-q3}
    vldmia  sp!, {q8-q11}
    vldmia  sp!, {q12-q15}
#endif
.endm

// Prologue/epilogue: set up a frame pointer in r7 and preserve the scratch
// registers (r8, r10, r11) used across the key schedule.
.macro vpaes_push
    push    {r4-r7,lr}
    add     r7, sp, #12
    push    {r8,r10,r11}
.endm

.macro vpaes_pop
    pop     {r8,r10,r11}
    pop     {r4-r7,pc}
.endm

.p2align 6
// Constants for the forward (encryption) direction; the .Lk_* names follow
// the original vpaes sources (ipt = input transform, sbo = S-box output,
// mc_forward/mc_backward = MixColumns rotations).
.Lk_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

.Lk_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

.Lk_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605

.Lk_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F

// ShiftRows permutations (same values as .Lk_sr further down); the encrypt
// core addresses them as .Lk_mc_backward+64+r11.
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508
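
// Sketch of the encrypt core below (informal, names for exposition only):
//
//   state = ipt(input) ^ roundkey[0]
//   per round (.Lenc_entry, then .Lenc_loop):
//       nibble-split inversion -> S-box indices
//       sb1/sb2 lookups, MixColumns via mc_forward/mc_backward, ^ roundkey
//   last round: sbo lookup ^ last roundkey, then ShiftRows via sr[]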

.p2align 4
//
// AES encryption core: q0 = input block, ekey (r2) -> expanded key,
// [ekey, #240] = round count. Returns the encrypted block in q0.
//
vpaes_encrypt_core:
    mov     r9, ekey
    mov     r11, #16
    adr     r10, .Lk_ipt
    ldr     eax, [ekey, #240]
    vldmia  r10!, {q3-q4}       // input transform tables
    vbic    q1, q0, q9          // isolate high nibbles
    vld1.8  {q5}, [r9]!         // round key 0
    vshr.u32 q1, q1, #4
    vand    q0, q0, q9          // low nibbles

    vtbl.8  d4, {q3}, d0
    vtbl.8  d5, {q3}, d1

    adr     r10, .Lk_mc_backward

    vtbl.8  d0, {q4}, d2
    vtbl.8  d1, {q4}, d3
    veor    q2, q2, q5          // xor in round key 0
    veor    q0, q0, q2
    cmp     eax, #0             // prime the flags for .Lenc_entry's bgt
    b       .Lenc_entry

.p2align 4
.Lenc_loop:
    // middle round: sb1/sb2 lookups (q12-q15, loaded by vpaes_preheat) on
    // the indices from .Lenc_entry, then MixColumns via the mc_forward /
    // mc_backward rotations selected by r11
    vtbl.8  d8, {q13}, d4
    vtbl.8  d9, {q13}, d5
    vtbl.8  d0, {q12}, d6
    vtbl.8  d1, {q12}, d7
    veor    q4, q4, q5          // sb1 output + round key
    add     r12, r10, r11
    veor    q5, q0, q4
    vld1.8  {q4}, [r12 :128]    // mc_backward[x]
    sub     r12, r12, #64
    vtbl.8  d12, {q15}, d4
    vtbl.8  d13, {q15}, d5
    vld1.8  {q1}, [r12 :128]    // mc_forward[x]

    vtbl.8  d4, {q14}, d6
    vtbl.8  d5, {q14}, d7

    veor    q2, q2, q6          // sb2 output

    vtbl.8  d6, {q5}, d8
    vtbl.8  d7, {q5}, d9
    vtbl.8  d0, {q5}, d2
    vtbl.8  d1, {q5}, d3
    veor    q5, q0, q2

    add     r11, r11, #16
    veor    q3, q3, q5
    vtbl.8  d0, {q5}, d2
    vtbl.8  d1, {q5}, d3
    and     r11, r11, #48       // next MixColumns rotation (0/16/32/48)
    subs    eax, eax, #1        // one round done
    veor    q0, q0, q3

.Lenc_entry:
    // top of round: split the state into nibbles and run the GF(2^4)
    // inversion tables (q10/q11 = .Lk_inv) to form the S-box indices
    vbic    q1, q0, q9
    vand    q0, q0, q9
    vshr.u32 q1, q1, #4

    vtbl.8  d10, {q11}, d0
    vtbl.8  d11, {q11}, d1

    veor    q0, q0, q1

    vtbl.8  d6, {q10}, d2
    vtbl.8  d7, {q10}, d3
    vtbl.8  d8, {q10}, d0
    vtbl.8  d9, {q10}, d1

    veor    q3, q3, q5
    veor    q4, q4, q5

    vtbl.8  d4, {q10}, d6
    vtbl.8  d5, {q10}, d7
    vtbl.8  d6, {q10}, d8
    vtbl.8  d7, {q10}, d9

    veor    q2, q2, q0
    veor    q3, q3, q1

    vld1.8  {q5}, [r9]!         // next round key
    bgt     .Lenc_loop

    // last round: S-box output (.Lk_sbo), last round key, then the final
    // ShiftRows permutation selected by r11
    adr     r12, .Lk_sbo

    vld1.8  {q1}, [r12]!
    vtbl.8  d8, {q1}, d4
    vtbl.8  d9, {q1}, d5
    vld1.8  {q2}, [r12]
    add     r12, r10, r11
    veor    q4, q4, q5          // + last round key
    add     r12, r12, #64       // sr[x], just past .Lk_mc_backward
    vtbl.8  d0, {q2}, d6
    vtbl.8  d1, {q2}, d7
    vld1.8  {q1}, [r12]
    veor    q2, q0, q4
    vtbl.8  d0, {q2}, d2
    vtbl.8  d1, {q2}, d3        // q0 = encrypted block
    bx      lr

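// The decrypt core mirrors the encrypt core: the same nibble-split
// inversion feeds four inverse S-box tables (dsb9/dsbd/dsbb/dsbe) per
// round, with the inverse MixColumns pattern rotated in q5 by vext.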

.p2align 4
// Constants for the inverse (decryption) direction: dipt = inverse input
// transform, dsb9/dsbd/dsbb/dsbe = inverse S-box tables consumed in
// sequence by .Ldec_loop, dsbo = inverse S-box output.
.Lk_dipt:
.quad 0x0F505B040B545F00, 0x154A411E114E451A
.quad 0x86E383E660056500, 0x12771772F491F194
.quad 0x000302010C0F0E0D, 0x080B0A0904070605 // .Lk_mc_forward+48

.Lk_dsb9:
.quad 0x851C03539A86D600, 0xCAD51F504F994CC9
.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565
.Lk_dsbd:
.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
.Lk_dsbb:
.quad 0xD022649296B44200, 0x602646F6B0F2D404
.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
.Lk_dsbe:
.quad 0x46F2929626D4D000, 0x2242600464B4F6B0
.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32
.Lk_dsbo:
.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C

// ShiftRows entries used for the final permutation of the decrypt core.
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x0F060D040B020900, 0x070E050C030A0108

.p2align 4
//
// AES decryption core: q0 = input block, r2 -> expanded decryption key.
// Returns the decrypted block in q0.
//
vpaes_decrypt_core:
    mov     r9, r2              // dkey
    ldr     eax, [r2, #240]     // Nr
    adr     r12, .Lk_dipt
    vbic    q1, q0, q9
    vld1.64 {q3}, [r12 :128]!
    vshr.u32 q1, q1, #4
    vld1.8  {q5}, [r9]!         // first round key
    lsl     r11, eax, #4
    vand    q2, q0, q9
    vtbl.8  d4, {q3}, d4
    vtbl.8  d5, {q3}, d5
    vld1.64 {q4}, [r12 :128]!
    eor     r11, r11, #48
    adr     r10, .Lk_dsbd
    vtbl.8  d0, {q4}, d2
    vtbl.8  d1, {q4}, d3
    and     r11, r11, #48       // ShiftRows offset for the last round
    veor    q2, q2, q5
    vld1.64 {q5}, [r12 :128]!   // .Lk_mc_forward+48
    veor    q0, q0, q2
    cmp     eax, #0
    b       .Ldec_entry

.p2align 4
.Ldec_loop:
    // inverse round, unrolled over the four inverse S-box table pairs
    // (dsb9, dsbd, dsbb, dsbe); q5 holds the inverse MixColumns pattern
    // and is rotated by vext at the bottom of the loop
    sub     r12, r10, #32       // rewind to .Lk_dsb9
    vld1.64 {q6-q7}, [r12 :128]!
    vtbl.8  d8, {q6}, d4
    vtbl.8  d9, {q6}, d5
    vtbl.8  d2, {q7}, d6
    vtbl.8  d3, {q7}, d7
    vld1.64 {q6-q7}, [r12 :128]!    // .Lk_dsbd
    veor    q0, q0, q4
    vtbl.8  d8, {q6}, d4
    vtbl.8  d9, {q6}, d5
    veor    q6, q0, q1
    vtbl.8  d2, {q7}, d6
    vtbl.8  d3, {q7}, d7
    vtbl.8  d0, {q6}, d10
    vtbl.8  d1, {q6}, d11
    vld1.64 {q6-q7}, [r12 :128]!    // .Lk_dsbb

    veor    q0, q0, q4
    vtbl.8  d8, {q6}, d4
    vtbl.8  d9, {q6}, d5
    veor    q6, q0, q1
    vtbl.8  d2, {q7}, d6
    vtbl.8  d3, {q7}, d7
    vtbl.8  d0, {q6}, d10
    vtbl.8  d1, {q6}, d11
    vld1.64 {q6-q7}, [r12 :128]!    // .Lk_dsbe

    veor    q0, q0, q4
    vtbl.8  d8, {q6}, d4
    vtbl.8  d9, {q6}, d5
    veor    q6, q0, q1
    vtbl.8  d2, {q7}, d6
    vtbl.8  d3, {q7}, d7
    vtbl.8  d0, {q6}, d10
    vtbl.8  d1, {q6}, d11

    veor    q0, q0, q4
    vext.8  q5, q5, q5, #12     // rotate the MixColumns pattern
    veor    q0, q0, q1
    subs    eax, eax, #1        // one round done

.Ldec_entry:
    // split into nibbles and run the inversion tables (q10/q11) to form
    // the inverse S-box indices, as in .Lenc_entry
    vbic    q1, q0, q9
    vand    q0, q0, q9
    vshr.u32 q1, q1, #4
    vtbl.8  d4, {q11}, d0
    vtbl.8  d5, {q11}, d1

    veor    q0, q0, q1
    vtbl.8  d6, {q10}, d2
    vtbl.8  d7, {q10}, d3

    veor    q3, q3, q2
    vtbl.8  d8, {q10}, d0
    vtbl.8  d9, {q10}, d1

    veor    q4, q4, q2
    vtbl.8  d4, {q10}, d6
    vtbl.8  d5, {q10}, d7

    veor    q2, q2, q0
    vtbl.8  d6, {q10}, d8
    vtbl.8  d7, {q10}, d9

    vld1.8  {q0}, [r9]!         // next round key
    veor    q3, q3, q1
    bne     .Ldec_loop

    // last round: inverse S-box output (.Lk_dsbo, where r12 points after
    // the loop) and the final ShiftRows permutation
    vld1.64 {q6-q7}, [r12 :128]!

    vtbl.8  d8, {q6}, d4
    vtbl.8  d9, {q6}, d5
    add     r12, r12, r11, lsr #1
    vtbl.8  d6, {q7}, d6
    vtbl.8  d7, {q7}, d7
    vld1.64 {q2}, [r12]
    veor    q0, q0, q4
    veor    q1, q0, q3

    vtbl.8  d0, {q1}, d4
    vtbl.8  d1, {q1}, d5        // q0 = decrypted block
    bx      lr

.p2align 6
// Key schedule constants: ipt2 duplicates the input transform, rcon drives
// the round constants, sr holds the four ShiftRows permutations.
.Lk_ipt2:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
.Lk_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
.Lk_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508

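// Key schedule driver. The entry points below set r3 (0 = encrypt,
// 1 = decrypt) and r8 (initial ShiftRows index) and branch here. 128-bit
// keys run 10 schedule rounds, 192-bit keys interleave smear steps to cover
// their 1.5-block stride, and 256-bit keys alternate full and low rounds.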

.p2align 4
//
// Key schedule core: r0 -> user key, r1 = key bits, r2 -> schedule buffer,
// r3 = direction, r8 = ShiftRows index. Returns via vpaes_pop.
//
vpaes_schedule_core:
    bl      vpaes_preheat       // load the q9-q15 lookup tables
    adr     r10, .Lk_rcon
    vld1.8  {q0}, [r0]          // user key
    vld1.64 {q8}, [r10 :128]!   // q8 = round constants; r10 -> .Lk_sr
    vmov    q3, q0
    adr     r11, .Lk_ipt2
    bl      vpaes_schedule_transform
    vmov    q7, q0

    cmp     r3, #0
    bne     .Lschedule_am_decrypting

    // encrypting: output the low round key as-is
    vst1.8  {q0}, [r2]

    b       .Lschedule_go

.Lschedule_am_decrypting:
    // decrypting: store the user key permuted by sr[r8]
    add     r12, r10, r8
    vmov    q1, q3
    vld1.8  {q3}, [r12]
    vtbl.8  d6, {q1}, d6
    vtbl.8  d7, {q1}, d7
    eor     r8, r8, #48
    vst1.8  {q3}, [r2]

.Lschedule_go:
    cmp     r1, #192
    bgt     .Lschedule_256
    beq     .Lschedule_192
    // fall through: 128-bit schedule

.Lschedule_128:
    mov     r1, #10             // 10 schedule rounds

.Loop_schedule_128:
    bl      vpaes_schedule_round
    subs    r1, r1, #1
    beq     .Lschedule_mangle_last
    bl      vpaes_schedule_mangle   // write output
    b       .Loop_schedule_128

.p2align 4
.Lschedule_192:
    add     r12, r0, #8
    vld1.8  {q0}, [r12]         // high 16 bytes of the 24-byte key
    bl      vpaes_schedule_transform
    vmov    d13, d1             // keep the short half in q6
    veor    d12, d12, d12       // clobber its low side
    mov     r1, #4

.Loop_schedule_192:
    bl      vpaes_schedule_round
    vext.8  q0, q6, q0, #8

    bl      vpaes_schedule_mangle       // save key n
    bl      vpaes_schedule_192_smear
    bl      vpaes_schedule_mangle       // save key n+1
    bl      vpaes_schedule_round
    subs    r1, r1, #1
    beq     .Lschedule_mangle_last
    bl      vpaes_schedule_mangle       // save key n+2
    bl      vpaes_schedule_192_smear
    b       .Loop_schedule_192

.p2align 4
.Lschedule_256:
    add     r12, r0, #16
    vld1.8  {q0}, [r12]         // high 16 bytes of the 32-byte key
    bl      vpaes_schedule_transform
    mov     r1, #7              // 7 iterations, 2 keys each

.Loop_schedule_256:
    bl      vpaes_schedule_mangle   // output the low result
    vmov    q6, q0                  // save cur_lo in q6

    bl      vpaes_schedule_round
    subs    r1, r1, #1
    beq     .Lschedule_mangle_last
    bl      vpaes_schedule_mangle

    // high round: a low round on the rotated word
    vdup.32 q0, d1[1]
    vmov    q5, q7
    vmov    q7, q6
    bl      vpaes_schedule_low_round
    vmov    q7, q5

    b       .Loop_schedule_256

.p2align 4
.Lk_opt:    // output transform
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

.Lk_deskew: // deskew the basis used during the schedule
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

.p2align 4
.Lschedule_mangle_last:
    // final round key: after the .Lk_s63 adjustment, run the output
    // transform (.Lk_opt for encryption, .Lk_deskew for decryption),
    // store the key, and return 0
    adr     r11, .Lk_deskew
    cmp     r3, #0
    bne     .Lschedule_mangle_last_dec

    // encrypting: permute by sr[r8] and switch to the output transform
    add     r12, r8, r10        // r10 -> .Lk_sr
    vld1.8  {q1}, [r12]
    adr     r11, .Lk_opt
    vtbl.8  d2, {q0}, d2
    vtbl.8  d3, {q0}, d3
    vmov    q0, q1
    add     r2, r2, #32

.Lschedule_mangle_last_dec:
    adr     r12, .Lk_s63
    sub     r2, r2, #16
    vld1.8  {q1}, [r12]
    veor    q0, q0, q1
    bl      vpaes_schedule_transform    // output transform
    vst1.8  {q0}, [r2]                  // save last key

    restore_all_neon

    eor     r0, r0, r0                  // return 0
    vpaes_pop

.p2align 4
//
// vpaes_schedule_192_smear: smear the low half of q6 and mix in the high
// bits of q7, producing the extra schedule material for 192-bit keys.
//
vpaes_schedule_192_smear:
    vdup.32 q1, d12[0]
    vdup.32 q0, d15[1]
    vmov    s7, s26
    vmov    s0, s30
    veor    q6, q6, q1
    veor    q6, q6, q0
    vmov    q0, q6
    veor    d12, d12, d12       // clobber the low side again
    bx      lr

.p2align 4
//
// vpaes_schedule_round: one "regular" round of the key schedule.
// q0 = high side, q7 = low side / output, q8 = round-constant schedule.
// vpaes_schedule_low_round skips the rotation and round-constant steps.
//
vpaes_schedule_round:
    // extract the round constant and rotate the high word
    veor    q1, q1, q1
    vext.8  q1, q8, q1, #15
    vext.8  q8, q8, q8, #15
    veor    q7, q7, q1
    vdup.32 q0, d1[1]
    vext.8  q0, q0, q0, #1      // rotate

vpaes_schedule_low_round:
    // smear q7 down and add in the .Lk_s63 constant
    veor    q1, q1, q1
    adr     r12, .Lk_s63
    vext.8  q1, q1, q7, #12
    veor    q2, q2, q2
    veor    q7, q7, q1
    vld1.8  {q1}, [r12]
    vext.8  q2, q2, q7, #8
    veor    q7, q7, q1
    veor    q7, q7, q2

    // subbytes: the same nibble-split inversion as the encrypt core
    vbic    q1, q0, q9
    vshr.u32 q1, q1, #4
    vand    q0, q0, q9

    vtbl.8  d4, {q11}, d0
    vtbl.8  d5, {q11}, d1

    veor    q0, q0, q1

    vtbl.8  d6, {q10}, d2
    vtbl.8  d7, {q10}, d3

    veor    q3, q3, q2

    vtbl.8  d8, {q10}, d0
    vtbl.8  d9, {q10}, d1

    veor    q4, q4, q2

    vtbl.8  d4, {q10}, d6
    vtbl.8  d5, {q10}, d7

    veor    q2, q2, q0

    vtbl.8  d6, {q10}, d8
    vtbl.8  d7, {q10}, d9

    veor    q3, q3, q1

    vtbl.8  d8, {q13}, d4
    vtbl.8  d9, {q13}, d5

    vtbl.8  d0, {q12}, d6
    vtbl.8  d1, {q12}, d7

    // add in the smeared low side
    veor    q0, q0, q4
    veor    q0, q0, q7
    vmov    q7, q0

    bx      lr

.p2align 4
//
// vpaes_schedule_transform: linear transform of q0 using the table pair at
// [r11] (ipt2, opt, or deskew). Clobbers q1, q4, q5.
//
vpaes_schedule_transform:
    vbic    q1, q0, q9
    vldmia  r11, {q4-q5}
    vand    q0, q0, q9
    vshr.u32 q1, q1, #4
    vtbl.8  d0, {q4}, d0        // low nibbles
    vtbl.8  d1, {q4}, d1
    vtbl.8  d2, {q5}, d2        // high nibbles
    vtbl.8  d3, {q5}, d3
    veor    q0, q0, q1
    bx      lr

.p2align 4
.Lk_mc_forward2:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.Lk_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

// Decryption key schedule tables, applied in sequence by
// .Lschedule_mangle_dec to undo the encryption-direction transforms.
.Lk_dksd:
.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E
.Lk_dksb:
.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99
.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
.Lk_dkse:
.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086
.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487
.Lk_dks9:
.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC
.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE

.p2align 4
//
// vpaes_schedule_mangle: write q0 out as a round key at [r2], transformed
// for encryption (forward) or decryption (reverse order, dks tables).
// Preserves q0; advances r2 and the ShiftRows index r8.
//
vpaes_schedule_mangle:
    vstmdb  sp!, {q6-q7}
    adr     r12, .Lk_mc_forward2
    vmov    q4, q0              // save q0 for later
    cmp     r3, #0
    vldmia  r12!, {q5-q6}       // q5 = Lk_mc_forward2, q6 = Lk_s63
    bne     .Lschedule_mangle_dec

    // encrypting: xor in s63 and smear with mc_forward2
    add     r2, r2, #16
    veor    q4, q4, q6

    vtbl.8  d6, {q4}, d10
    vtbl.8  d7, {q4}, d11
    vtbl.8  d8, {q3}, d10
    vtbl.8  d9, {q3}, d11
    vtbl.8  d2, {q4}, d10
    vtbl.8  d3, {q4}, d11
    veor    q3, q3, q4
    veor    q3, q3, q1
    b       .Lschedule_mangle_both

.p2align 4
.Lschedule_mangle_dec:
    // decrypting: split q4 into nibbles and run the four dks table pairs
    // (dksd, dksb, dkse, dks9) in sequence, rotating with q5 in between
    vbic    q1, q4, q9
    vldmia  r12!, {q6-q7}       // .Lk_dksd
    vshr.u32 q1, q1, #4
    vand    q4, q4, q9

    vtbl.8  d4, {q6}, d8
    vtbl.8  d5, {q6}, d9
    vtbl.8  d6, {q7}, d2
    vtbl.8  d7, {q7}, d3
    vldmia  r12!, {q6-q7}       // .Lk_dksb
    veor    q2, q3, q2
    vtbl.8  d6, {q2}, d10
    vtbl.8  d7, {q2}, d11

    vtbl.8  d4, {q6}, d8
    vtbl.8  d5, {q6}, d9
    veor    q2, q2, q3
    vtbl.8  d6, {q7}, d2
    vtbl.8  d7, {q7}, d3
    vldmia  r12!, {q6-q7}       // .Lk_dkse
    veor    q2, q3, q2
    vtbl.8  d6, {q2}, d10
    vtbl.8  d7, {q2}, d11

    vtbl.8  d4, {q6}, d8
    vtbl.8  d5, {q6}, d9
    veor    q2, q2, q3
    vtbl.8  d6, {q7}, d2
    vtbl.8  d7, {q7}, d3
    vldmia  r12!, {q6-q7}       // .Lk_dks9
    veor    q2, q3, q2
    vtbl.8  d6, {q2}, d10
    vtbl.8  d7, {q2}, d11

    vtbl.8  d4, {q6}, d8
    vtbl.8  d5, {q6}, d9
    veor    q2, q2, q3
    vtbl.8  d6, {q7}, d2
    vtbl.8  d7, {q7}, d3
    veor    q3, q3, q2

    sub     r2, r2, #16         // the decrypt schedule is written backwards

.Lschedule_mangle_both:
    add     r12, r10, r8        // sr[r8]
    vld1.8  {q1}, [r12]
    sub     r8, r8, #16
    vtbl.8  d4, {q3}, d2
    vtbl.8  d5, {q3}, d3
    and     r8, r8, #48         // advance the ShiftRows index
    vst1.8  {q2}, [r2]          // write the round key
    vldmia  sp!, {q6-q7}
    bx      lr

/*
    int vpaes_set_encrypt_key(const uint8_t *userKey, int bits, void *key);
    int vpaes_set_decrypt_key(const uint8_t *userKey, int bits, void *key);
*/

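// Note on the schedule layout (inferred from the code below): [key, #240]
// holds bits/32 + 5 (9/11/13); the encryption schedule is written forward
// from the start of the buffer, while the decryption schedule is written
// backwards from its end, so the same vpaes_schedule_core serves both.
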
#define userKey r0
#define AES_bits r1
#define key r2
#define t r12

.globl _AccelerateCrypto_vpaes_set_encrypt_key
.p2align 4
_AccelerateCrypto_vpaes_set_encrypt_key:

    // 128/192/256 divided by 32 = 4/6/8; plus 5 = 9/11/13, stored at [key, #240]
    lsr     t, AES_bits, #5
    vpaes_push
    mov     r11, t
    save_all_neon
    add     t, r11, #5
    mov     r3, #0              // encrypting
    str     t, [key, #240]
    mov     r8, #48
    b       vpaes_schedule_core // returns via vpaes_pop

.globl _AccelerateCrypto_vpaes_set_decrypt_key
.p2align 4
_AccelerateCrypto_vpaes_set_decrypt_key:
    lsr     t, AES_bits, #5
    vpaes_push
    mov     r11, t
    save_all_neon
    mov     r8, #32
    add     t, r11, #5
    and     r8, r8, AES_bits, lsr #1
    mov     r3, #1              // decrypting
    str     t, [key, #240]
    add     key, key, #16
    eor     r8, r8, #32         // initial ShiftRows index
    add     key, key, t, lsl #4 // point at the end of the schedule
    b       vpaes_schedule_core

/*
    void vpaes_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key);
    void vpaes_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key);
*/
#define in r0
#define out r1
#define key r2
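
/*
    A minimal usage sketch (hypothetical caller; the context buffer below is
    an assumption sized generously for the largest schedule, not a type
    defined in this file):

        uint8_t userKey[32], in[16], out[16];
        uint8_t ctx[256] __attribute__((aligned(16)));  // schedule + rounds word at +240

        if (AccelerateCrypto_vpaes_set_encrypt_key(userKey, 256, ctx) == 0)
            AccelerateCrypto_vpaes_encrypt(in, out, (const AES_KEY *)ctx);
*/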

.globl _AccelerateCrypto_vpaes_encrypt
.p2align 4
_AccelerateCrypto_vpaes_encrypt:
    vpaes_push
    save_all_neon
    vld1.8  {q0}, [in]
    bl      vpaes_preheat
    bl      vpaes_encrypt_core
    vst1.8  {q0}, [out]
    restore_all_neon
    eor     r0, r0              // return 0 for SUCCESS
    vpaes_pop

.globl _AccelerateCrypto_vpaes_decrypt
.p2align 4
_AccelerateCrypto_vpaes_decrypt:
    vpaes_push
    save_all_neon
    vld1.8  {q0}, [in]
    bl      vpaes_preheat
    bl      vpaes_decrypt_core
    vst1.8  {q0}, [out]
    restore_all_neon
    eor     r0, r0              // return 0 for SUCCESS
    vpaes_pop

.p2align 4
//
// vpaes_preheat: load the shared lookup tables into q9-q15.
//
vpaes_preheat:
    adr     r12, .Lk_s0F
    vldmia  r12, {q9-q15}
    bx      lr

.p2align 6
// The following 7 16-byte words are loaded into q9-q15 by vpaes_preheat:
// q9 = s0F nibble mask, q10/q11 = inv, q12/q13 = sb1, q14/q15 = sb2.
.Lk_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F
.Lk_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809
.Lk_sb1:
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.Lk_sb2:
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD

#endif // !defined(__arm64__) && defined(__ARM_NEON__)