/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/* Emit code; let the assembler accept the AESE/AESMC instructions. */
	.text
	.arch	armv8-a+crypto

/*
 * load_round_keys rk, nr, tmp
 *
 * Load the AES key schedule found at \rk into the NEON registers that
 * aes_encrypt expects:
 *
 *   v10-v13 : round keys rk[0] .. rk[3]
 *   v14-v21 : round keys rk[nr - 10] .. rk[nr - 3]
 *   v3-v5   : round keys rk[nr - 2] .. rk[nr]
 *
 * \rk  - 64-bit register holding the key schedule address (not modified)
 * \nr  - 32-bit register holding the number of rounds (not modified)
 * \tmp - 64-bit scratch register, clobbered. Its name is pasted after a 'w'
 *        to obtain the 32-bit view, so this relies on wxN register aliases
 *        being defined -- presumably by <asm/assembler.h>; confirm.
 */
	.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10			/* number of leading round keys to skip */
	add	\tmp, \rk, w\tmp, sxtw #4	/* each round key is 16 bytes */
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
	.endm

/*
 * dround va, vb, vk
 *
 * Perform one AESE + AESMC step with round key \vk on each of the two
 * states \va and \vb. The arguments are bare vector register names
 * (e.g. v0); the .16b arrangement is appended here.
 */
	.macro	dround, va, vb, vk
	aese	\va\().16b, \vk\().16b
	aesmc	\va\().16b, \va\().16b
	aese	\vb\().16b, \vk\().16b
	aesmc	\vb\().16b, \vb\().16b
	.endm

/*
 * aes_encrypt va, vb, nr
 *
 * Run the AES rounds on the two states \va and \vb in parallel, using the
 * round keys placed in v3-v5 and v10-v21 by load_round_keys.
 *
 * \nr is a 32-bit register holding the round count. Only bits 2 and 1 are
 * tested to decide how many of the leading keys in v10-v13 are used:
 *
 *   nr == 10 (bit 2 clear)        : none
 *   nr == 12 (bit 2 set, 1 clear) : v10, v11
 *   nr == 14 (bits 2 and 1 set)   : v10 .. v13
 *
 * The last AESE (with v4) is not followed by AESMC, and the final round key
 * in v5 is NOT applied here: callers must XOR v5 in themselves, or arrange
 * for it to cancel out.
 *
 * .L\@ expands to a label that is unique per macro expansion.
 */
	.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
	.endm

/*
 * aes_ccm_do_crypt enc
 *
 * Common body of ce_aes_ccm_encrypt (\enc == 1) and ce_aes_ccm_decrypt
 * (\enc == 0). Register roles on entry, per the C prototypes of those
 * functions:
 *
 *   x0 = out      x1 = in     w2 = cbytes    x3 = rk
 *   w4 = rounds   x5 = mac    x6 = ctr       x7 = final_iv (may be NULL)
 *
 * Internal usage:
 *   v0 = MAC state, v1 = counter block / keystream, v2 = input block,
 *   v6 = output block, v3-v5 and v10-v21 = round keys,
 *   x8 = low 64 bits of the counter, x9/x10 = scratch
 *
 * Each loop iteration encrypts the MAC state and the incremented counter
 * block together. aes_encrypt leaves the final round key (v5) unapplied, so
 * it is folded into the XORs below instead.
 *
 * Exits:
 *   - fewer than 16 bytes left: branch to ce_aes_ccm_crypt_tail
 *   - all input consumed: write the counter back to [x6, #8], then either
 *     branch to ce_aes_ccm_final (x7 != NULL) or store the MAC and return
 */
	.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* load mac */
	/*
	 * cbytes is a u32, so only w2 is defined; the upper half of x2 is
	 * unspecified by the procedure call standard and must not be tested.
	 */
	cbz	w2, ce_aes_ccm_final
	ldr	x8, [x6, #8]			/* load lower ctr */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	ins	v1.d[1], x9			/* no carry in lower ctr */

	aes_encrypt	v0, v1, w4

	subs	w2, w2, #16			/* flags are consumed by bmi and bne below */
	bmi	ce_aes_ccm_crypt_tail
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store mac */
	ret
	.endm

/*
 * ce_aes_ccm_crypt_tail - process a trailing partial block (1 to 15 bytes)
 *
 * Reached by a branch from aes_ccm_do_crypt, not by a call, with:
 *
 *   w2    = remaining bytes - 16 (negative)
 *   x0/x1 = out/in, pointing at the start of the partial block
 *   v0/v1 = MAC state / counter block after aes_encrypt, with the final
 *           round key (v5) not yet applied
 *   v6    = output block written by the previous loop iteration
 *   v22   = all ones when encrypting, zero when decrypting
 *   x5    = mac, x7 = final_iv (may be NULL), w4 = rounds
 *
 * Both pointers are moved back by (16 - remaining) bytes so that one full
 * 16-byte load and store ends exactly at the end of the data. The three
 * vectors read around .Lpermute are TBL/TBX index vectors that shift the
 * keystream to the end of the register, re-insert the tail of the previous
 * output block in front of the new bytes, and shift the plaintext back to
 * the start of the register for the MAC.
 *
 * NOTE(review): when no full block was handled in this call, v6 does not
 * hold a previous output block and the 16-byte access begins before the
 * in/out addresses that were passed in. Presumably the caller guarantees
 * this is safe -- confirm against the C glue code.
 *
 * ce_aes_ccm_final encrypts the MAC with the block at x7 and stores the
 * result to [x5]. It is also entered directly from aes_ccm_do_crypt.
 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw		/* x9 = .Lpermute - (16 - remaining) */
	sub	x8, x8, w2, sxtw		/* x8 = .Lpermute + (16 - remaining) */
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	eor	v7.16b, v2.16b, v1.16b		/* encrypt partial input block */
	bif	v2.16b, v7.16b, v22.16b		/* select plaintext */
	tbx	v7.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */
	tbl	v2.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v2 */
	eor	v0.16b, v0.16b, v2.16b		/* fold plaintext into mac */

	st1	{v7.16b}, [x0]			/* store output block */
	cbz	x7, 0f				/* no final_iv: just store the mac */

SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
	ld1	{v1.16b}, [x7]			/* load 1st ctriv */

	aes_encrypt	v0, v1, w4

	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
0:	st1	{v0.16b}, [x5]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)

/*
|
|
* void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
|
|
* u8 const rk[], u32 rounds, u8 mac[],
|
|
* u8 ctr[], u8 const final_iv[]);
|
|
* void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
|
|
* u8 const rk[], u32 rounds, u8 mac[],
|
|
* u8 ctr[], u8 const final_iv[]);
|
|
*/
|
|
SYM_FUNC_START(ce_aes_ccm_encrypt)
|
|
movi v22.16b, #255
|
|
aes_ccm_do_crypt 1
|
|
SYM_FUNC_END(ce_aes_ccm_encrypt)
|
|
|
|
SYM_FUNC_START(ce_aes_ccm_decrypt)
|
|
movi v22.16b, #0
|
|
aes_ccm_do_crypt 0
|
|
SYM_FUNC_END(ce_aes_ccm_decrypt)
|
|
|
|
.section ".rodata", "a"
|
|
.align 6
|
|
.fill 15, 1, 0xff
|
|
.Lpermute:
|
|
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
|
|
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
|
|
.fill 15, 1, 0xff
|