crypto: arm/poly1305 - incorporate OpenSSL/CRYPTOGAMS NEON implementation
This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for NEON authored by Andy Polyakov, and contributed by him to the OpenSSL project. The file 'poly1305-armv4.pl' is taken straight from this upstream GitHub repository [0] at commit ec55a08dc0244ce570c4fc7cade330c60798952f, and already contains all the changes required to build it as part of a Linux kernel module. [0] https://github.com/dot-asm/cryptogams Co-developed-by: Andy Polyakov <appro@cryptogams.org> Signed-off-by: Andy Polyakov <appro@cryptogams.org> Signed-off-by: Ard Biesheuvel <ardb@kernel.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
f569ca1647
commit
a6b803b3dd
|
@ -132,6 +132,11 @@ config CRYPTO_CHACHA20_NEON
|
|||
select CRYPTO_SKCIPHER
|
||||
select CRYPTO_ARCH_HAVE_LIB_CHACHA
|
||||
|
||||
config CRYPTO_POLY1305_ARM
|
||||
tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
|
||||
select CRYPTO_HASH
|
||||
select CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
|
||||
config CRYPTO_NHPOLY1305_NEON
|
||||
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
|
||||
depends on KERNEL_MODE_NEON
|
||||
|
|
|
@ -10,6 +10,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
|
|||
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
|
||||
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
|
||||
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
|
||||
obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
|
||||
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
|
||||
|
||||
obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
|
||||
|
@ -35,12 +36,16 @@ crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
|
|||
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
|
||||
chacha-neon-y := chacha-scalar-core.o chacha-glue.o
|
||||
chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
|
||||
poly1305-arm-y := poly1305-core.o poly1305-glue.o
|
||||
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
|
||||
|
||||
ifdef REGENERATE_ARM_CRYPTO
|
||||
quiet_cmd_perl = PERL $@
|
||||
cmd_perl = $(PERL) $(<) > $(@)
|
||||
|
||||
$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
|
||||
$(call cmd,perl)
|
||||
|
||||
|
@ -48,4 +53,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
|
|||
$(call cmd,perl)
|
||||
endif
|
||||
|
||||
clean-files += sha256-core.S sha512-core.S
|
||||
clean-files += poly1305-core.S sha256-core.S sha512-core.S
|
||||
|
||||
# massage the perlasm code a bit so we only get the NEON routine if we need it
|
||||
poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
|
||||
poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
|
||||
AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,276 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
|
||||
*
|
||||
* Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include <crypto/algapi.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/poly1305.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <linux/cpufeature.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
void poly1305_init_arm(void *state, const u8 *key);
|
||||
void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
|
||||
void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
|
||||
|
||||
void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
|
||||
{
|
||||
}
|
||||
|
||||
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||
|
||||
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
|
||||
{
|
||||
poly1305_init_arm(&dctx->h, key);
|
||||
dctx->s[0] = get_unaligned_le32(key + 16);
|
||||
dctx->s[1] = get_unaligned_le32(key + 20);
|
||||
dctx->s[2] = get_unaligned_le32(key + 24);
|
||||
dctx->s[3] = get_unaligned_le32(key + 28);
|
||||
dctx->buflen = 0;
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_init_arch);
|
||||
|
||||
static int arm_poly1305_init(struct shash_desc *desc)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
dctx->buflen = 0;
|
||||
dctx->rset = 0;
|
||||
dctx->sset = false;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
|
||||
u32 len, u32 hibit, bool do_neon)
|
||||
{
|
||||
if (unlikely(!dctx->sset)) {
|
||||
if (!dctx->rset) {
|
||||
poly1305_init_arm(&dctx->h, src);
|
||||
src += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
dctx->rset = 1;
|
||||
}
|
||||
if (len >= POLY1305_BLOCK_SIZE) {
|
||||
dctx->s[0] = get_unaligned_le32(src + 0);
|
||||
dctx->s[1] = get_unaligned_le32(src + 4);
|
||||
dctx->s[2] = get_unaligned_le32(src + 8);
|
||||
dctx->s[3] = get_unaligned_le32(src + 12);
|
||||
src += POLY1305_BLOCK_SIZE;
|
||||
len -= POLY1305_BLOCK_SIZE;
|
||||
dctx->sset = true;
|
||||
}
|
||||
if (len < POLY1305_BLOCK_SIZE)
|
||||
return;
|
||||
}
|
||||
|
||||
len &= ~(POLY1305_BLOCK_SIZE - 1);
|
||||
|
||||
if (static_branch_likely(&have_neon) && likely(do_neon))
|
||||
poly1305_blocks_neon(&dctx->h, src, len, hibit);
|
||||
else
|
||||
poly1305_blocks_arm(&dctx->h, src, len, hibit);
|
||||
}
|
||||
|
||||
static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
|
||||
const u8 *src, u32 len, bool do_neon)
|
||||
{
|
||||
if (unlikely(dctx->buflen)) {
|
||||
u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
|
||||
memcpy(dctx->buf + dctx->buflen, src, bytes);
|
||||
src += bytes;
|
||||
len -= bytes;
|
||||
dctx->buflen += bytes;
|
||||
|
||||
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
||||
arm_poly1305_blocks(dctx, dctx->buf,
|
||||
POLY1305_BLOCK_SIZE, 1, false);
|
||||
dctx->buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(len >= POLY1305_BLOCK_SIZE)) {
|
||||
arm_poly1305_blocks(dctx, src, len, 1, do_neon);
|
||||
src += round_down(len, POLY1305_BLOCK_SIZE);
|
||||
len %= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (unlikely(len)) {
|
||||
dctx->buflen = len;
|
||||
memcpy(dctx->buf, src, len);
|
||||
}
|
||||
}
|
||||
|
||||
static int arm_poly1305_update(struct shash_desc *desc,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
arm_poly1305_do_update(dctx, src, srclen, false);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
|
||||
const u8 *src,
|
||||
unsigned int srclen)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
bool do_neon = crypto_simd_usable() && srclen > 128;
|
||||
|
||||
if (static_branch_likely(&have_neon) && do_neon)
|
||||
kernel_neon_begin();
|
||||
arm_poly1305_do_update(dctx, src, srclen, do_neon);
|
||||
if (static_branch_likely(&have_neon) && do_neon)
|
||||
kernel_neon_end();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
|
||||
unsigned int nbytes)
|
||||
{
|
||||
bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
|
||||
crypto_simd_usable();
|
||||
|
||||
if (unlikely(dctx->buflen)) {
|
||||
u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
|
||||
memcpy(dctx->buf + dctx->buflen, src, bytes);
|
||||
src += bytes;
|
||||
nbytes -= bytes;
|
||||
dctx->buflen += bytes;
|
||||
|
||||
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
|
||||
poly1305_blocks_arm(&dctx->h, dctx->buf,
|
||||
POLY1305_BLOCK_SIZE, 1);
|
||||
dctx->buflen = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
|
||||
unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
|
||||
|
||||
if (static_branch_likely(&have_neon) && do_neon) {
|
||||
kernel_neon_begin();
|
||||
poly1305_blocks_neon(&dctx->h, src, len, 1);
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
poly1305_blocks_arm(&dctx->h, src, len, 1);
|
||||
}
|
||||
src += len;
|
||||
nbytes %= POLY1305_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
if (unlikely(nbytes)) {
|
||||
dctx->buflen = nbytes;
|
||||
memcpy(dctx->buf, src, nbytes);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_update_arch);
|
||||
|
||||
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
|
||||
{
|
||||
__le32 digest[4];
|
||||
u64 f = 0;
|
||||
|
||||
if (unlikely(dctx->buflen)) {
|
||||
dctx->buf[dctx->buflen++] = 1;
|
||||
memset(dctx->buf + dctx->buflen, 0,
|
||||
POLY1305_BLOCK_SIZE - dctx->buflen);
|
||||
poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
|
||||
}
|
||||
|
||||
poly1305_emit_arm(&dctx->h, digest, dctx->s);
|
||||
|
||||
/* mac = (h + s) % (2^128) */
|
||||
f = (f >> 32) + le32_to_cpu(digest[0]);
|
||||
put_unaligned_le32(f, dst);
|
||||
f = (f >> 32) + le32_to_cpu(digest[1]);
|
||||
put_unaligned_le32(f, dst + 4);
|
||||
f = (f >> 32) + le32_to_cpu(digest[2]);
|
||||
put_unaligned_le32(f, dst + 8);
|
||||
f = (f >> 32) + le32_to_cpu(digest[3]);
|
||||
put_unaligned_le32(f, dst + 12);
|
||||
|
||||
*dctx = (struct poly1305_desc_ctx){};
|
||||
}
|
||||
EXPORT_SYMBOL(poly1305_final_arch);
|
||||
|
||||
static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
|
||||
{
|
||||
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
|
||||
|
||||
if (unlikely(!dctx->sset))
|
||||
return -ENOKEY;
|
||||
|
||||
poly1305_final_arch(dctx, dst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg arm_poly1305_algs[] = {{
|
||||
.init = arm_poly1305_init,
|
||||
.update = arm_poly1305_update,
|
||||
.final = arm_poly1305_final,
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
.descsize = sizeof(struct poly1305_desc_ctx),
|
||||
|
||||
.base.cra_name = "poly1305",
|
||||
.base.cra_driver_name = "poly1305-arm",
|
||||
.base.cra_priority = 150,
|
||||
.base.cra_blocksize = POLY1305_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
#ifdef CONFIG_KERNEL_MODE_NEON
|
||||
}, {
|
||||
.init = arm_poly1305_init,
|
||||
.update = arm_poly1305_update_neon,
|
||||
.final = arm_poly1305_final,
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
.descsize = sizeof(struct poly1305_desc_ctx),
|
||||
|
||||
.base.cra_name = "poly1305",
|
||||
.base.cra_driver_name = "poly1305-neon",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_blocksize = POLY1305_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
#endif
|
||||
}};
|
||||
|
||||
static int __init arm_poly1305_mod_init(void)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
|
||||
(elf_hwcap & HWCAP_NEON))
|
||||
static_branch_enable(&have_neon);
|
||||
else
|
||||
/* register only the first entry */
|
||||
return crypto_register_shash(&arm_poly1305_algs[0]);
|
||||
|
||||
return crypto_register_shashes(arm_poly1305_algs,
|
||||
ARRAY_SIZE(arm_poly1305_algs));
|
||||
}
|
||||
|
||||
static void __exit arm_poly1305_mod_exit(void)
|
||||
{
|
||||
if (!static_branch_likely(&have_neon)) {
|
||||
crypto_unregister_shash(&arm_poly1305_algs[0]);
|
||||
return;
|
||||
}
|
||||
crypto_unregister_shashes(arm_poly1305_algs,
|
||||
ARRAY_SIZE(arm_poly1305_algs));
|
||||
}
|
||||
|
||||
module_init(arm_poly1305_mod_init);
|
||||
module_exit(arm_poly1305_mod_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_ALIAS_CRYPTO("poly1305");
|
||||
MODULE_ALIAS_CRYPTO("poly1305-arm");
|
||||
MODULE_ALIAS_CRYPTO("poly1305-neon");
|
|
@ -40,7 +40,7 @@ config CRYPTO_LIB_DES
|
|||
config CRYPTO_LIB_POLY1305_RSIZE
|
||||
int
|
||||
default 4 if X86_64
|
||||
default 9 if ARM64
|
||||
default 9 if ARM || ARM64
|
||||
default 1
|
||||
|
||||
config CRYPTO_ARCH_HAVE_LIB_POLY1305
|
||||
|
|
Loading…
Reference in New Issue