From 109568e110ed67d4be1b28609b9fa00fca97f8eb Mon Sep 17 00:00:00 2001
From: Huang Ying
Date: Fri, 9 Jan 2009 16:49:30 +1100
Subject: crypto: aes - Move key_length in struct crypto_aes_ctx to be the last field

The Intel AES-NI AES acceleration instructions need key_enc and key_dec
in struct crypto_aes_ctx to be 16-byte aligned; moving key_length to be
the last field makes that easier.

Signed-off-by: Huang Ying
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/aes-i586-asm_32.S   | 6 +++---
 arch/x86/crypto/aes-x86_64-asm_64.S | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)
(limited to 'arch')

diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
index e41b147f450..5252c388017 100644
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ b/arch/x86/crypto/aes-i586-asm_32.S
@@ -46,9 +46,9 @@
 #define in_blk 16

 /* offsets in crypto_tfm structure */
-#define klen (crypto_tfm_ctx_offset + 0)
-#define ekey (crypto_tfm_ctx_offset + 4)
-#define dkey (crypto_tfm_ctx_offset + 244)
+#define klen (crypto_tfm_ctx_offset + 480)
+#define ekey (crypto_tfm_ctx_offset + 0)
+#define dkey (crypto_tfm_ctx_offset + 240)

 // register mapping for encrypt and decrypt subroutines

diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index a120f526c3d..7f28f62737d 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -56,13 +56,13 @@
 	.align 8;			\
 FUNC:	movq	r1,r2;			\
 	movq	r3,r4;			\
-	leaq	BASE+KEY+48+4(r8),r9;	\
+	leaq	BASE+KEY+48(r8),r9;	\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
 	movl	4(r7),r1 ## E;		\
 	movl	8(r7),r6 ## E;		\
 	movl	12(r7),r7 ## E;		\
-	movl	BASE+0(r8),r10 ## E;	\
+	movl	BASE+480(r8),r10 ## E;	\
 	xorl	-48(r9),r5 ## E;	\
 	xorl	-44(r9),r1 ## E;	\
 	xorl	-40(r9),r6 ## E;	\
--
cgit v1.2.3

From 07bf44f86989f5ed866510374fe761d1903681fb Mon Sep 17 00:00:00 2001
From: Huang Ying
Date: Fri, 9 Jan 2009 17:25:50 +1100
Subject: crypto: aes - Export x86 AES encrypt/decrypt functions

The Intel AES-NI AES acceleration instructions touch XMM state, which
cannot be used safely in soft_irq context, so the general x86 AES
implementation is exported here to serve as a fallback.

The first parameter is changed from struct crypto_tfm * to struct
crypto_aes_ctx * to make it easier to deal with the 16-byte alignment
requirement of the AES-NI implementation.
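The 0/240/480 offsets hard-coded in the assembler sources come straight
from the resulting crypto_aes_ctx layout. A minimal standalone sketch of
that layout (field names as in crypto/aes.h; the static asserts are not
kernel code, they just mirror the ekey/dkey/klen defines above):

	#include <stddef.h>
	#include <stdint.h>

	#define AES_MAX_KEYLENGTH	(15 * 16)	/* 240 bytes */
	#define AES_MAX_KEYLENGTH_U32	(AES_MAX_KEYLENGTH / sizeof(uint32_t))

	struct crypto_aes_ctx {
		uint32_t key_enc[AES_MAX_KEYLENGTH_U32];	/* offset   0: ekey */
		uint32_t key_dec[AES_MAX_KEYLENGTH_U32];	/* offset 240: dkey */
		uint32_t key_length;				/* offset 480: klen */
	};

	_Static_assert(offsetof(struct crypto_aes_ctx, key_enc) == 0, "ekey");
	_Static_assert(offsetof(struct crypto_aes_ctx, key_dec) == 240, "dkey");
	_Static_assert(offsetof(struct crypto_aes_ctx, key_length) == 480, "klen");

With key_length last, key_enc and key_dec start at 16-byte multiples
within the context, so only the context base itself needs aligning.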
Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 18 +++++++++--------- arch/x86/crypto/aes-x86_64-asm_64.S | 6 ++---- arch/x86/crypto/aes_glue.c | 20 ++++++++++++++++---- arch/x86/include/asm/aes.h | 11 +++++++++++ 4 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/aes.h (limited to 'arch') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index 5252c388017..b949ec2f9af 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -41,14 +41,14 @@ #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) /* offsets to parameters with one register pushed onto stack */ -#define tfm 8 +#define ctx 8 #define out_blk 12 #define in_blk 16 -/* offsets in crypto_tfm structure */ -#define klen (crypto_tfm_ctx_offset + 480) -#define ekey (crypto_tfm_ctx_offset + 0) -#define dkey (crypto_tfm_ctx_offset + 240) +/* offsets in crypto_aes_ctx structure */ +#define klen (480) +#define ekey (0) +#define dkey (240) // register mapping for encrypt and decrypt subroutines @@ -217,7 +217,7 @@ do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */ // AES (Rijndael) Encryption Subroutine -/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ +/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ .global aes_enc_blk @@ -228,7 +228,7 @@ aes_enc_blk: push %ebp - mov tfm(%esp),%ebp + mov ctx(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings @@ -292,7 +292,7 @@ aes_enc_blk: ret // AES (Rijndael) Decryption Subroutine -/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out_blk, const u8 *in_blk) */ +/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ .global aes_dec_blk @@ -303,7 +303,7 @@ aes_enc_blk: aes_dec_blk: push %ebp - mov tfm(%esp),%ebp + mov ctx(%esp),%ebp // CAUTION: the order and the values used in these assigns // rely on the register mappings diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 7f28f62737d..5b577d5a059 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -17,8 +17,6 @@ #include -#define BASE crypto_tfm_ctx_offset - #define R1 %rax #define R1E %eax #define R1X %ax @@ -56,13 +54,13 @@ .align 8; \ FUNC: movq r1,r2; \ movq r3,r4; \ - leaq BASE+KEY+48(r8),r9; \ + leaq KEY+48(r8),r9; \ movq r10,r11; \ movl (r7),r5 ## E; \ movl 4(r7),r1 ## E; \ movl 8(r7),r6 ## E; \ movl 12(r7),r7 ## E; \ - movl BASE+480(r8),r10 ## E; \ + movl 480(r8),r10 ## E; \ xorl -48(r9),r5 ## E; \ xorl -44(r9),r1 ## E; \ xorl -40(r9),r6 ## E; \ diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c index 71f45782711..49ae9fe32b2 100644 --- a/arch/x86/crypto/aes_glue.c +++ b/arch/x86/crypto/aes_glue.c @@ -5,17 +5,29 @@ #include -asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); +asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); +asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in); + +void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) +{ + aes_enc_blk(ctx, dst, src); +} +EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86); + +void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) +{ + aes_dec_blk(ctx, dst, src); +} 
+EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);

 static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_enc_blk(tfm, dst, src);
+	aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
 }

 static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
-	aes_dec_blk(tfm, dst, src);
+	aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
 }

 static struct crypto_alg aes_alg = {
diff --git a/arch/x86/include/asm/aes.h b/arch/x86/include/asm/aes.h
new file mode 100644
index 00000000000..80545a1cbe3
--- /dev/null
+++ b/arch/x86/include/asm/aes.h
@@ -0,0 +1,11 @@
+#ifndef ASM_X86_AES_H
+#define ASM_X86_AES_H
+
+#include <linux/crypto.h>
+#include <crypto/aes.h>
+
+void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+			    const u8 *src);
+void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
+			    const u8 *src);
+#endif
--
cgit v1.2.3

From 54b6a1bd5364aca95cd6ffae00f2b64c6511122c Mon Sep 17 00:00:00 2001
From: Huang Ying
Date: Sun, 18 Jan 2009 16:28:34 +1100
Subject: crypto: aes-ni - Add support to Intel AES-NI instructions for x86_64 platform

Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
instructions that are going to be introduced in the next generation of
Intel processors, as of 2009. These instructions enable fast and secure
data encryption and decryption, using the Advanced Encryption Standard
(AES) defined by FIPS Publication number 197. The architecture
introduces six instructions that offer full hardware support for AES.
Four of them support high-performance data encryption and decryption,
and the other two instructions support the AES key expansion procedure.

The white paper can be downloaded from:

http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf

AES may be used in soft_irq context, but MMX/SSE context cannot be
touched safely there, so in_interrupt() is checked; in IRQ or soft_irq
context, the general x86_64 implementation is used instead.

Signed-off-by: Huang Ying
Signed-off-by: Herbert Xu
---
 arch/x86/crypto/Makefile           |   3 +
 arch/x86/crypto/aesni-intel_asm.S  | 896 +++++++++++++++++++++++++++++++++++++
 arch/x86/crypto/aesni-intel_glue.c | 461 +++++++++++++++++++
 arch/x86/include/asm/cpufeature.h  |   1 +
 4 files changed, 1361 insertions(+)
 create mode 100644 arch/x86/crypto/aesni-intel_asm.S
 create mode 100644 arch/x86/crypto/aesni-intel_glue.c
(limited to 'arch')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 903de4aa509..ebe7deedd5b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o
+obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o

 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o

@@ -19,3 +20,5 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o
+
+aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
new file mode 100644
index 00000000000..caba9960170
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -0,0 +1,896 @@
+/*
+ * Implement AES algorithm in Intel AES-NI instructions.
+ * + * The white paper of AES-NI instructions can be downloaded from: + * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf + * + * Copyright (C) 2008, Intel Corp. + * Author: Huang Ying + * Vinodh Gopal + * Kahraman Akdemir + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include + +.text + +#define STATE1 %xmm0 +#define STATE2 %xmm4 +#define STATE3 %xmm5 +#define STATE4 %xmm6 +#define STATE STATE1 +#define IN1 %xmm1 +#define IN2 %xmm7 +#define IN3 %xmm8 +#define IN4 %xmm9 +#define IN IN1 +#define KEY %xmm2 +#define IV %xmm3 + +#define KEYP %rdi +#define OUTP %rsi +#define INP %rdx +#define LEN %rcx +#define IVP %r8 +#define KLEN %r9d +#define T1 %r10 +#define TKEYP T1 +#define T2 %r11 + +_key_expansion_128: +_key_expansion_256a: + pshufd $0b11111111, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + movaps %xmm0, (%rcx) + add $0x10, %rcx + ret + +_key_expansion_192a: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + movaps %xmm2, %xmm6 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, %xmm1 + shufps $0b01000100, %xmm0, %xmm6 + movaps %xmm6, (%rcx) + shufps $0b01001110, %xmm2, %xmm1 + movaps %xmm1, 16(%rcx) + add $0x20, %rcx + ret + +_key_expansion_192b: + pshufd $0b01010101, %xmm1, %xmm1 + shufps $0b00010000, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + shufps $0b10001100, %xmm0, %xmm4 + pxor %xmm4, %xmm0 + pxor %xmm1, %xmm0 + + movaps %xmm2, %xmm5 + pslldq $4, %xmm5 + pshufd $0b11111111, %xmm0, %xmm3 + pxor %xmm3, %xmm2 + pxor %xmm5, %xmm2 + + movaps %xmm0, (%rcx) + add $0x10, %rcx + ret + +_key_expansion_256b: + pshufd $0b10101010, %xmm1, %xmm1 + shufps $0b00010000, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + shufps $0b10001100, %xmm2, %xmm4 + pxor %xmm4, %xmm2 + pxor %xmm1, %xmm2 + movaps %xmm2, (%rcx) + add $0x10, %rcx + ret + +/* + * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + * unsigned int key_len) + */ +ENTRY(aesni_set_key) + movups (%rsi), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (%rdi) + lea 0x10(%rdi), %rcx # key addr + movl %edx, 480(%rdi) + pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x + cmp $24, %dl + jb .Lenc_key128 + je .Lenc_key192 + movups 0x10(%rsi), %xmm2 # other user key + movaps %xmm2, (%rcx) + add $0x10, %rcx + # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + call _key_expansion_256a + # aeskeygenassist $0x1, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + call _key_expansion_256b + # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + call _key_expansion_256a + # aeskeygenassist $0x2, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + call _key_expansion_256b + # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + call _key_expansion_256a + # aeskeygenassist $0x4, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + call _key_expansion_256b + # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + call _key_expansion_256a 
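+	# The .byte sequences throughout this file hand-encode the AES-NI
+	# opcodes (aeskeygenassist, aesenc, aesdec, aesimc, ...) so that
+	# the file assembles even with binutils that do not yet know the
+	# mnemonics; the intended instruction is kept in the comment
+	# directly above each encoding.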
+ # aeskeygenassist $0x8, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + call _key_expansion_256b + # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + call _key_expansion_256a + # aeskeygenassist $0x10, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + call _key_expansion_256b + # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + call _key_expansion_256a + # aeskeygenassist $0x20, %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + call _key_expansion_256b + # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + call _key_expansion_256a + jmp .Ldec_key +.Lenc_key192: + movq 0x10(%rsi), %xmm2 # other user key + # aeskeygenassist $0x1, %xmm2, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01 + call _key_expansion_192a + # aeskeygenassist $0x2, %xmm2, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02 + call _key_expansion_192b + # aeskeygenassist $0x4, %xmm2, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04 + call _key_expansion_192a + # aeskeygenassist $0x8, %xmm2, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08 + call _key_expansion_192b + # aeskeygenassist $0x10, %xmm2, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10 + call _key_expansion_192a + # aeskeygenassist $0x20, %xmm2, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20 + call _key_expansion_192b + # aeskeygenassist $0x40, %xmm2, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40 + call _key_expansion_192a + # aeskeygenassist $0x80, %xmm2, %xmm1 # round 8 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80 + call _key_expansion_192b + jmp .Ldec_key +.Lenc_key128: + # aeskeygenassist $0x1, %xmm0, %xmm1 # round 1 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01 + call _key_expansion_128 + # aeskeygenassist $0x2, %xmm0, %xmm1 # round 2 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02 + call _key_expansion_128 + # aeskeygenassist $0x4, %xmm0, %xmm1 # round 3 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04 + call _key_expansion_128 + # aeskeygenassist $0x8, %xmm0, %xmm1 # round 4 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08 + call _key_expansion_128 + # aeskeygenassist $0x10, %xmm0, %xmm1 # round 5 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10 + call _key_expansion_128 + # aeskeygenassist $0x20, %xmm0, %xmm1 # round 6 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20 + call _key_expansion_128 + # aeskeygenassist $0x40, %xmm0, %xmm1 # round 7 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40 + call _key_expansion_128 + # aeskeygenassist $0x80, %xmm0, %xmm1 # round 8 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80 + call _key_expansion_128 + # aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b + call _key_expansion_128 + # aeskeygenassist $0x36, %xmm0, %xmm1 # round 10 + .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36 + call _key_expansion_128 +.Ldec_key: + sub $0x10, %rcx + movaps (%rdi), %xmm0 + movaps (%rcx), %xmm1 + movaps %xmm0, 240(%rcx) + movaps %xmm1, 240(%rdi) + add $0x10, %rdi + lea 240-16(%rcx), %rsi +.align 4 +.Ldec_key_loop: + movaps (%rdi), %xmm0 + # aesimc %xmm0, %xmm1 + .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8 + movaps %xmm1, (%rsi) + add $0x10, %rdi + sub $0x10, %rsi + cmp %rcx, %rdi + jb .Ldec_key_loop + xor %rax, %rax + ret + +/* + * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_enc) + movl 480(KEYP), KLEN # key length + movups (INP), STATE # input + call _aesni_enc1 + movups STATE, (OUTP) # output 
+ ret + +/* + * _aesni_enc1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_enc1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Lenc128 + lea 0x20(TKEYP), TKEYP + je .Lenc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x50(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 +.align 4 +.Lenc192: + movaps -0x40(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x30(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 +.align 4 +.Lenc128: + movaps -0x20(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps -0x10(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps (TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x10(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x20(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x30(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x40(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x50(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x60(TKEYP), KEY + # aesenc KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + movaps 0x70(TKEYP), KEY + # aesenclast KEY, STATE # last round + .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + ret + +/* + * _aesni_enc4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: round count + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_enc4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4enc128 + lea 0x20(TKEYP), TKEYP + je .L4enc192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x50(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 +#.align 4 +.L4enc192: + movaps -0x40(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x30(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 +#.align 4 +.L4enc128: + movaps -0x20(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, 
STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps -0x10(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps (TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x10(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x20(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x30(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x40(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x50(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x60(TKEYP), KEY + # aesenc KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xc2 + # aesenc KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xe2 + # aesenc KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xea + # aesenc KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdc, 0xf2 + movaps 0x70(TKEYP), KEY + # aesenclast KEY, STATE1 # last round + .byte 0x66, 0x0f, 0x38, 0xdd, 0xc2 + # aesenclast KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xe2 + # aesenclast KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xea + # aesenclast KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdd, 0xf2 + ret + +/* + * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) + */ +ENTRY(aesni_dec) + mov 480(KEYP), KLEN # key length + add $240, KEYP + movups (INP), STATE # input + call _aesni_dec1 + movups STATE, (OUTP) #output + ret + +/* + * _aesni_dec1: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE: initial state (input) + * output: + * STATE: finial state (output) + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_dec1: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE # round 0 + add $0x30, TKEYP + cmp $24, KLEN + jb .Ldec128 + lea 0x20(TKEYP), TKEYP + je .Ldec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x50(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 +.align 4 +.Ldec192: + movaps -0x40(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x30(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 +.align 4 +.Ldec128: + movaps -0x20(TKEYP), KEY + # aesdec KEY, STATE 
+ .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps -0x10(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps (TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x10(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x20(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x30(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x40(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x50(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x60(TKEYP), KEY + # aesdec KEY, STATE + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + movaps 0x70(TKEYP), KEY + # aesdeclast KEY, STATE # last round + .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + ret + +/* + * _aesni_dec4: internal ABI + * input: + * KEYP: key struct pointer + * KLEN: key length + * STATE1: initial state (input) + * STATE2 + * STATE3 + * STATE4 + * output: + * STATE1: finial state (output) + * STATE2 + * STATE3 + * STATE4 + * changed: + * KEY + * TKEYP (T1) + */ +_aesni_dec4: + movaps (KEYP), KEY # key + mov KEYP, TKEYP + pxor KEY, STATE1 # round 0 + pxor KEY, STATE2 + pxor KEY, STATE3 + pxor KEY, STATE4 + add $0x30, TKEYP + cmp $24, KLEN + jb .L4dec128 + lea 0x20(TKEYP), TKEYP + je .L4dec192 + add $0x20, TKEYP + movaps -0x60(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x50(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 +.align 4 +.L4dec192: + movaps -0x40(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x30(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 +.align 4 +.L4dec128: + movaps -0x20(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps -0x10(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps (TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x10(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 
0x20(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x30(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x40(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x50(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x60(TKEYP), KEY + # aesdec KEY, STATE1 + .byte 0x66, 0x0f, 0x38, 0xde, 0xc2 + # aesdec KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xde, 0xe2 + # aesdec KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xde, 0xea + # aesdec KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xde, 0xf2 + movaps 0x70(TKEYP), KEY + # aesdeclast KEY, STATE1 # last round + .byte 0x66, 0x0f, 0x38, 0xdf, 0xc2 + # aesdeclast KEY, STATE2 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xe2 + # aesdeclast KEY, STATE3 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xea + # aesdeclast KEY, STATE4 + .byte 0x66, 0x0f, 0x38, 0xdf, 0xf2 + ret + +/* + * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len) + */ +ENTRY(aesni_ecb_enc) + test LEN, LEN # check length + jz .Lecb_enc_ret + mov 480(KEYP), KLEN + cmp $16, LEN + jb .Lecb_enc_ret + cmp $64, LEN + jb .Lecb_enc_loop1 +.align 4 +.Lecb_enc_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_enc4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_enc_loop4 + cmp $16, LEN + jb .Lecb_enc_ret +.align 4 +.Lecb_enc_loop1: + movups (INP), STATE1 + call _aesni_enc1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_enc_loop1 +.Lecb_enc_ret: + ret + +/* + * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len); + */ +ENTRY(aesni_ecb_dec) + test LEN, LEN + jz .Lecb_dec_ret + mov 480(KEYP), KLEN + add $240, KEYP + cmp $16, LEN + jb .Lecb_dec_ret + cmp $64, LEN + jb .Lecb_dec_loop1 +.align 4 +.Lecb_dec_loop4: + movups (INP), STATE1 + movups 0x10(INP), STATE2 + movups 0x20(INP), STATE3 + movups 0x30(INP), STATE4 + call _aesni_dec4 + movups STATE1, (OUTP) + movups STATE2, 0x10(OUTP) + movups STATE3, 0x20(OUTP) + movups STATE4, 0x30(OUTP) + sub $64, LEN + add $64, INP + add $64, OUTP + cmp $64, LEN + jge .Lecb_dec_loop4 + cmp $16, LEN + jb .Lecb_dec_ret +.align 4 +.Lecb_dec_loop1: + movups (INP), STATE1 + call _aesni_dec1 + movups STATE1, (OUTP) + sub $16, LEN + add $16, INP + add $16, OUTP + cmp $16, LEN + jge .Lecb_dec_loop1 +.Lecb_dec_ret: + ret + +/* + * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, + * size_t len, u8 *iv) + */ +ENTRY(aesni_cbc_enc) + cmp $16, LEN + jb .Lcbc_enc_ret + mov 480(KEYP), KLEN + movups (IVP), STATE # load iv as initial state +.align 4 
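+	# CBC encryption is inherently serial: STATE carries the chaining
+	# value (the IV, then each ciphertext block), which is XORed into
+	# the next plaintext block before a single-block _aesni_enc1 pass,
+	# so no four-block interleaving is possible here.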
+.Lcbc_enc_loop:
+	movups (INP), IN	# load input
+	pxor IN, STATE
+	call _aesni_enc1
+	movups STATE, (OUTP)	# store output
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lcbc_enc_loop
+	movups STATE, (IVP)
+.Lcbc_enc_ret:
+	ret
+
+/*
+ * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ *		      size_t len, u8 *iv)
+ */
+ENTRY(aesni_cbc_dec)
+	cmp $16, LEN
+	jb .Lcbc_dec_ret
+	mov 480(KEYP), KLEN
+	add $240, KEYP
+	movups (IVP), IV
+	cmp $64, LEN
+	jb .Lcbc_dec_loop1
+.align 4
+.Lcbc_dec_loop4:
+	movups (INP), IN1
+	movaps IN1, STATE1
+	movups 0x10(INP), IN2
+	movaps IN2, STATE2
+	movups 0x20(INP), IN3
+	movaps IN3, STATE3
+	movups 0x30(INP), IN4
+	movaps IN4, STATE4
+	call _aesni_dec4
+	pxor IV, STATE1
+	pxor IN1, STATE2
+	pxor IN2, STATE3
+	pxor IN3, STATE4
+	movaps IN4, IV
+	movups STATE1, (OUTP)
+	movups STATE2, 0x10(OUTP)
+	movups STATE3, 0x20(OUTP)
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jge .Lcbc_dec_loop4
+	cmp $16, LEN
+	jb .Lcbc_dec_ret
+.align 4
+.Lcbc_dec_loop1:
+	movups (INP), IN
+	movaps IN, STATE
+	call _aesni_dec1
+	pxor IV, STATE
+	movups STATE, (OUTP)
+	movaps IN, IV
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jge .Lcbc_dec_loop1
+	movups IV, (IVP)
+.Lcbc_dec_ret:
+	ret
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
new file mode 100644
index 00000000000..02af0af6549
--- /dev/null
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -0,0 +1,461 @@
+/*
+ * Support for Intel AES-NI instructions. This file contains glue
+ * code; the real AES implementation is in aesni-intel_asm.S.
+ *
+ * Copyright (C) 2008, Intel Corp.
+ * Author: Huang Ying
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct async_aes_ctx { + struct cryptd_ablkcipher *cryptd_tfm; +}; + +#define AESNI_ALIGN 16 +#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) + +asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, + unsigned int key_len); +asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in); +asmlinkage void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len); +asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); +asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +static inline int kernel_fpu_using(void) +{ + if (in_interrupt() && !(read_cr0() & X86_CR0_TS)) + return 1; + return 0; +} + +static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) +{ + unsigned long addr = (unsigned long)raw_ctx; + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return (struct crypto_aes_ctx *)ALIGN(addr, align); +} + +static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, + const u8 *in_key, unsigned int key_len) +{ + struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); + u32 *flags = &tfm->crt_flags; + int err; + + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && + key_len != AES_KEYSIZE_256) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } + + if (kernel_fpu_using()) + err = crypto_aes_expand_key(ctx, in_key, key_len); + else { + kernel_fpu_begin(); + err = aesni_set_key(ctx, in_key, key_len); + kernel_fpu_end(); + } + + return err; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); +} + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (kernel_fpu_using()) + crypto_aes_encrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_enc(ctx, dst, src); + kernel_fpu_end(); + } +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); + + if (kernel_fpu_using()) + crypto_aes_decrypt_x86(ctx, dst, src); + else { + kernel_fpu_begin(); + aesni_dec(ctx, dst, src); + kernel_fpu_end(); + } +} + +static struct crypto_alg aesni_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-aesni", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aesni_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int ecb_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + 
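+	/*
+	 * Each walk chunk is processed in whole AES blocks; the sub-block
+	 * remainder (nbytes & (AES_BLOCK_SIZE - 1)) is handed back to
+	 * blkcipher_walk_done() for the next pass.  FPU/SSE state is held
+	 * only across this loop.
+	 */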
+ kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int ecb_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_ecb_alg = { + .cra_name = "__ecb-aes-aesni", + .cra_driver_name = "__driver-ecb-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_ecb_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = ecb_encrypt, + .decrypt = ecb_decrypt, + }, + }, +}; + +static int cbc_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static int cbc_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct crypto_aes_ctx *ctx = aes_ctx(crypto_blkcipher_ctx(desc->tfm)); + struct blkcipher_walk walk; + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt(desc, &walk); + + kernel_fpu_begin(); + while ((nbytes = walk.nbytes)) { + aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr, + nbytes & AES_BLOCK_MASK, walk.iv); + nbytes &= AES_BLOCK_SIZE - 1; + err = blkcipher_walk_done(desc, &walk, nbytes); + } + kernel_fpu_end(); + + return err; +} + +static struct crypto_alg blk_cbc_alg = { + .cra_name = "__cbc-aes-aesni", + .cra_driver_name = "__driver-cbc-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, + .cra_alignmask = 0, + .cra_type = &crypto_blkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(blk_cbc_alg.cra_list), + .cra_u = { + .blkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = aes_set_key, + .encrypt = cbc_encrypt, + .decrypt = cbc_decrypt, + }, + }, +}; + +static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + return 
crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); +} + +static int ablk_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (kernel_fpu_using()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_encrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->encrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static int ablk_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (kernel_fpu_using()) { + struct ablkcipher_request *cryptd_req = + ablkcipher_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_ablkcipher_decrypt(cryptd_req); + } else { + struct blkcipher_desc desc; + desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm); + desc.info = req->info; + desc.flags = 0; + return crypto_blkcipher_crt(desc.tfm)->decrypt( + &desc, req->dst, req->src, req->nbytes); + } +} + +static void ablk_exit(struct crypto_tfm *tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + cryptd_free_ablkcipher(ctx->cryptd_tfm); +} + +static void ablk_init_common(struct crypto_tfm *tfm, + struct cryptd_ablkcipher *cryptd_tfm) +{ + struct async_aes_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) + + crypto_ablkcipher_reqsize(&cryptd_tfm->base); +} + +static int ablk_ecb_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-ecb-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_ecb_alg = { + .cra_name = "ecb(aes)", + .cra_driver_name = "ecb-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_ecb_alg.cra_list), + .cra_init = ablk_ecb_init, + .cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +static int ablk_cbc_init(struct crypto_tfm *tfm) +{ + struct cryptd_ablkcipher *cryptd_tfm; + + cryptd_tfm = cryptd_alloc_ablkcipher("__driver-cbc-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ablk_init_common(tfm, cryptd_tfm); + return 0; +} + +static struct crypto_alg ablk_cbc_alg = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct async_aes_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(ablk_cbc_alg.cra_list), + .cra_init = ablk_cbc_init, + 
.cra_exit = ablk_exit, + .cra_u = { + .ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = ablk_set_key, + .encrypt = ablk_encrypt, + .decrypt = ablk_decrypt, + }, + }, +}; + +static int __init aesni_init(void) +{ + int err; + + if (!cpu_has_aes) { + printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); + return -ENODEV; + } + if ((err = crypto_register_alg(&aesni_alg))) + goto aes_err; + if ((err = crypto_register_alg(&blk_ecb_alg))) + goto blk_ecb_err; + if ((err = crypto_register_alg(&blk_cbc_alg))) + goto blk_cbc_err; + if ((err = crypto_register_alg(&ablk_ecb_alg))) + goto ablk_ecb_err; + if ((err = crypto_register_alg(&ablk_cbc_alg))) + goto ablk_cbc_err; + + return err; + +ablk_cbc_err: + crypto_unregister_alg(&ablk_ecb_alg); +ablk_ecb_err: + crypto_unregister_alg(&blk_cbc_alg); +blk_cbc_err: + crypto_unregister_alg(&blk_ecb_alg); +blk_ecb_err: + crypto_unregister_alg(&aesni_alg); +aes_err: + return err; +} + +static void __exit aesni_exit(void) +{ + crypto_unregister_alg(&ablk_cbc_alg); + crypto_unregister_alg(&ablk_ecb_alg); + crypto_unregister_alg(&blk_cbc_alg); + crypto_unregister_alg(&blk_ecb_alg); + crypto_unregister_alg(&aesni_alg); +} + +module_init(aesni_init); +module_exit(aesni_exit); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 7301e60dc4a..0beba0d1468 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -213,6 +213,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm boot_cpu_has(X86_FEATURE_XMM) #define cpu_has_xmm2 boot_cpu_has(X86_FEATURE_XMM2) #define cpu_has_xmm3 boot_cpu_has(X86_FEATURE_XMM3) +#define cpu_has_aes boot_cpu_has(X86_FEATURE_AES) #define cpu_has_ht boot_cpu_has(X86_FEATURE_HT) #define cpu_has_mp boot_cpu_has(X86_FEATURE_MP) #define cpu_has_nx boot_cpu_has(X86_FEATURE_NX) -- cgit v1.2.3 From 563f346d04e8373739240604a51ce8529dd9f07e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 18 Jan 2009 20:33:33 +1100 Subject: crypto: sha-s390 - Switch to shash This patch converts the S390 sha algorithms to the new shash interface. With fixes by Jan Glauber. Signed-off-by: Herbert Xu --- arch/s390/crypto/sha.h | 6 ++-- arch/s390/crypto/sha1_s390.c | 40 +++++++++++---------- arch/s390/crypto/sha256_s390.c | 40 +++++++++++---------- arch/s390/crypto/sha512_s390.c | 80 ++++++++++++++++++++++-------------------- arch/s390/crypto/sha_common.c | 20 ++++++----- 5 files changed, 100 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 1ceafa571ea..f4e9dc71675 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -29,7 +29,9 @@ struct s390_sha_ctx { int func; /* KIMD function to use */ }; -void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len); -void s390_sha_final(struct crypto_tfm *tfm, u8 *out); +struct shash_desc; + +int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len); +int s390_sha_final(struct shash_desc *desc, u8 *out); #endif diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index b3cb5a89b00..e85ba348722 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -23,17 +23,17 @@ * any later version. 
* */ +#include #include #include -#include #include #include "crypt_s390.h" #include "sha.h" -static void sha1_init(struct crypto_tfm *tfm) +static int sha1_init(struct shash_desc *desc) { - struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA1_H0; sctx->state[1] = SHA1_H1; @@ -42,34 +42,36 @@ static void sha1_init(struct crypto_tfm *tfm) sctx->state[4] = SHA1_H4; sctx->count = 0; sctx->func = KIMD_SHA_1; + + return 0; } -static struct crypto_alg alg = { - .cra_name = "sha1", - .cra_driver_name= "sha1-s390", - .cra_priority = CRYPT_S390_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA1_DIGEST_SIZE, - .dia_init = sha1_init, - .dia_update = s390_sha_update, - .dia_final = s390_sha_final } } +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_init, + .update = s390_sha_update, + .final = s390_sha_final, + .descsize = sizeof(struct s390_sha_ctx), + .base = { + .cra_name = "sha1", + .cra_driver_name= "sha1-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } }; static int __init sha1_s390_init(void) { if (!crypt_s390_func_available(KIMD_SHA_1)) return -EOPNOTSUPP; - return crypto_register_alg(&alg); + return crypto_register_shash(&alg); } static void __exit sha1_s390_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_shash(&alg); } module_init(sha1_s390_init); diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 19c03fb6ba7..f9fefc56963 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -16,17 +16,17 @@ * any later version. 
* */ +#include #include #include -#include #include #include "crypt_s390.h" #include "sha.h" -static void sha256_init(struct crypto_tfm *tfm) +static int sha256_init(struct shash_desc *desc) { - struct s390_sha_ctx *sctx = crypto_tfm_ctx(tfm); + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA256_H0; sctx->state[1] = SHA256_H1; @@ -38,22 +38,24 @@ static void sha256_init(struct crypto_tfm *tfm) sctx->state[7] = SHA256_H7; sctx->count = 0; sctx->func = KIMD_SHA_256; + + return 0; } -static struct crypto_alg alg = { - .cra_name = "sha256", - .cra_driver_name = "sha256-s390", - .cra_priority = CRYPT_S390_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA256_DIGEST_SIZE, - .dia_init = sha256_init, - .dia_update = s390_sha_update, - .dia_final = s390_sha_final } } +static struct shash_alg alg = { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_init, + .update = s390_sha_update, + .final = s390_sha_final, + .descsize = sizeof(struct s390_sha_ctx), + .base = { + .cra_name = "sha256", + .cra_driver_name= "sha256-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } }; static int sha256_s390_init(void) @@ -61,12 +63,12 @@ static int sha256_s390_init(void) if (!crypt_s390_func_available(KIMD_SHA_256)) return -EOPNOTSUPP; - return crypto_register_alg(&alg); + return crypto_register_shash(&alg); } static void __exit sha256_s390_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_shash(&alg); } module_init(sha256_s390_init); diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 23c7861f6ae..420acf41b72 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -12,16 +12,16 @@ * any later version. 
* */ +#include #include #include -#include #include "sha.h" #include "crypt_s390.h" -static void sha512_init(struct crypto_tfm *tfm) +static int sha512_init(struct shash_desc *desc) { - struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); *(__u64 *)&ctx->state[0] = 0x6a09e667f3bcc908ULL; *(__u64 *)&ctx->state[2] = 0xbb67ae8584caa73bULL; @@ -33,29 +33,31 @@ static void sha512_init(struct crypto_tfm *tfm) *(__u64 *)&ctx->state[14] = 0x5be0cd19137e2179ULL; ctx->count = 0; ctx->func = KIMD_SHA_512; + + return 0; } -static struct crypto_alg sha512_alg = { - .cra_name = "sha512", - .cra_driver_name = "sha512-s390", - .cra_priority = CRYPT_S390_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(sha512_alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA512_DIGEST_SIZE, - .dia_init = sha512_init, - .dia_update = s390_sha_update, - .dia_final = s390_sha_final } } +static struct shash_alg sha512_alg = { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_init, + .update = s390_sha_update, + .final = s390_sha_final, + .descsize = sizeof(struct s390_sha_ctx), + .base = { + .cra_name = "sha512", + .cra_driver_name= "sha512-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } }; MODULE_ALIAS("sha512"); -static void sha384_init(struct crypto_tfm *tfm) +static int sha384_init(struct shash_desc *desc) { - struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm); + struct s390_sha_ctx *ctx = shash_desc_ctx(desc); *(__u64 *)&ctx->state[0] = 0xcbbb9d5dc1059ed8ULL; *(__u64 *)&ctx->state[2] = 0x629a292a367cd507ULL; @@ -67,22 +69,24 @@ static void sha384_init(struct crypto_tfm *tfm) *(__u64 *)&ctx->state[14] = 0x47b5481dbefa4fa4ULL; ctx->count = 0; ctx->func = KIMD_SHA_512; + + return 0; } -static struct crypto_alg sha384_alg = { - .cra_name = "sha384", - .cra_driver_name = "sha384-s390", - .cra_priority = CRYPT_S390_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_DIGEST, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct s390_sha_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(sha384_alg.cra_list), - .cra_u = { .digest = { - .dia_digestsize = SHA384_DIGEST_SIZE, - .dia_init = sha384_init, - .dia_update = s390_sha_update, - .dia_final = s390_sha_final } } +static struct shash_alg sha384_alg = { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_init, + .update = s390_sha_update, + .final = s390_sha_final, + .descsize = sizeof(struct s390_sha_ctx), + .base = { + .cra_name = "sha384", + .cra_driver_name= "sha384-s390", + .cra_priority = CRYPT_S390_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_ctxsize = sizeof(struct s390_sha_ctx), + .cra_module = THIS_MODULE, + } }; MODULE_ALIAS("sha384"); @@ -93,18 +97,18 @@ static int __init init(void) if (!crypt_s390_func_available(KIMD_SHA_512)) return -EOPNOTSUPP; - if ((ret = crypto_register_alg(&sha512_alg)) < 0) + if ((ret = crypto_register_shash(&sha512_alg)) < 0) goto out; - if ((ret = crypto_register_alg(&sha384_alg)) < 0) - crypto_unregister_alg(&sha512_alg); + if ((ret = crypto_register_shash(&sha384_alg)) < 0) + crypto_unregister_shash(&sha512_alg); out: return ret; } static void __exit fini(void) { - crypto_unregister_alg(&sha512_alg); - crypto_unregister_alg(&sha384_alg); + crypto_unregister_shash(&sha512_alg); + 
crypto_unregister_shash(&sha384_alg);
 }

 module_init(init);
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 9d6eb8c3d37..7903ec47e6b 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -13,14 +13,14 @@
  *
  */

-#include <linux/crypto.h>
+#include <crypto/internal/hash.h>
 #include "sha.h"
 #include "crypt_s390.h"

-void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
+int s390_sha_update(struct shash_desc *desc, const u8 *data, unsigned int len)
 {
-	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
-	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	unsigned int index;
 	int ret;

@@ -51,13 +51,15 @@ void s390_sha_update(struct crypto_tfm *tfm, const u8 *data, unsigned int len)
 store:
 	if (len)
 		memcpy(ctx->buf + index , data, len);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_sha_update);

-void s390_sha_final(struct crypto_tfm *tfm, u8 *out)
+int s390_sha_final(struct shash_desc *desc, u8 *out)
 {
-	struct s390_sha_ctx *ctx = crypto_tfm_ctx(tfm);
-	unsigned int bsize = crypto_tfm_alg_blocksize(tfm);
+	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	u64 bits;
 	unsigned int index, end, plen;
 	int ret;
@@ -87,9 +89,11 @@ void s390_sha_final(struct crypto_tfm *tfm, u8 *out)
 	BUG_ON(ret != end);

 	/* copy digest to out */
-	memcpy(out, ctx->state, crypto_hash_digestsize(crypto_hash_cast(tfm)));
+	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
 	/* wipe context */
 	memset(ctx, 0, sizeof *ctx);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_sha_final);
--
cgit v1.2.3

From 049359d655277c382683a6030ae0bac485568ffc Mon Sep 17 00:00:00 2001
From: James Hsiao
Date: Thu, 5 Feb 2009 16:18:13 +1100
Subject: crypto: amcc - Add crypt4xx driver

This patch adds support for the AMCC ppc4xx security device driver.
This is the initial release, which includes the driver framework with
AES and SHA1 algorithm support. The remaining algorithms will be
released in the near future.
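The two device tree nodes below each carry the generic
"amcc,ppc4xx-crypto" compatible alongside a SoC-specific one, so a
single driver can bind on either board. A rough sketch of how that
binding would look (not taken from the driver itself, which is outside
this diff; it assumes the of_platform API of this era, and
crypto4xx_probe/crypto4xx_remove are placeholder stubs):

	#include <linux/of_platform.h>

	static int crypto4xx_probe(struct of_device *ofdev,
				   const struct of_device_id *match)
	{
		/* placeholder: map "reg", hook "interrupts", register algs */
		return -ENODEV;
	}

	static int crypto4xx_remove(struct of_device *ofdev)
	{
		return 0;	/* placeholder */
	}

	/* Match on the generic compatible shared by both boards. */
	static const struct of_device_id crypto4xx_match[] = {
		{ .compatible = "amcc,ppc4xx-crypto", },
		{ },
	};

	static struct of_platform_driver crypto4xx_driver = {
		.name		= "crypto4xx",
		.match_table	= crypto4xx_match,
		.probe		= crypto4xx_probe,
		.remove		= crypto4xx_remove,
	};

Such a driver would be registered with of_register_platform_driver()
from its module init.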
Signed-off-by: James Hsiao Signed-off-by: Herbert Xu --- arch/powerpc/boot/dts/canyonlands.dts | 7 +++++++ arch/powerpc/boot/dts/kilauea.dts | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts index 8b5ba8261a3..4447def69dc 100644 --- a/arch/powerpc/boot/dts/canyonlands.dts +++ b/arch/powerpc/boot/dts/canyonlands.dts @@ -127,6 +127,13 @@ dcr-reg = <0x010 0x002>; }; + CRYPTO: crypto@180000 { + compatible = "amcc,ppc460ex-crypto", "amcc,ppc4xx-crypto"; + reg = <4 0x00180000 0x80400>; + interrupt-parent = <&UIC0>; + interrupts = <0x1d 0x4>; + }; + MAL0: mcmal { compatible = "ibm,mcmal-460ex", "ibm,mcmal2"; dcr-reg = <0x180 0x062>; diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts index 2804444812e..5e6b08ff6f6 100644 --- a/arch/powerpc/boot/dts/kilauea.dts +++ b/arch/powerpc/boot/dts/kilauea.dts @@ -97,6 +97,13 @@ 0x6 0x4>; /* ECC SEC Error */ }; + CRYPTO: crypto@ef700000 { + compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto"; + reg = <0xef700000 0x80400>; + interrupt-parent = <&UIC0>; + interrupts = <0x17 0x2>; + }; + MAL0: mcmal { compatible = "ibm,mcmal-405ex", "ibm,mcmal2"; dcr-reg = <0x180 0x062>; -- cgit v1.2.3 From 949abe574739848b1e68271fbac86c3cb4506aad Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 21 Mar 2009 21:12:19 +0800 Subject: crypto: sha512-s390 - Add missing block size I missed the block size when converting sha512-s390 to shash. Tested-by: Heiko Carstens Signed-off-by: Herbert Xu --- arch/s390/crypto/sha512_s390.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 420acf41b72..83192bfc804 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -84,6 +84,7 @@ static struct shash_alg sha384_alg = { .cra_driver_name= "sha384-s390", .cra_priority = CRYPT_S390_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct s390_sha_ctx), .cra_module = THIS_MODULE, } -- cgit v1.2.3
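For context on the one-line fix above: the shared s390 update path
(sha_common.c, earlier in this series) derives all of its buffering from
crypto_shash_blocksize(), which simply returns cra_blocksize, so leaving
the field zero breaks the partial-block bookkeeping. A standalone
illustration (the modulo form is a stand-in for the kernel's index
computation, not a copy of it; 128 is SHA384_BLOCK_SIZE, which SHA-384
shares with SHA-512):

	#include <assert.h>

	#define SHA384_BLOCK_SIZE 128	/* the value the fix restores */

	/* Stand-in for the partial-block index kept by s390_sha_update():
	 * with cra_blocksize left at 0, this arithmetic has no answer. */
	static unsigned int partial_index(unsigned long count,
					  unsigned int bsize)
	{
		return count % bsize;
	}

	int main(void)
	{
		/* 300 bytes hashed = 2 full 128-byte blocks + 44 buffered */
		assert(partial_index(300, SHA384_BLOCK_SIZE) == 44);
		return 0;
	}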