Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Feb 2013 23:56:15 +0000 (15:56 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Feb 2013 23:56:15 +0000 (15:56 -0800)
Pull crypto update from Herbert Xu:
 "Here is the crypto update for 3.9:

   - Added accelerated implementation of crc32 using pclmulqdq.

   - Added test vector for fcrypt.

   - Added support for OMAP4/AM33XX cipher and hash.

   - Fixed loose crypto_user input checks.

   - Misc fixes"

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (43 commits)
  crypto: user - ensure user supplied strings are nul-terminated
  crypto: user - fix empty string test in report API
  crypto: user - fix info leaks in report API
  crypto: caam - Added property fsl,sec-era in SEC4.0 device tree binding.
  crypto: use ERR_CAST
  crypto: atmel-aes - adjust duplicate test
  crypto: crc32-pclmul - Kill warning on x86-32
  crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels
  crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC
  crypto: x86/serpent - use ENTRY/ENDPROC for assembler functions and localize jump targets
  crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assembler functions and rename ECRYPT_* to salsa20_*
  crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assembler functions
  crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC
  crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions
  crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets
  crypto: aesni-intel - add ENDPROC statements for assembler functions
  crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets
  crypto: testmgr - add test vector for fcrypt
  ...

51 files changed:
Documentation/devicetree/bindings/crypto/fsl-sec4.txt
arch/x86/crypto/Makefile
arch/x86/crypto/aes-i586-asm_32.S
arch/x86/crypto/aes-x86_64-asm_64.S
arch/x86/crypto/aesni-intel_asm.S
arch/x86/crypto/blowfish-x86_64-asm_64.S
arch/x86/crypto/camellia-aesni-avx-asm_64.S
arch/x86/crypto/camellia-x86_64-asm_64.S
arch/x86/crypto/cast5-avx-x86_64-asm_64.S
arch/x86/crypto/cast6-avx-x86_64-asm_64.S
arch/x86/crypto/crc32-pclmul_asm.S [new file with mode: 0644]
arch/x86/crypto/crc32-pclmul_glue.c [new file with mode: 0644]
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/crypto/ghash-clmulni-intel_asm.S
arch/x86/crypto/salsa20-i586-asm_32.S
arch/x86/crypto/salsa20-x86_64-asm_64.S
arch/x86/crypto/salsa20_glue.c
arch/x86/crypto/serpent-avx-x86_64-asm_64.S
arch/x86/crypto/serpent-sse2-i586-asm_32.S
arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
arch/x86/crypto/sha1_ssse3_asm.S
arch/x86/crypto/twofish-avx-x86_64-asm_64.S
arch/x86/crypto/twofish-i586-asm_32.S
arch/x86/crypto/twofish-x86_64-asm_64-3way.S
arch/x86/crypto/twofish-x86_64-asm_64.S
crypto/Kconfig
crypto/Makefile
crypto/ablkcipher.c
crypto/aead.c
crypto/ahash.c
crypto/algapi.c
crypto/authenc.c
crypto/authencesn.c
crypto/blkcipher.c
crypto/ccm.c
crypto/chainiv.c
crypto/crc32.c [new file with mode: 0644]
crypto/crypto_user.c
crypto/ctr.c
crypto/cts.c
crypto/gcm.c
crypto/pcompress.c
crypto/rng.c
crypto/seqiv.c
crypto/shash.c
crypto/testmgr.c
drivers/crypto/atmel-aes.c
drivers/crypto/bfin_crc.c
drivers/crypto/omap-aes.c
drivers/crypto/omap-sham.c
drivers/crypto/s5p-sss.c

index 6d21c02..e402277 100644 (file)
@@ -113,7 +113,7 @@ PROPERTIES
 EXAMPLE
        crypto@300000 {
                compatible = "fsl,sec-v4.0";
-               fsl,sec-era = <0x2>;
+               fsl,sec-era = <2>;
                #address-cells = <1>;
                #size-cells = <1>;
                reg = <0x300000 0x10000>;
index e0ca7c9..63947a8 100644 (file)
@@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
 
 obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
+obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 
 aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
@@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
 crc32c-intel-y := crc32c-intel_glue.o
 crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
+crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
index b949ec2..2849dbc 100644 (file)
@@ -36,6 +36,7 @@
 .file "aes-i586-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
 // AES (Rijndael) Encryption Subroutine
 /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
-.global  aes_enc_blk
-
 .extern  crypto_ft_tab
 .extern  crypto_fl_tab
 
-.align 4
-
-aes_enc_blk:
+ENTRY(aes_enc_blk)
        push    %ebp
        mov     ctx(%esp),%ebp
 
@@ -290,18 +287,15 @@ aes_enc_blk:
        mov     %r0,(%ebp)
        pop     %ebp
        ret
+ENDPROC(aes_enc_blk)
 
 // AES (Rijndael) Decryption Subroutine
 /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
 
-.global  aes_dec_blk
-
 .extern  crypto_it_tab
 .extern  crypto_il_tab
 
-.align 4
-
-aes_dec_blk:
+ENTRY(aes_dec_blk)
        push    %ebp
        mov     ctx(%esp),%ebp
 
@@ -365,3 +359,4 @@ aes_dec_blk:
        mov     %r0,(%ebp)
        pop     %ebp
        ret
+ENDPROC(aes_dec_blk)
index 5b577d5..9105655 100644 (file)
@@ -15,6 +15,7 @@
 
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define R1     %rax
 #define R11    %r11
 
 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
-       .global FUNC;                   \
-       .type   FUNC,@function;         \
-       .align  8;                      \
-FUNC:  movq    r1,r2;                  \
+       ENTRY(FUNC);                    \
+       movq    r1,r2;                  \
        movq    r3,r4;                  \
        leaq    KEY+48(r8),r9;          \
        movq    r10,r11;                \
@@ -71,14 +70,15 @@ FUNC:       movq    r1,r2;                  \
        je      B192;                   \
        leaq    32(r9),r9;
 
-#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
        movq    r1,r2;                  \
        movq    r3,r4;                  \
        movl    r5 ## E,(r9);           \
        movl    r6 ## E,4(r9);          \
        movl    r7 ## E,8(r9);          \
        movl    r8 ## E,12(r9);         \
-       ret;
+       ret;                            \
+       ENDPROC(FUNC);
 
 #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
        movzbl  r2 ## H,r5 ## E;        \
@@ -133,7 +133,7 @@ FUNC:       movq    r1,r2;                  \
 #define entry(FUNC,KEY,B128,B192) \
        prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
 
-#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
 
 #define encrypt_round(TAB,OFFSET) \
        round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
@@ -151,12 +151,12 @@ FUNC:     movq    r1,r2;                  \
 
 /* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-       entry(aes_enc_blk,0,enc128,enc192)
+       entry(aes_enc_blk,0,.Le128,.Le192)
        encrypt_round(crypto_ft_tab,-96)
        encrypt_round(crypto_ft_tab,-80)
-enc192:        encrypt_round(crypto_ft_tab,-64)
+.Le192:        encrypt_round(crypto_ft_tab,-64)
        encrypt_round(crypto_ft_tab,-48)
-enc128:        encrypt_round(crypto_ft_tab,-32)
+.Le128:        encrypt_round(crypto_ft_tab,-32)
        encrypt_round(crypto_ft_tab,-16)
        encrypt_round(crypto_ft_tab,  0)
        encrypt_round(crypto_ft_tab, 16)
@@ -166,16 +166,16 @@ enc128:   encrypt_round(crypto_ft_tab,-32)
        encrypt_round(crypto_ft_tab, 80)
        encrypt_round(crypto_ft_tab, 96)
        encrypt_final(crypto_fl_tab,112)
-       return
+       return(aes_enc_blk)
 
 /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
 
-       entry(aes_dec_blk,240,dec128,dec192)
+       entry(aes_dec_blk,240,.Ld128,.Ld192)
        decrypt_round(crypto_it_tab,-96)
        decrypt_round(crypto_it_tab,-80)
-dec192:        decrypt_round(crypto_it_tab,-64)
+.Ld192:        decrypt_round(crypto_it_tab,-64)
        decrypt_round(crypto_it_tab,-48)
-dec128:        decrypt_round(crypto_it_tab,-32)
+.Ld128:        decrypt_round(crypto_it_tab,-32)
        decrypt_round(crypto_it_tab,-16)
        decrypt_round(crypto_it_tab,  0)
        decrypt_round(crypto_it_tab, 16)
@@ -185,4 +185,4 @@ dec128:     decrypt_round(crypto_it_tab,-32)
        decrypt_round(crypto_it_tab, 80)
        decrypt_round(crypto_it_tab, 96)
        decrypt_final(crypto_il_tab,112)
-       return
+       return(aes_dec_blk)
index 3470624..04b7977 100644 (file)
@@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
 * poly = x^128 + x^127 + x^126 + x^121 + 1
 *
 *****************************************************************************/
-
 ENTRY(aesni_gcm_dec)
        push    %r12
        push    %r13
@@ -1437,6 +1436,7 @@ _return_T_done_decrypt:
        pop     %r13
        pop     %r12
        ret
+ENDPROC(aesni_gcm_dec)
 
 
 /*****************************************************************************
@@ -1700,10 +1700,12 @@ _return_T_done_encrypt:
        pop     %r13
        pop     %r12
        ret
+ENDPROC(aesni_gcm_enc)
 
 #endif
 
 
+.align 4
 _key_expansion_128:
 _key_expansion_256a:
        pshufd $0b11111111, %xmm1, %xmm1
@@ -1715,6 +1717,8 @@ _key_expansion_256a:
        movaps %xmm0, (TKEYP)
        add $0x10, TKEYP
        ret
+ENDPROC(_key_expansion_128)
+ENDPROC(_key_expansion_256a)
 
 .align 4
 _key_expansion_192a:
@@ -1739,6 +1743,7 @@ _key_expansion_192a:
        movaps %xmm1, 0x10(TKEYP)
        add $0x20, TKEYP
        ret
+ENDPROC(_key_expansion_192a)
 
 .align 4
 _key_expansion_192b:
@@ -1758,6 +1763,7 @@ _key_expansion_192b:
        movaps %xmm0, (TKEYP)
        add $0x10, TKEYP
        ret
+ENDPROC(_key_expansion_192b)
 
 .align 4
 _key_expansion_256b:
@@ -1770,6 +1776,7 @@ _key_expansion_256b:
        movaps %xmm2, (TKEYP)
        add $0x10, TKEYP
        ret
+ENDPROC(_key_expansion_256b)
 
 /*
  * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key)
        popl KEYP
 #endif
        ret
+ENDPROC(aesni_set_key)
 
 /*
  * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -1903,6 +1911,7 @@ ENTRY(aesni_enc)
        popl KEYP
 #endif
        ret
+ENDPROC(aesni_enc)
 
 /*
  * _aesni_enc1:                internal ABI
@@ -1960,6 +1969,7 @@ _aesni_enc1:
        movaps 0x70(TKEYP), KEY
        AESENCLAST KEY STATE
        ret
+ENDPROC(_aesni_enc1)
 
 /*
  * _aesni_enc4:        internal ABI
@@ -2068,6 +2078,7 @@ _aesni_enc4:
        AESENCLAST KEY STATE3
        AESENCLAST KEY STATE4
        ret
+ENDPROC(_aesni_enc4)
 
 /*
  * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
@@ -2090,6 +2101,7 @@ ENTRY(aesni_dec)
        popl KEYP
 #endif
        ret
+ENDPROC(aesni_dec)
 
 /*
  * _aesni_dec1:                internal ABI
@@ -2147,6 +2159,7 @@ _aesni_dec1:
        movaps 0x70(TKEYP), KEY
        AESDECLAST KEY STATE
        ret
+ENDPROC(_aesni_dec1)
 
 /*
  * _aesni_dec4:        internal ABI
@@ -2255,6 +2268,7 @@ _aesni_dec4:
        AESDECLAST KEY STATE3
        AESDECLAST KEY STATE4
        ret
+ENDPROC(_aesni_dec4)
 
 /*
  * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc)
        popl LEN
 #endif
        ret
+ENDPROC(aesni_ecb_enc)
 
 /*
  * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec)
        popl LEN
 #endif
        ret
+ENDPROC(aesni_ecb_dec)
 
 /*
  * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc)
        popl IVP
 #endif
        ret
+ENDPROC(aesni_cbc_enc)
 
 /*
  * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec)
        popl IVP
 #endif
        ret
+ENDPROC(aesni_cbc_dec)
 
 #ifdef __x86_64__
 .align 16
@@ -2527,6 +2545,7 @@ _aesni_inc_init:
        MOVQ_R64_XMM TCTR_LOW INC
        MOVQ_R64_XMM CTR TCTR_LOW
        ret
+ENDPROC(_aesni_inc_init)
 
 /*
  * _aesni_inc:         internal ABI
@@ -2555,6 +2574,7 @@ _aesni_inc:
        movaps CTR, IV
        PSHUFB_XMM BSWAP_MASK IV
        ret
+ENDPROC(_aesni_inc)
 
 /*
  * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
@@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc)
        movups IV, (IVP)
 .Lctr_enc_just_ret:
        ret
+ENDPROC(aesni_ctr_enc)
 #endif
index 391d245..246c670 100644 (file)
@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "blowfish-x86_64-asm.S"
 .text
 
        bswapq                  RX0; \
        xorq RX0,               (RIO);
 
-.align 8
-.global __blowfish_enc_blk
-.type   __blowfish_enc_blk,@function;
-
-__blowfish_enc_blk:
+ENTRY(__blowfish_enc_blk)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -148,19 +146,16 @@ __blowfish_enc_blk:
 
        movq %r10, RIO;
        test %cl, %cl;
-       jnz __enc_xor;
+       jnz .L__enc_xor;
 
        write_block();
        ret;
-__enc_xor:
+.L__enc_xor:
        xor_block();
        ret;
+ENDPROC(__blowfish_enc_blk)
 
-.align 8
-.global blowfish_dec_blk
-.type   blowfish_dec_blk,@function;
-
-blowfish_dec_blk:
+ENTRY(blowfish_dec_blk)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -189,6 +184,7 @@ blowfish_dec_blk:
        movq %r11, %rbp;
 
        ret;
+ENDPROC(blowfish_dec_blk)
 
 /**********************************************************************
   4-way blowfish, four blocks parallel
@@ -300,11 +296,7 @@ blowfish_dec_blk:
        bswapq                  RX3; \
        xorq RX3,               24(RIO);
 
-.align 8
-.global __blowfish_enc_blk_4way
-.type   __blowfish_enc_blk_4way,@function;
-
-__blowfish_enc_blk_4way:
+ENTRY(__blowfish_enc_blk_4way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -336,7 +328,7 @@ __blowfish_enc_blk_4way:
        movq %r11, RIO;
 
        test %bpl, %bpl;
-       jnz __enc_xor4;
+       jnz .L__enc_xor4;
 
        write_block4();
 
@@ -344,18 +336,15 @@ __blowfish_enc_blk_4way:
        popq %rbp;
        ret;
 
-__enc_xor4:
+.L__enc_xor4:
        xor_block4();
 
        popq %rbx;
        popq %rbp;
        ret;
+ENDPROC(__blowfish_enc_blk_4way)
 
-.align 8
-.global blowfish_dec_blk_4way
-.type   blowfish_dec_blk_4way,@function;
-
-blowfish_dec_blk_4way:
+ENTRY(blowfish_dec_blk_4way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -387,4 +376,4 @@ blowfish_dec_blk_4way:
        popq %rbp;
 
        ret;
-
+ENDPROC(blowfish_dec_blk_4way)
index 2306d2e..cfc1634 100644 (file)
@@ -15,6 +15,8 @@
  *     http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
  */
 
+#include <linux/linkage.h>
+
 #define CAMELLIA_TABLE_BYTE_LEN 272
 
 /* struct camellia_ctx: */
@@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
                  %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
                  %rcx, (%r9));
        ret;
+ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
 .align 8
 roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
@@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
                  %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
                  %rax, (%r9));
        ret;
+ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 /*
  * IN/OUT:
@@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
 .text
 
 .align 8
-.type   __camellia_enc_blk16,@function;
-
 __camellia_enc_blk16:
        /* input:
         *      %rdi: ctx, CTX
@@ -793,10 +795,9 @@ __camellia_enc_blk16:
                     %xmm15, %rax, %rcx, 24);
 
        jmp .Lenc_done;
+ENDPROC(__camellia_enc_blk16)
 
 .align 8
-.type   __camellia_dec_blk16,@function;
-
 __camellia_dec_blk16:
        /* input:
         *      %rdi: ctx, CTX
@@ -877,12 +878,9 @@ __camellia_dec_blk16:
              ((key_table + (24) * 8) + 4)(CTX));
 
        jmp .Ldec_max24;
+ENDPROC(__camellia_dec_blk16)
 
-.align 8
-.global camellia_ecb_enc_16way
-.type   camellia_ecb_enc_16way,@function;
-
-camellia_ecb_enc_16way:
+ENTRY(camellia_ecb_enc_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (16 blocks)
@@ -903,12 +901,9 @@ camellia_ecb_enc_16way:
                     %xmm8, %rsi);
 
        ret;
+ENDPROC(camellia_ecb_enc_16way)
 
-.align 8
-.global camellia_ecb_dec_16way
-.type   camellia_ecb_dec_16way,@function;
-
-camellia_ecb_dec_16way:
+ENTRY(camellia_ecb_dec_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (16 blocks)
@@ -934,12 +929,9 @@ camellia_ecb_dec_16way:
                     %xmm8, %rsi);
 
        ret;
+ENDPROC(camellia_ecb_dec_16way)
 
-.align 8
-.global camellia_cbc_dec_16way
-.type   camellia_cbc_dec_16way,@function;
-
-camellia_cbc_dec_16way:
+ENTRY(camellia_cbc_dec_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (16 blocks)
@@ -986,6 +978,7 @@ camellia_cbc_dec_16way:
                     %xmm8, %rsi);
 
        ret;
+ENDPROC(camellia_cbc_dec_16way)
 
 #define inc_le128(x, minus_one, tmp) \
        vpcmpeqq minus_one, x, tmp; \
@@ -993,11 +986,7 @@ camellia_cbc_dec_16way:
        vpslldq $8, tmp, tmp; \
        vpsubq tmp, x, x;
 
-.align 8
-.global camellia_ctr_16way
-.type   camellia_ctr_16way,@function;
-
-camellia_ctr_16way:
+ENTRY(camellia_ctr_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst (16 blocks)
@@ -1100,3 +1089,4 @@ camellia_ctr_16way:
                     %xmm8, %rsi);
 
        ret;
+ENDPROC(camellia_ctr_16way)
index 0b33743..310319c 100644 (file)
@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "camellia-x86_64-asm_64.S"
 .text
 
        bswapq                          RAB0; \
        movq RAB0,                      4*2(RIO);
 
-.global __camellia_enc_blk;
-.type   __camellia_enc_blk,@function;
-
-__camellia_enc_blk:
+ENTRY(__camellia_enc_blk)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -214,33 +213,31 @@ __camellia_enc_blk:
        movl $24, RT1d; /* max */
 
        cmpb $16, key_length(CTX);
-       je __enc_done;
+       je .L__enc_done;
 
        enc_fls(24);
        enc_rounds(24);
        movl $32, RT1d; /* max */
 
-__enc_done:
+.L__enc_done:
        testb RXORbl, RXORbl;
        movq RDST, RIO;
 
-       jnz __enc_xor;
+       jnz .L__enc_xor;
 
        enc_outunpack(mov, RT1);
 
        movq RRBP, %rbp;
        ret;
 
-__enc_xor:
+.L__enc_xor:
        enc_outunpack(xor, RT1);
 
        movq RRBP, %rbp;
        ret;
+ENDPROC(__camellia_enc_blk)
 
-.global camellia_dec_blk;
-.type   camellia_dec_blk,@function;
-
-camellia_dec_blk:
+ENTRY(camellia_dec_blk)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -258,12 +255,12 @@ camellia_dec_blk:
        dec_inpack(RT2);
 
        cmpb $24, RT2bl;
-       je __dec_rounds16;
+       je .L__dec_rounds16;
 
        dec_rounds(24);
        dec_fls(24);
 
-__dec_rounds16:
+.L__dec_rounds16:
        dec_rounds(16);
        dec_fls(16);
        dec_rounds(8);
@@ -276,6 +273,7 @@ __dec_rounds16:
 
        movq RRBP, %rbp;
        ret;
+ENDPROC(camellia_dec_blk)
 
 /**********************************************************************
   2-way camellia
@@ -426,10 +424,7 @@ __dec_rounds16:
                bswapq                          RAB1; \
                movq RAB1,                      12*2(RIO);
 
-.global __camellia_enc_blk_2way;
-.type   __camellia_enc_blk_2way,@function;
-
-__camellia_enc_blk_2way:
+ENTRY(__camellia_enc_blk_2way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -453,16 +448,16 @@ __camellia_enc_blk_2way:
        movl $24, RT2d; /* max */
 
        cmpb $16, key_length(CTX);
-       je __enc2_done;
+       je .L__enc2_done;
 
        enc_fls2(24);
        enc_rounds2(24);
        movl $32, RT2d; /* max */
 
-__enc2_done:
+.L__enc2_done:
        test RXORbl, RXORbl;
        movq RDST, RIO;
-       jnz __enc2_xor;
+       jnz .L__enc2_xor;
 
        enc_outunpack2(mov, RT2);
 
@@ -470,17 +465,15 @@ __enc2_done:
        popq %rbx;
        ret;
 
-__enc2_xor:
+.L__enc2_xor:
        enc_outunpack2(xor, RT2);
 
        movq RRBP, %rbp;
        popq %rbx;
        ret;
+ENDPROC(__camellia_enc_blk_2way)
 
-.global camellia_dec_blk_2way;
-.type   camellia_dec_blk_2way,@function;
-
-camellia_dec_blk_2way:
+ENTRY(camellia_dec_blk_2way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -499,12 +492,12 @@ camellia_dec_blk_2way:
        dec_inpack2(RT2);
 
        cmpb $24, RT2bl;
-       je __dec2_rounds16;
+       je .L__dec2_rounds16;
 
        dec_rounds2(24);
        dec_fls2(24);
 
-__dec2_rounds16:
+.L__dec2_rounds16:
        dec_rounds2(16);
        dec_fls2(16);
        dec_rounds2(8);
@@ -518,3 +511,4 @@ __dec2_rounds16:
        movq RRBP, %rbp;
        movq RXOR, %rbx;
        ret;
+ENDPROC(camellia_dec_blk_2way)
index 15b00ac..c35fd5d 100644 (file)
@@ -23,6 +23,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "cast5-avx-x86_64-asm_64.S"
 
 .extern cast_s1
 .text
 
 .align 16
-.type   __cast5_enc_blk16,@function;
-
 __cast5_enc_blk16:
        /* input:
         *      %rdi: ctx, CTX
@@ -263,14 +263,14 @@ __cast5_enc_blk16:
 
        movzbl rr(CTX), %eax;
        testl %eax, %eax;
-       jnz __skip_enc;
+       jnz .L__skip_enc;
 
        round(RL, RR, 12, 1);
        round(RR, RL, 13, 2);
        round(RL, RR, 14, 3);
        round(RR, RL, 15, 1);
 
-__skip_enc:
+.L__skip_enc:
        popq %rbx;
        popq %rbp;
 
@@ -282,10 +282,9 @@ __skip_enc:
        outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
 
        ret;
+ENDPROC(__cast5_enc_blk16)
 
 .align 16
-.type   __cast5_dec_blk16,@function;
-
 __cast5_dec_blk16:
        /* input:
         *      %rdi: ctx, CTX
@@ -323,14 +322,14 @@ __cast5_dec_blk16:
 
        movzbl rr(CTX), %eax;
        testl %eax, %eax;
-       jnz __skip_dec;
+       jnz .L__skip_dec;
 
        round(RL, RR, 15, 1);
        round(RR, RL, 14, 3);
        round(RL, RR, 13, 2);
        round(RR, RL, 12, 1);
 
-__dec_tail:
+.L__dec_tail:
        round(RL, RR, 11, 3);
        round(RR, RL, 10, 2);
        round(RL, RR, 9, 1);
@@ -355,15 +354,12 @@ __dec_tail:
 
        ret;
 
-__skip_dec:
+.L__skip_dec:
        vpsrldq $4, RKR, RKR;
-       jmp __dec_tail;
+       jmp .L__dec_tail;
+ENDPROC(__cast5_dec_blk16)
 
-.align 16
-.global cast5_ecb_enc_16way
-.type   cast5_ecb_enc_16way,@function;
-
-cast5_ecb_enc_16way:
+ENTRY(cast5_ecb_enc_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -393,12 +389,9 @@ cast5_ecb_enc_16way:
        vmovdqu RL4, (7*4*4)(%r11);
 
        ret;
+ENDPROC(cast5_ecb_enc_16way)
 
-.align 16
-.global cast5_ecb_dec_16way
-.type   cast5_ecb_dec_16way,@function;
-
-cast5_ecb_dec_16way:
+ENTRY(cast5_ecb_dec_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -428,12 +421,9 @@ cast5_ecb_dec_16way:
        vmovdqu RL4, (7*4*4)(%r11);
 
        ret;
+ENDPROC(cast5_ecb_dec_16way)
 
-.align 16
-.global cast5_cbc_dec_16way
-.type   cast5_cbc_dec_16way,@function;
-
-cast5_cbc_dec_16way:
+ENTRY(cast5_cbc_dec_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -480,12 +470,9 @@ cast5_cbc_dec_16way:
        popq %r12;
 
        ret;
+ENDPROC(cast5_cbc_dec_16way)
 
-.align 16
-.global cast5_ctr_16way
-.type   cast5_ctr_16way,@function;
-
-cast5_ctr_16way:
+ENTRY(cast5_ctr_16way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -556,3 +543,4 @@ cast5_ctr_16way:
        popq %r12;
 
        ret;
+ENDPROC(cast5_ctr_16way)
index 2569d0d..f93b610 100644 (file)
@@ -23,6 +23,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "cast6-avx-x86_64-asm_64.S"
 .text
 
 .align 8
-.type   __cast6_enc_blk8,@function;
-
 __cast6_enc_blk8:
        /* input:
         *      %rdi: ctx, CTX
@@ -295,10 +294,9 @@ __cast6_enc_blk8:
        outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
        ret;
+ENDPROC(__cast6_enc_blk8)
 
 .align 8
-.type   __cast6_dec_blk8,@function;
-
 __cast6_dec_blk8:
        /* input:
         *      %rdi: ctx, CTX
@@ -341,12 +339,9 @@ __cast6_dec_blk8:
        outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
        ret;
+ENDPROC(__cast6_dec_blk8)
 
-.align 8
-.global cast6_ecb_enc_8way
-.type   cast6_ecb_enc_8way,@function;
-
-cast6_ecb_enc_8way:
+ENTRY(cast6_ecb_enc_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -362,12 +357,9 @@ cast6_ecb_enc_8way:
        store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
        ret;
+ENDPROC(cast6_ecb_enc_8way)
 
-.align 8
-.global cast6_ecb_dec_8way
-.type   cast6_ecb_dec_8way,@function;
-
-cast6_ecb_dec_8way:
+ENTRY(cast6_ecb_dec_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -383,12 +375,9 @@ cast6_ecb_dec_8way:
        store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
        ret;
+ENDPROC(cast6_ecb_dec_8way)
 
-.align 8
-.global cast6_cbc_dec_8way
-.type   cast6_cbc_dec_8way,@function;
-
-cast6_cbc_dec_8way:
+ENTRY(cast6_cbc_dec_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -409,12 +398,9 @@ cast6_cbc_dec_8way:
        popq %r12;
 
        ret;
+ENDPROC(cast6_cbc_dec_8way)
 
-.align 8
-.global cast6_ctr_8way
-.type   cast6_ctr_8way,@function;
-
-cast6_ctr_8way:
+ENTRY(cast6_ctr_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -437,3 +423,4 @@ cast6_ctr_8way:
        popq %r12;
 
        ret;
+ENDPROC(cast6_ctr_8way)
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
new file mode 100644 (file)
index 0000000..c833501
--- /dev/null
@@ -0,0 +1,246 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *           Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/inst.h>
+
+
+.align 16
+/*
+ * [(x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
+ * #define CONSTANT_R1  0x154442bd4LL
+ *
+ * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+ * #define CONSTANT_R2  0x1c6e41596LL
+ */
+.Lconstant_R2R1:
+       .octa 0x00000001c6e415960000000154442bd4
+/*
+ * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+ * #define CONSTANT_R3  0x1751997d0LL
+ *
+ * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+ * #define CONSTANT_R4  0x0ccaa009eLL
+ */
+.Lconstant_R4R3:
+       .octa 0x00000000ccaa009e00000001751997d0
+/*
+ * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+ * #define CONSTANT_R5  0x163cd6124LL
+ */
+.Lconstant_R5:
+       .octa 0x00000000000000000000000163cd6124
+.Lconstant_mask32:
+       .octa 0x000000000000000000000000FFFFFFFF
+/*
+ * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+ *
+ * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
+ * #define CONSTANT_RU  0x1F7011641LL
+ */
+.Lconstant_RUpoly:
+       .octa 0x00000001F701164100000001DB710641
+
+#define CONSTANT %xmm0
+
+#ifdef __x86_64__
+#define BUF     %rdi
+#define LEN     %rsi
+#define CRC     %edx
+#else
+#define BUF     %eax
+#define LEN     %edx
+#define CRC     %ecx
+#endif
+
+
+
+.text
+/**
+ *      Calculate crc32
+ *      BUF - buffer (16 bytes aligned)
+ *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
+ *      CRC - initial crc32
+ *      return %eax crc32
+ *      uint crc32_pclmul_le_16(unsigned char const *buffer,
+ *                          size_t len, uint crc32)
+ */
+.globl crc32_pclmul_le_16
+.align 4, 0x90
+crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */
+       movdqa  (BUF), %xmm1
+       movdqa  0x10(BUF), %xmm2
+       movdqa  0x20(BUF), %xmm3
+       movdqa  0x30(BUF), %xmm4
+       movd    CRC, CONSTANT
+       pxor    CONSTANT, %xmm1
+       sub     $0x40, LEN
+       add     $0x40, BUF
+#ifndef __x86_64__
+       /* This is for position independent code(-fPIC) support for 32bit */
+       call    delta
+delta:
+       pop     %ecx
+#endif
+       cmp     $0x40, LEN
+       jb      less_64
+
+#ifdef __x86_64__
+       movdqa .Lconstant_R2R1(%rip), CONSTANT
+#else
+       movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT
+#endif
+
+loop_64:/*  64 bytes Full cache line folding */
+       prefetchnta    0x40(BUF)
+       movdqa  %xmm1, %xmm5
+       movdqa  %xmm2, %xmm6
+       movdqa  %xmm3, %xmm7
+#ifdef __x86_64__
+       movdqa  %xmm4, %xmm8
+#endif
+       PCLMULQDQ 00, CONSTANT, %xmm1
+       PCLMULQDQ 00, CONSTANT, %xmm2
+       PCLMULQDQ 00, CONSTANT, %xmm3
+#ifdef __x86_64__
+       PCLMULQDQ 00, CONSTANT, %xmm4
+#endif
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       PCLMULQDQ 0x11, CONSTANT, %xmm6
+       PCLMULQDQ 0x11, CONSTANT, %xmm7
+#ifdef __x86_64__
+       PCLMULQDQ 0x11, CONSTANT, %xmm8
+#endif
+       pxor    %xmm5, %xmm1
+       pxor    %xmm6, %xmm2
+       pxor    %xmm7, %xmm3
+#ifdef __x86_64__
+       pxor    %xmm8, %xmm4
+#else
+       /* xmm8 unsupported for x32 */
+       movdqa  %xmm4, %xmm5
+       PCLMULQDQ 00, CONSTANT, %xmm4
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm4
+#endif
+
+       pxor    (BUF), %xmm1
+       pxor    0x10(BUF), %xmm2
+       pxor    0x20(BUF), %xmm3
+       pxor    0x30(BUF), %xmm4
+
+       sub     $0x40, LEN
+       add     $0x40, BUF
+       cmp     $0x40, LEN
+       jge     loop_64
+less_64:/*  Folding cache line into 128bit */
+#ifdef __x86_64__
+       movdqa  .Lconstant_R4R3(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_R4R3 - delta(%ecx), CONSTANT
+#endif
+       prefetchnta     (BUF)
+
+       movdqa  %xmm1, %xmm5
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm2, %xmm1
+
+       movdqa  %xmm1, %xmm5
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm3, %xmm1
+
+       movdqa  %xmm1, %xmm5
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm4, %xmm1
+
+       cmp     $0x10, LEN
+       jb      fold_64
+loop_16:/* Folding rest buffer into 128bit */
+       movdqa  %xmm1, %xmm5
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       PCLMULQDQ 0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    (BUF), %xmm1
+       sub     $0x10, LEN
+       add     $0x10, BUF
+       cmp     $0x10, LEN
+       jge     loop_16
+
+fold_64:
+       /* perform the last 64 bit fold, also adds 32 zeroes
+        * to the input stream */
+       PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+       psrldq  $0x08, %xmm1
+       pxor    CONSTANT, %xmm1
+
+       /* final 32-bit fold */
+       movdqa  %xmm1, %xmm2
+#ifdef __x86_64__
+       movdqa  .Lconstant_R5(%rip), CONSTANT
+       movdqa  .Lconstant_mask32(%rip), %xmm3
+#else
+       movdqa  .Lconstant_R5 - delta(%ecx), CONSTANT
+       movdqa  .Lconstant_mask32 - delta(%ecx), %xmm3
+#endif
+       psrldq  $0x04, %xmm2
+       pand    %xmm3, %xmm1
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+
+       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+#ifdef __x86_64__
+       movdqa  .Lconstant_RUpoly(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_RUpoly - delta(%ecx), CONSTANT
+#endif
+       movdqa  %xmm1, %xmm2
+       pand    %xmm3, %xmm1
+       PCLMULQDQ 0x10, CONSTANT, %xmm1
+       pand    %xmm3, %xmm1
+       PCLMULQDQ 0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+       pextrd  $0x01, %xmm1, %eax
+
+       ret
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
new file mode 100644 (file)
index 0000000..9d014a7
--- /dev/null
@@ -0,0 +1,201 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+
+#include <asm/cpufeature.h>
+#include <asm/cpu_device_id.h>
+#include <asm/i387.h>
+
+#define CHKSUM_BLOCK_SIZE      1
+#define CHKSUM_DIGEST_SIZE     4
+
+#define PCLMUL_MIN_LEN         64L     /* minimum size of buffer
+                                        * for crc32_pclmul_le_16 */
+#define SCALE_F                        16L     /* size of xmm register */
+#define SCALE_F_MASK           (SCALE_F - 1)
+
+u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32);
+
+/*
+ * crc32_pclmul_le - CRC32 (little-endian polynomial) using PCLMULQDQ.
+ *
+ * Falls back to the generic table-driven crc32_le() when the buffer is
+ * too short for the SIMD routine or when the FPU cannot be used in the
+ * current context.  Otherwise: handle unaligned head bytes with
+ * crc32_le(), feed the 16-byte-aligned middle to the assembler routine
+ * crc32_pclmul_le_16(), and finish any tail bytes with crc32_le().
+ */
+static u32 __attribute__((pure))
+       crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
+{
+       unsigned int iquotient;
+       unsigned int iremainder;
+       unsigned int prealign;
+
+       /* Worst case up to SCALE_F_MASK bytes go to head alignment, so
+        * require PCLMUL_MIN_LEN + SCALE_F_MASK to guarantee the asm
+        * routine still sees at least PCLMUL_MIN_LEN bytes.  Also bail
+        * out if taking the FPU here is not safe. */
+       if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
+               return crc32_le(crc, p, len);
+
+       if ((long)p & SCALE_F_MASK) {
+               /* align p to 16 byte */
+               prealign = SCALE_F - ((long)p & SCALE_F_MASK);
+
+               crc = crc32_le(crc, p, prealign);
+               len -= prealign;
+               p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
+                                    ~SCALE_F_MASK);
+       }
+       /* iquotient: multiple-of-16 byte count for the SIMD routine;
+        * iremainder: leftover tail finished by the generic code. */
+       iquotient = len & (~SCALE_F_MASK);
+       iremainder = len & SCALE_F_MASK;
+
+       /* kernel_fpu_begin/end bracket all XMM register use */
+       kernel_fpu_begin();
+       crc = crc32_pclmul_le_16(p, iquotient, crc);
+       kernel_fpu_end();
+
+       if (iremainder)
+               crc = crc32_le(crc, p + iquotient, iremainder);
+
+       return crc;
+}
+
+/*
+ * Initialize the per-tfm default seed to 0.  Note: no implicit ~0
+ * preset here, matching bare crc32_le() semantics; callers wanting the
+ * conventional initial value must set it via setkey.
+ */
+static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = 0;
+
+       return 0;
+}
+
+/*
+ * Set the 4-byte seed ("key") used as the starting CRC value for
+ * subsequent init/digest operations.  The key bytes are interpreted
+ * as a little-endian u32.  Returns -EINVAL (and sets the BAD_KEY_LEN
+ * flag) for any key length other than sizeof(u32).
+ */
+static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
+                       unsigned int keylen)
+{
+       u32 *mctx = crypto_shash_ctx(hash);
+
+       if (keylen != sizeof(u32)) {
+               crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       *mctx = le32_to_cpup((__le32 *)key);
+       return 0;
+}
+
+/* Start a digest: copy the tfm-wide seed into the per-request state. */
+static int crc32_pclmul_init(struct shash_desc *desc)
+{
+       u32 *mctx = crypto_shash_ctx(desc->tfm);
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *crcp = *mctx;
+
+       return 0;
+}
+
+/* Fold len bytes of data into the running CRC held in the desc ctx. */
+static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
+                              unsigned int len)
+{
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *crcp = crc32_pclmul_le(*crcp, data, len);
+       return 0;
+}
+
+/* No final XOR 0xFFFFFFFF, like crc32_le */
+/* Common finup helper: fold the remaining data into *crcp and store
+ * the result at out as little-endian. */
+static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
+                               u8 *out)
+{
+       *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
+       return 0;
+}
+
+/* finup: finish from the per-request state (update + final in one). */
+static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
+                             unsigned int len, u8 *out)
+{
+       return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+/* Emit the current CRC as a little-endian 32-bit value; no final
+ * inversion is applied (see the "No final XOR" note above finup). */
+static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *(__le32 *)out = cpu_to_le32p(crcp);
+       return 0;
+}
+
+/* digest: one-shot hash starting from the tfm-wide seed (not the
+ * per-request state). */
+static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
+                              unsigned int len, u8 *out)
+{
+       return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
+                                   out);
+}
+
+/*
+ * shash registration for "crc32" with driver name "crc32-pclmul".
+ * Priority 200 lets this implementation win over the generic
+ * table-driven crc32 when PCLMULQDQ is available.
+ */
+static struct shash_alg alg = {
+       .setkey         = crc32_pclmul_setkey,
+       .init           = crc32_pclmul_init,
+       .update         = crc32_pclmul_update,
+       .final          = crc32_pclmul_final,
+       .finup          = crc32_pclmul_finup,
+       .digest         = crc32_pclmul_digest,
+       .descsize       = sizeof(u32),
+       .digestsize     = CHKSUM_DIGEST_SIZE,
+       .base           = {
+                       .cra_name               = "crc32",
+                       .cra_driver_name        = "crc32-pclmul",
+                       .cra_priority           = 200,
+                       .cra_blocksize          = CHKSUM_BLOCK_SIZE,
+                       .cra_ctxsize            = sizeof(u32),
+                       .cra_module             = THIS_MODULE,
+                       .cra_init               = crc32_pclmul_cra_init,
+       }
+};
+
+/* CPU match table: module autoloads only on CPUs with PCLMULQDQ. */
+static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
+       X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
+       {}
+};
+MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
+
+
+/* Module init: refuse to load without PCLMULQDQ, else register the
+ * shash.  Returns -ENODEV when the instruction is unavailable. */
+static int __init crc32_pclmul_mod_init(void)
+{
+
+       if (!x86_match_cpu(crc32pclmul_cpu_id)) {
+               pr_info("PCLMULQDQ-NI instructions are not detected.\n");
+               return -ENODEV;
+       }
+       return crypto_register_shash(&alg);
+}
+
+/* Module exit: unregister the shash registered at init time. */
+static void __exit crc32_pclmul_mod_fini(void)
+{
+       crypto_unregister_shash(&alg);
+}
+
+module_init(crc32_pclmul_mod_init);
+module_exit(crc32_pclmul_mod_fini);
+
+MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS("crc32");
+MODULE_ALIAS("crc32-pclmul");
index 93c6d39..cf1a7ec 100644 (file)
@@ -42,6 +42,8 @@
  * SOFTWARE.
  */
 
+#include <linux/linkage.h>
+
 ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
 
 .macro LABEL prefix n
@@ -68,8 +70,7 @@
 
 # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
 
-.global crc_pcl
-crc_pcl:
+ENTRY(crc_pcl)
 #define    bufp                %rdi
 #define    bufp_dw     %edi
 #define    bufp_w      %di
@@ -323,6 +324,9 @@ JMPTBL_ENTRY %i
 .noaltmacro
        i=i+1
 .endr
+
+ENDPROC(crc_pcl)
+
        ################################################################
        ## PCLMULQDQ tables
        ## Table is 128 entries x 2 quad words each
index 1eb7f90..586f41a 100644 (file)
@@ -94,6 +94,7 @@ __clmul_gf128mul_ble:
        pxor T2, T1
        pxor T1, DATA
        ret
+ENDPROC(__clmul_gf128mul_ble)
 
 /* void clmul_ghash_mul(char *dst, const be128 *shash) */
 ENTRY(clmul_ghash_mul)
@@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul)
        PSHUFB_XMM BSWAP DATA
        movups DATA, (%rdi)
        ret
+ENDPROC(clmul_ghash_mul)
 
 /*
  * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
@@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update)
        movups DATA, (%rdi)
 .Lupdate_just_ret:
        ret
+ENDPROC(clmul_ghash_update)
 
 /*
  * void clmul_ghash_setkey(be128 *shash, const u8 *key);
@@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey)
        pxor %xmm1, %xmm0
        movups %xmm0, (%rdi)
        ret
+ENDPROC(clmul_ghash_setkey)
index 72eb306..329452b 100644 (file)
@@ -2,11 +2,12 @@
 # D. J. Bernstein
 # Public domain.
 
-# enter ECRYPT_encrypt_bytes
+#include <linux/linkage.h>
+
 .text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
@@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes:
        add     $64,%esi
        # goto bytesatleast1
        jmp     ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
@@ -1060,11 +1060,10 @@ ECRYPT_keysetup:
        # leave
        add     %eax,%esp
        ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
        mov     %esp,%eax
        and     $31,%eax
        add     $256,%eax
@@ -1112,3 +1111,4 @@ ECRYPT_ivsetup:
        # leave
        add     %eax,%esp
        ret
+ENDPROC(salsa20_ivsetup)
index 6214a9b..9279e0b 100644 (file)
@@ -1,8 +1,7 @@
-# enter ECRYPT_encrypt_bytes
-.text
-.p2align 5
-.globl ECRYPT_encrypt_bytes
-ECRYPT_encrypt_bytes:
+#include <linux/linkage.h>
+
+# enter salsa20_encrypt_bytes
+ENTRY(salsa20_encrypt_bytes)
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
@@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes:
        # comment:fp stack unchanged by jump
        # goto bytesatleast1
        jmp     ._bytesatleast1
-# enter ECRYPT_keysetup
-.text
-.p2align 5
-.globl ECRYPT_keysetup
-ECRYPT_keysetup:
+ENDPROC(salsa20_encrypt_bytes)
+
+# enter salsa20_keysetup
+ENTRY(salsa20_keysetup)
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
@@ -892,11 +890,10 @@ ECRYPT_keysetup:
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret
-# enter ECRYPT_ivsetup
-.text
-.p2align 5
-.globl ECRYPT_ivsetup
-ECRYPT_ivsetup:
+ENDPROC(salsa20_keysetup)
+
+# enter salsa20_ivsetup
+ENTRY(salsa20_ivsetup)
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
@@ -918,3 +915,4 @@ ECRYPT_ivsetup:
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret
+ENDPROC(salsa20_ivsetup)
index a3a3c02..5e8e677 100644 (file)
 #define SALSA20_MIN_KEY_SIZE  16U
 #define SALSA20_MAX_KEY_SIZE  32U
 
-// use the ECRYPT_* function names
-#define salsa20_keysetup        ECRYPT_keysetup
-#define salsa20_ivsetup         ECRYPT_ivsetup
-#define salsa20_encrypt_bytes   ECRYPT_encrypt_bytes
-
 struct salsa20_ctx
 {
        u32 input[16];
index 02b0e9f..43c9386 100644 (file)
@@ -24,6 +24,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "serpent-avx-x86_64-asm_64.S"
        transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
 .align 8
-.type   __serpent_enc_blk8_avx,@function;
-
 __serpent_enc_blk8_avx:
        /* input:
         *      %rdi: ctx, CTX
@@ -619,10 +618,9 @@ __serpent_enc_blk8_avx:
        write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
        ret;
+ENDPROC(__serpent_enc_blk8_avx)
 
 .align 8
-.type   __serpent_dec_blk8_avx,@function;
-
 __serpent_dec_blk8_avx:
        /* input:
         *      %rdi: ctx, CTX
@@ -674,12 +672,9 @@ __serpent_dec_blk8_avx:
        write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
        ret;
+ENDPROC(__serpent_dec_blk8_avx)
 
-.align 8
-.global serpent_ecb_enc_8way_avx
-.type   serpent_ecb_enc_8way_avx,@function;
-
-serpent_ecb_enc_8way_avx:
+ENTRY(serpent_ecb_enc_8way_avx)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx:
        store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
        ret;
+ENDPROC(serpent_ecb_enc_8way_avx)
 
-.align 8
-.global serpent_ecb_dec_8way_avx
-.type   serpent_ecb_dec_8way_avx,@function;
-
-serpent_ecb_dec_8way_avx:
+ENTRY(serpent_ecb_dec_8way_avx)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx:
        store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
        ret;
+ENDPROC(serpent_ecb_dec_8way_avx)
 
-.align 8
-.global serpent_cbc_dec_8way_avx
-.type   serpent_cbc_dec_8way_avx,@function;
-
-serpent_cbc_dec_8way_avx:
+ENTRY(serpent_cbc_dec_8way_avx)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx:
        store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
 
        ret;
+ENDPROC(serpent_cbc_dec_8way_avx)
 
-.align 8
-.global serpent_ctr_8way_avx
-.type   serpent_ctr_8way_avx,@function;
-
-serpent_ctr_8way_avx:
+ENTRY(serpent_ctr_8way_avx)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -752,3 +738,4 @@ serpent_ctr_8way_avx:
        store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
        ret;
+ENDPROC(serpent_ctr_8way_avx)
index c00053d..d348f15 100644 (file)
@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "serpent-sse2-i586-asm_32.S"
 .text
 
        pxor t0,                x3; \
        movdqu x3,              (3*4*4)(out);
 
-.align 8
-.global __serpent_enc_blk_4way
-.type   __serpent_enc_blk_4way,@function;
-
-__serpent_enc_blk_4way:
+ENTRY(__serpent_enc_blk_4way)
        /* input:
         *      arg_ctx(%esp): ctx, CTX
         *      arg_dst(%esp): dst
@@ -566,22 +564,19 @@ __serpent_enc_blk_4way:
        movl arg_dst(%esp), %eax;
 
        cmpb $0, arg_xor(%esp);
-       jnz __enc_xor4;
+       jnz .L__enc_xor4;
 
        write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
 
        ret;
 
-__enc_xor4:
+.L__enc_xor4:
        xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
 
        ret;
+ENDPROC(__serpent_enc_blk_4way)
 
-.align 8
-.global serpent_dec_blk_4way
-.type   serpent_dec_blk_4way,@function;
-
-serpent_dec_blk_4way:
+ENTRY(serpent_dec_blk_4way)
        /* input:
         *      arg_ctx(%esp): ctx, CTX
         *      arg_dst(%esp): dst
@@ -633,3 +628,4 @@ serpent_dec_blk_4way:
        write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
 
        ret;
+ENDPROC(serpent_dec_blk_4way)
index 3ee1ff0..acc066c 100644 (file)
@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "serpent-sse2-x86_64-asm_64.S"
 .text
 
        pxor t0,                x3; \
        movdqu x3,              (3*4*4)(out);
 
-.align 8
-.global __serpent_enc_blk_8way
-.type   __serpent_enc_blk_8way,@function;
-
-__serpent_enc_blk_8way:
+ENTRY(__serpent_enc_blk_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -687,24 +685,21 @@ __serpent_enc_blk_8way:
        leaq (4*4*4)(%rsi), %rax;
 
        testb %cl, %cl;
-       jnz __enc_xor8;
+       jnz .L__enc_xor8;
 
        write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
        write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
        ret;
 
-__enc_xor8:
+.L__enc_xor8:
        xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
        xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
 
        ret;
+ENDPROC(__serpent_enc_blk_8way)
 
-.align 8
-.global serpent_dec_blk_8way
-.type   serpent_dec_blk_8way,@function;
-
-serpent_dec_blk_8way:
+ENTRY(serpent_dec_blk_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -756,3 +751,4 @@ serpent_dec_blk_8way:
        write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
 
        ret;
+ENDPROC(serpent_dec_blk_8way)
index 49d6987..a410950 100644 (file)
@@ -28,6 +28,8 @@
  * (at your option) any later version.
  */
 
+#include <linux/linkage.h>
+
 #define CTX    %rdi    // arg1
 #define BUF    %rsi    // arg2
 #define CNT    %rdx    // arg3
  * param: function's name
  */
 .macro SHA1_VECTOR_ASM  name
-       .global \name
-       .type   \name, @function
-       .align 32
-\name:
+       ENTRY(\name)
+
        push    %rbx
        push    %rbp
        push    %r12
        pop     %rbx
        ret
 
-       .size   \name, .-\name
+       ENDPROC(\name)
 .endm
 
 /*
index ebac16b..8d3e113 100644 (file)
@@ -23,6 +23,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include "glue_helper-asm-avx.S"
 
 .file "twofish-avx-x86_64-asm_64.S"
        vpxor           x3, wkey, x3;
 
 .align 8
-.type  __twofish_enc_blk8,@function;
-
 __twofish_enc_blk8:
        /* input:
         *      %rdi: ctx, CTX
@@ -284,10 +283,9 @@ __twofish_enc_blk8:
        outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
        ret;
+ENDPROC(__twofish_enc_blk8)
 
 .align 8
-.type  __twofish_dec_blk8,@function;
-
 __twofish_dec_blk8:
        /* input:
         *      %rdi: ctx, CTX
@@ -325,12 +323,9 @@ __twofish_dec_blk8:
        outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
        ret;
+ENDPROC(__twofish_dec_blk8)
 
-.align 8
-.global twofish_ecb_enc_8way
-.type   twofish_ecb_enc_8way,@function;
-
-twofish_ecb_enc_8way:
+ENTRY(twofish_ecb_enc_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -346,12 +341,9 @@ twofish_ecb_enc_8way:
        store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
 
        ret;
+ENDPROC(twofish_ecb_enc_8way)
 
-.align 8
-.global twofish_ecb_dec_8way
-.type   twofish_ecb_dec_8way,@function;
-
-twofish_ecb_dec_8way:
+ENTRY(twofish_ecb_dec_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -367,12 +359,9 @@ twofish_ecb_dec_8way:
        store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
 
        ret;
+ENDPROC(twofish_ecb_dec_8way)
 
-.align 8
-.global twofish_cbc_dec_8way
-.type   twofish_cbc_dec_8way,@function;
-
-twofish_cbc_dec_8way:
+ENTRY(twofish_cbc_dec_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -393,12 +382,9 @@ twofish_cbc_dec_8way:
        popq %r12;
 
        ret;
+ENDPROC(twofish_cbc_dec_8way)
 
-.align 8
-.global twofish_ctr_8way
-.type   twofish_ctr_8way,@function;
-
-twofish_ctr_8way:
+ENTRY(twofish_ctr_8way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -421,3 +407,4 @@ twofish_ctr_8way:
        popq %r12;
 
        ret;
+ENDPROC(twofish_ctr_8way)
index 658af4b..694ea45 100644 (file)
@@ -20,6 +20,7 @@
 .file "twofish-i586-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 /* return address at 0 */
        xor     %esi,           d ## D;\
        ror     $1,             d ## D;
 
-.align 4
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
        push    %ebp                    /* save registers according to calling convention*/
        push    %ebx
        push    %esi
@@ -277,8 +274,9 @@ twofish_enc_blk:
        pop     %ebp
        mov     $1,     %eax
        ret
+ENDPROC(twofish_enc_blk)
 
-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
        push    %ebp                    /* save registers according to calling convention*/
        push    %ebx
        push    %esi
@@ -333,3 +331,4 @@ twofish_dec_blk:
        pop     %ebp
        mov     $1,     %eax
        ret
+ENDPROC(twofish_dec_blk)
index 5b012a2..1c3b7ce 100644 (file)
@@ -20,6 +20,8 @@
  *
  */
 
+#include <linux/linkage.h>
+
 .file "twofish-x86_64-asm-3way.S"
 .text
 
        rorq $32,                       RAB2; \
        outunpack3(mov, RIO, 2, RAB, 2);
 
-.align 8
-.global __twofish_enc_blk_3way
-.type   __twofish_enc_blk_3way,@function;
-
-__twofish_enc_blk_3way:
+ENTRY(__twofish_enc_blk_3way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -250,7 +248,7 @@ __twofish_enc_blk_3way:
        popq %rbp; /* bool xor */
 
        testb %bpl, %bpl;
-       jnz __enc_xor3;
+       jnz .L__enc_xor3;
 
        outunpack_enc3(mov);
 
@@ -262,7 +260,7 @@ __twofish_enc_blk_3way:
        popq %r15;
        ret;
 
-__enc_xor3:
+.L__enc_xor3:
        outunpack_enc3(xor);
 
        popq %rbx;
@@ -272,11 +270,9 @@ __enc_xor3:
        popq %r14;
        popq %r15;
        ret;
+ENDPROC(__twofish_enc_blk_3way)
 
-.global twofish_dec_blk_3way
-.type   twofish_dec_blk_3way,@function;
-
-twofish_dec_blk_3way:
+ENTRY(twofish_dec_blk_3way)
        /* input:
         *      %rdi: ctx, CTX
         *      %rsi: dst
@@ -313,4 +309,4 @@ twofish_dec_blk_3way:
        popq %r14;
        popq %r15;
        ret;
-
+ENDPROC(twofish_dec_blk_3way)
index 7bcf3fc..a039d21 100644 (file)
@@ -20,6 +20,7 @@
 .file "twofish-x86_64-asm.S"
 .text
 
+#include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
 #define a_offset       0
        xor     %r8d,           d ## D;\
        ror     $1,             d ## D;
 
-.align 8
-.global twofish_enc_blk
-.global twofish_dec_blk
-
-twofish_enc_blk:
+ENTRY(twofish_enc_blk)
        pushq    R1
 
        /* %rdi contains the ctx address */
@@ -269,8 +266,9 @@ twofish_enc_blk:
        popq    R1
        movq    $1,%rax
        ret
+ENDPROC(twofish_enc_blk)
 
-twofish_dec_blk:
+ENTRY(twofish_dec_blk)
        pushq    R1
 
        /* %rdi contains the ctx address */
@@ -320,3 +318,4 @@ twofish_dec_blk:
        popq    R1
        movq    $1,%rax
        ret
+ENDPROC(twofish_dec_blk)
index 0880a14..05c0ce5 100644 (file)
@@ -353,6 +353,27 @@ config CRYPTO_CRC32C_SPARC64
          CRC32c CRC algorithm implemented using sparc64 crypto instructions,
          when available.
 
+config CRYPTO_CRC32
+       tristate "CRC32 CRC algorithm"
+       select CRYPTO_HASH
+       select CRC32
+       help
+         CRC-32-IEEE 802.3 cyclic redundancy-check algorithm.
+         Shash crypto api wrappers to crc32_le function.
+
+config CRYPTO_CRC32_PCLMUL
+       tristate "CRC32 PCLMULQDQ hardware acceleration"
+       depends on X86
+       select CRYPTO_HASH
+       select CRC32
+       help
+         From Intel Westmere and AMD Bulldozer onwards, processors with
+         SSE4.2 and PCLMULQDQ support can use a hardware-accelerated
+         CRC32 PCLMULQDQ implementation. This option will create the
+         'crc32-pclmul' module, which enables any routine to use the
+         CRC-32-IEEE 802.3 checksum with better performance than the
+         table implementation.
+
 config CRYPTO_GHASH
        tristate "GHASH digest algorithm"
        select CRYPTO_GF128MUL
index d59dec7..be1a1be 100644 (file)
@@ -81,6 +81,7 @@ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
 obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
index 533de95..7d4a8d2 100644 (file)
@@ -388,9 +388,9 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_blkcipher rblkcipher;
 
-       snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "ablkcipher");
-       snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-                alg->cra_ablkcipher.geniv ?: "<default>");
+       strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type));
+       strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<default>",
+               sizeof(rblkcipher.geniv));
 
        rblkcipher.blocksize = alg->cra_blocksize;
        rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
@@ -469,9 +469,9 @@ static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_blkcipher rblkcipher;
 
-       snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "givcipher");
-       snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-                alg->cra_ablkcipher.geniv ?: "<built-in>");
+       strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type));
+       strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<built-in>",
+               sizeof(rblkcipher.geniv));
 
        rblkcipher.blocksize = alg->cra_blocksize;
        rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
index 0b8121e..547491e 100644 (file)
@@ -117,9 +117,8 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
        struct crypto_report_aead raead;
        struct aead_alg *aead = &alg->cra_aead;
 
-       snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "aead");
-       snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-                aead->geniv ?: "<built-in>");
+       strncpy(raead.type, "aead", sizeof(raead.type));
+       strncpy(raead.geniv, aead->geniv ?: "<built-in>", sizeof(raead.geniv));
 
        raead.blocksize = alg->cra_blocksize;
        raead.maxauthsize = aead->maxauthsize;
@@ -203,8 +202,8 @@ static int crypto_nivaead_report(struct sk_buff *skb, struct crypto_alg *alg)
        struct crypto_report_aead raead;
        struct aead_alg *aead = &alg->cra_aead;
 
-       snprintf(raead.type, CRYPTO_MAX_ALG_NAME, "%s", "nivaead");
-       snprintf(raead.geniv, CRYPTO_MAX_ALG_NAME, "%s", aead->geniv);
+       strncpy(raead.type, "nivaead", sizeof(raead.type));
+       strncpy(raead.geniv, aead->geniv, sizeof(raead.geniv));
 
        raead.blocksize = alg->cra_blocksize;
        raead.maxauthsize = aead->maxauthsize;
@@ -282,18 +281,16 @@ struct crypto_instance *aead_geniv_alloc(struct crypto_template *tmpl,
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ (CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_GENIV)) &
            algt->mask)
                return ERR_PTR(-EINVAL);
 
        name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(name);
        if (IS_ERR(name))
-               return ERR_PTR(err);
+               return ERR_CAST(name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index 3887856..793a27f 100644 (file)
@@ -404,7 +404,7 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_hash rhash;
 
-       snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "ahash");
+       strncpy(rhash.type, "ahash", sizeof(rhash.type));
 
        rhash.blocksize = alg->cra_blocksize;
        rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize;
index c3b9bfe..08c57c8 100644 (file)
@@ -749,12 +749,10 @@ struct crypto_alg *crypto_attr_alg2(struct rtattr *rta,
                                    u32 type, u32 mask)
 {
        const char *name;
-       int err;
 
        name = crypto_attr_alg_name(rta);
-       err = PTR_ERR(name);
        if (IS_ERR(name))
-               return ERR_PTR(err);
+               return ERR_CAST(name);
 
        return crypto_find_alg(name, frontend, type, mask);
 }
index d0583a4..ffce19d 100644 (file)
@@ -592,9 +592,8 @@ static struct crypto_instance *crypto_authenc_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
index 136b68b..ab53762 100644 (file)
@@ -715,9 +715,8 @@ static struct crypto_instance *crypto_authenc_esn_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
index a8d85a1..a79e7e9 100644 (file)
@@ -499,9 +499,9 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_blkcipher rblkcipher;
 
-       snprintf(rblkcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "blkcipher");
-       snprintf(rblkcipher.geniv, CRYPTO_MAX_ALG_NAME, "%s",
-                alg->cra_blkcipher.geniv ?: "<default>");
+       strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type));
+       strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "<default>",
+               sizeof(rblkcipher.geniv));
 
        rblkcipher.blocksize = alg->cra_blocksize;
        rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
@@ -588,18 +588,16 @@ struct crypto_instance *skcipher_geniv_alloc(struct crypto_template *tmpl,
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ (CRYPTO_ALG_TYPE_GIVCIPHER | CRYPTO_ALG_GENIV)) &
            algt->mask)
                return ERR_PTR(-EINVAL);
 
        name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(name);
        if (IS_ERR(name))
-               return ERR_PTR(err);
+               return ERR_CAST(name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index 32fe1bb..499c917 100644 (file)
@@ -484,18 +484,16 @@ static struct crypto_instance *crypto_ccm_alloc_common(struct rtattr **tb,
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
 
        cipher = crypto_alg_mod_lookup(cipher_name,  CRYPTO_ALG_TYPE_CIPHER,
                                       CRYPTO_ALG_TYPE_MASK);
-       err = PTR_ERR(cipher);
        if (IS_ERR(cipher))
-               return ERR_PTR(err);
+               return ERR_CAST(cipher);
 
        err = -EINVAL;
        if (cipher->cra_blocksize != 16)
@@ -573,15 +571,13 @@ out_put_cipher:
 
 static struct crypto_instance *crypto_ccm_alloc(struct rtattr **tb)
 {
-       int err;
        const char *cipher_name;
        char ctr_name[CRYPTO_MAX_ALG_NAME];
        char full_name[CRYPTO_MAX_ALG_NAME];
 
        cipher_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(cipher_name);
        if (IS_ERR(cipher_name))
-               return ERR_PTR(err);
+               return ERR_CAST(cipher_name);
 
        if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)",
                     cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -612,20 +608,17 @@ static struct crypto_template crypto_ccm_tmpl = {
 
 static struct crypto_instance *crypto_ccm_base_alloc(struct rtattr **tb)
 {
-       int err;
        const char *ctr_name;
        const char *cipher_name;
        char full_name[CRYPTO_MAX_ALG_NAME];
 
        ctr_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(ctr_name);
        if (IS_ERR(ctr_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ctr_name);
 
        cipher_name = crypto_attr_alg_name(tb[2]);
-       err = PTR_ERR(cipher_name);
        if (IS_ERR(cipher_name))
-               return ERR_PTR(err);
+               return ERR_CAST(cipher_name);
 
        if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "ccm_base(%s,%s)",
                     ctr_name, cipher_name) >= CRYPTO_MAX_ALG_NAME)
@@ -760,17 +753,15 @@ static struct crypto_instance *crypto_rfc4309_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
 
        ccm_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(ccm_name);
        if (IS_ERR(ccm_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ccm_name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index ba200b0..834d8dd 100644 (file)
@@ -291,9 +291,8 @@ static struct crypto_instance *chainiv_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        err = crypto_get_default_rng();
        if (err)
diff --git a/crypto/crc32.c b/crypto/crc32.c
new file mode 100644 (file)
index 0000000..9d1c415
--- /dev/null
@@ -0,0 +1,158 @@
+/* GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see http://www.gnu.org/licenses
+ *
+ * Please  visit http://www.xyratex.com/contact if you need additional
+ * information or have any questions.
+ *
+ * GPL HEADER END
+ */
+
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ */
+
+/*
+ * These are crypto API shash wrappers around crc32_le.
+ */
+
+#include <linux/crc32.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#define CHKSUM_BLOCK_SIZE      1
+#define CHKSUM_DIGEST_SIZE     4
+
+static u32 __crc32_le(u32 crc, unsigned char const *p, size_t len)
+{
+       return crc32_le(crc, p, len);
+}
+
+/* Unlike the usual CRC32 convention, the default seed is 0, not ~0 */
+static int crc32_cra_init(struct crypto_tfm *tfm)
+{
+       u32 *key = crypto_tfm_ctx(tfm);
+
+       *key = 0;
+
+       return 0;
+}
+
+
+/*
+ * Setting the seed allows arbitrary accumulators and flexible XOR policy
+ * If your algorithm starts with ~0, then XOR with ~0 before you set
+ * the seed.
+ */
+static int crc32_setkey(struct crypto_shash *hash, const u8 *key,
+                       unsigned int keylen)
+{
+       u32 *mctx = crypto_shash_ctx(hash);
+
+       if (keylen != sizeof(u32)) {
+               crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+               return -EINVAL;
+       }
+       *mctx = le32_to_cpup((__le32 *)key);
+       return 0;
+}
+
+static int crc32_init(struct shash_desc *desc)
+{
+       u32 *mctx = crypto_shash_ctx(desc->tfm);
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *crcp = *mctx;
+
+       return 0;
+}
+
+static int crc32_update(struct shash_desc *desc, const u8 *data,
+                       unsigned int len)
+{
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *crcp = __crc32_le(*crcp, data, len);
+       return 0;
+}
+
+/* No final XOR with 0xFFFFFFFF, matching crc32_le semantics */
+static int __crc32_finup(u32 *crcp, const u8 *data, unsigned int len,
+                        u8 *out)
+{
+       *(__le32 *)out = cpu_to_le32(__crc32_le(*crcp, data, len));
+       return 0;
+}
+
+static int crc32_finup(struct shash_desc *desc, const u8 *data,
+                      unsigned int len, u8 *out)
+{
+       return __crc32_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32_final(struct shash_desc *desc, u8 *out)
+{
+       u32 *crcp = shash_desc_ctx(desc);
+
+       *(__le32 *)out = cpu_to_le32p(crcp);
+       return 0;
+}
+
+static int crc32_digest(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       return __crc32_finup(crypto_shash_ctx(desc->tfm), data, len,
+                            out);
+}
+static struct shash_alg alg = {
+       .setkey         = crc32_setkey,
+       .init           = crc32_init,
+       .update         = crc32_update,
+       .final          = crc32_final,
+       .finup          = crc32_finup,
+       .digest         = crc32_digest,
+       .descsize       = sizeof(u32),
+       .digestsize     = CHKSUM_DIGEST_SIZE,
+       .base           = {
+               .cra_name               = "crc32",
+               .cra_driver_name        = "crc32-table",
+               .cra_priority           = 100,
+               .cra_blocksize          = CHKSUM_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(u32),
+               .cra_module             = THIS_MODULE,
+               .cra_init               = crc32_cra_init,
+       }
+};
+
+static int __init crc32_mod_init(void)
+{
+       return crypto_register_shash(&alg);
+}
+
+static void __exit crc32_mod_fini(void)
+{
+       crypto_unregister_shash(&alg);
+}
+
+module_init(crc32_mod_init);
+module_exit(crc32_mod_fini);
+
+MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
+MODULE_DESCRIPTION("CRC32 calculations wrapper for lib/crc32");
+MODULE_LICENSE("GPL");
index 35d700a..dfd511f 100644 (file)
@@ -30,6 +30,8 @@
 
 #include "internal.h"
 
+#define null_terminated(x)     (strnlen(x, sizeof(x)) < sizeof(x))
+
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
 /* The crypto netlink socket */
@@ -75,7 +77,7 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_cipher rcipher;
 
-       snprintf(rcipher.type, CRYPTO_MAX_ALG_NAME, "%s", "cipher");
+       strncpy(rcipher.type, "cipher", sizeof(rcipher.type));
 
        rcipher.blocksize = alg->cra_blocksize;
        rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
@@ -94,8 +96,7 @@ static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_comp rcomp;
 
-       snprintf(rcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "compression");
-
+       strncpy(rcomp.type, "compression", sizeof(rcomp.type));
        if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
                    sizeof(struct crypto_report_comp), &rcomp))
                goto nla_put_failure;
@@ -108,12 +109,14 @@ nla_put_failure:
 static int crypto_report_one(struct crypto_alg *alg,
                             struct crypto_user_alg *ualg, struct sk_buff *skb)
 {
-       memcpy(&ualg->cru_name, &alg->cra_name, sizeof(ualg->cru_name));
-       memcpy(&ualg->cru_driver_name, &alg->cra_driver_name,
-              sizeof(ualg->cru_driver_name));
-       memcpy(&ualg->cru_module_name, module_name(alg->cra_module),
-              CRYPTO_MAX_ALG_NAME);
-
+       strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+       strncpy(ualg->cru_driver_name, alg->cra_driver_name,
+               sizeof(ualg->cru_driver_name));
+       strncpy(ualg->cru_module_name, module_name(alg->cra_module),
+               sizeof(ualg->cru_module_name));
+
+       ualg->cru_type = 0;
+       ualg->cru_mask = 0;
        ualg->cru_flags = alg->cra_flags;
        ualg->cru_refcnt = atomic_read(&alg->cra_refcnt);
 
@@ -122,8 +125,7 @@ static int crypto_report_one(struct crypto_alg *alg,
        if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
                struct crypto_report_larval rl;
 
-               snprintf(rl.type, CRYPTO_MAX_ALG_NAME, "%s", "larval");
-
+               strncpy(rl.type, "larval", sizeof(rl.type));
                if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
                            sizeof(struct crypto_report_larval), &rl))
                        goto nla_put_failure;
@@ -196,7 +198,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
        struct crypto_dump_info info;
        int err;
 
-       if (!p->cru_driver_name)
+       if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+               return -EINVAL;
+
+       if (!p->cru_driver_name[0])
                return -EINVAL;
 
        alg = crypto_alg_match(p, 1);
@@ -260,6 +265,9 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
        LIST_HEAD(list);
 
+       if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+               return -EINVAL;
+
        if (priority && !strlen(p->cru_driver_name))
                return -EINVAL;
 
@@ -287,6 +295,9 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct crypto_alg *alg;
        struct crypto_user_alg *p = nlmsg_data(nlh);
 
+       if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+               return -EINVAL;
+
        alg = crypto_alg_match(p, 1);
        if (!alg)
                return -ENOENT;
@@ -368,6 +379,9 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
        struct crypto_user_alg *p = nlmsg_data(nlh);
        struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
 
+       if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+               return -EINVAL;
+
        if (strlen(p->cru_driver_name))
                exact = 1;
 
index 1f2997c..f2b94f2 100644 (file)
@@ -343,17 +343,15 @@ static struct crypto_instance *crypto_rfc3686_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_BLKCIPHER) & algt->mask)
                return ERR_PTR(-EINVAL);
 
        cipher_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(cipher_name);
        if (IS_ERR(cipher_name))
-               return ERR_PTR(err);
+               return ERR_CAST(cipher_name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index ccf9c5d..042223f 100644 (file)
@@ -282,9 +282,8 @@ static struct crypto_instance *crypto_cts_alloc(struct rtattr **tb)
 
        alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_BLKCIPHER,
                                  CRYPTO_ALG_TYPE_MASK);
-       err = PTR_ERR(alg);
        if (IS_ERR(alg))
-               return ERR_PTR(err);
+               return ERR_CAST(alg);
 
        inst = ERR_PTR(-EINVAL);
        if (!is_power_of_2(alg->cra_blocksize))
index 1a25263..137ad1e 100644 (file)
@@ -701,9 +701,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
@@ -711,9 +710,8 @@ static struct crypto_instance *crypto_gcm_alloc_common(struct rtattr **tb,
        ghash_alg = crypto_find_alg(ghash_name, &crypto_ahash_type,
                                    CRYPTO_ALG_TYPE_HASH,
                                    CRYPTO_ALG_TYPE_AHASH_MASK);
-       err = PTR_ERR(ghash_alg);
        if (IS_ERR(ghash_alg))
-               return ERR_PTR(err);
+               return ERR_CAST(ghash_alg);
 
        err = -ENOMEM;
        inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -787,15 +785,13 @@ out_put_ghash:
 
 static struct crypto_instance *crypto_gcm_alloc(struct rtattr **tb)
 {
-       int err;
        const char *cipher_name;
        char ctr_name[CRYPTO_MAX_ALG_NAME];
        char full_name[CRYPTO_MAX_ALG_NAME];
 
        cipher_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(cipher_name);
        if (IS_ERR(cipher_name))
-               return ERR_PTR(err);
+               return ERR_CAST(cipher_name);
 
        if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)", cipher_name) >=
            CRYPTO_MAX_ALG_NAME)
@@ -826,20 +822,17 @@ static struct crypto_template crypto_gcm_tmpl = {
 
 static struct crypto_instance *crypto_gcm_base_alloc(struct rtattr **tb)
 {
-       int err;
        const char *ctr_name;
        const char *ghash_name;
        char full_name[CRYPTO_MAX_ALG_NAME];
 
        ctr_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(ctr_name);
        if (IS_ERR(ctr_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ctr_name);
 
        ghash_name = crypto_attr_alg_name(tb[2]);
-       err = PTR_ERR(ghash_name);
        if (IS_ERR(ghash_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ghash_name);
 
        if (snprintf(full_name, CRYPTO_MAX_ALG_NAME, "gcm_base(%s,%s)",
                     ctr_name, ghash_name) >= CRYPTO_MAX_ALG_NAME)
@@ -971,17 +964,15 @@ static struct crypto_instance *crypto_rfc4106_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
 
        ccm_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(ccm_name);
        if (IS_ERR(ccm_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ccm_name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
@@ -1222,17 +1213,15 @@ static struct crypto_instance *crypto_rfc4543_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        if ((algt->type ^ CRYPTO_ALG_TYPE_AEAD) & algt->mask)
                return ERR_PTR(-EINVAL);
 
        ccm_name = crypto_attr_alg_name(tb[1]);
-       err = PTR_ERR(ccm_name);
        if (IS_ERR(ccm_name))
-               return ERR_PTR(err);
+               return ERR_CAST(ccm_name);
 
        inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL);
        if (!inst)
index 04e083f..7140fe7 100644 (file)
@@ -53,8 +53,7 @@ static int crypto_pcomp_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_comp rpcomp;
 
-       snprintf(rpcomp.type, CRYPTO_MAX_ALG_NAME, "%s", "pcomp");
-
+       strncpy(rpcomp.type, "pcomp", sizeof(rpcomp.type));
        if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
                    sizeof(struct crypto_report_comp), &rpcomp))
                goto nla_put_failure;
index f3b7894..e0a25c2 100644 (file)
@@ -65,7 +65,7 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
 {
        struct crypto_report_rng rrng;
 
-       snprintf(rrng.type, CRYPTO_MAX_ALG_NAME, "%s", "rng");
+       strncpy(rrng.type, "rng", sizeof(rrng.type));
 
        rrng.seedsize = alg->cra_rng.seedsize;
 
index 4c44912..f2cba4e 100644 (file)
@@ -305,9 +305,8 @@ static struct crypto_instance *seqiv_alloc(struct rtattr **tb)
        int err;
 
        algt = crypto_get_attr_type(tb);
-       err = PTR_ERR(algt);
        if (IS_ERR(algt))
-               return ERR_PTR(err);
+               return ERR_CAST(algt);
 
        err = crypto_get_default_rng();
        if (err)
index f426330..929058a 100644 (file)
@@ -530,7 +530,8 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
        struct crypto_report_hash rhash;
        struct shash_alg *salg = __crypto_shash_alg(alg);
 
-       snprintf(rhash.type, CRYPTO_MAX_ALG_NAME, "%s", "shash");
+       strncpy(rhash.type, "shash", sizeof(rhash.type));
+
        rhash.blocksize = alg->cra_blocksize;
        rhash.digestsize = salg->digestsize;
 
index edf4a08..efd8b20 100644 (file)
@@ -2268,6 +2268,21 @@ static const struct alg_test_desc alg_test_descs[] = {
                                }
                        }
                }
+       }, {
+               .alg = "ecb(fcrypt)",
+               .test = alg_test_skcipher,
+               .suite = {
+                       .cipher = {
+                               .enc = {
+                                       .vecs = fcrypt_pcbc_enc_tv_template,
+                                       .count = 1
+                               },
+                               .dec = {
+                                       .vecs = fcrypt_pcbc_dec_tv_template,
+                                       .count = 1
+                               }
+                       }
+               }
        }, {
                .alg = "ecb(khazad)",
                .test = alg_test_skcipher,
index c9d9d5c..6f22ba5 100644 (file)
@@ -332,7 +332,7 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
                return -EINVAL;
 
        dd->nb_out_sg = atmel_aes_sg_length(dd->req, dd->out_sg);
-       if (!dd->nb_in_sg)
+       if (!dd->nb_out_sg)
                return -EINVAL;
 
        dd->bufcnt = sg_copy_to_buffer(dd->in_sg, dd->nb_in_sg,
index a22f1a9..827913d 100644 (file)
@@ -694,7 +694,7 @@ out_error_dma:
                dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma);
        free_dma(crc->dma_ch);
 out_error_irq:
-       free_irq(crc->irq, crc->dev);
+       free_irq(crc->irq, crc);
 out_error_unmap:
        iounmap((void *)crc->regs);
 out_error_free_mem:
@@ -720,10 +720,10 @@ static int bfin_crypto_crc_remove(struct platform_device *pdev)
 
        crypto_unregister_ahash(&algs);
        tasklet_kill(&crc->done_task);
-       iounmap((void *)crc->regs);
        free_dma(crc->dma_ch);
        if (crc->irq > 0)
-               free_irq(crc->irq, crc->dev);
+               free_irq(crc->irq, crc);
+       iounmap((void *)crc->regs);
        kfree(crc);
 
        return 0;
index e66e8ee..6aa425f 100644 (file)
@@ -5,6 +5,7 @@
  *
  * Copyright (c) 2010 Nokia Corporation
  * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ * Copyright (c) 2011 Texas Instruments Incorporated
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as published
 #include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
 #include <linux/io.h>
 #include <linux/crypto.h>
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/aes.h>
 
-#include <linux/omap-dma.h>
+#define DST_MAXBURST                   4
+#define DMA_MIN                                (DST_MAXBURST * sizeof(u32))
 
 /* OMAP TRM gives bitfields as start:end, where start is the higher bit
    number. For example 7:0 */
 #define FLD_MASK(start, end)   (((1 << ((start) - (end) + 1)) - 1) << (end))
 #define FLD_VAL(val, start, end) (((val) << (end)) & FLD_MASK(start, end))
 
-#define AES_REG_KEY(x)                 (0x1C - ((x ^ 0x01) * 0x04))
-#define AES_REG_IV(x)                  (0x20 + ((x) * 0x04))
+#define AES_REG_KEY(dd, x)             ((dd)->pdata->key_ofs - \
+                                               ((x ^ 0x01) * 0x04))
+#define AES_REG_IV(dd, x)              ((dd)->pdata->iv_ofs + ((x) * 0x04))
 
-#define AES_REG_CTRL                   0x30
-#define AES_REG_CTRL_CTR_WIDTH         (1 << 7)
+#define AES_REG_CTRL(dd)               ((dd)->pdata->ctrl_ofs)
+#define AES_REG_CTRL_CTR_WIDTH_MASK    (3 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_32              (0 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_64              (1 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_96              (2 << 7)
+#define AES_REG_CTRL_CTR_WIDTH_128             (3 << 7)
 #define AES_REG_CTRL_CTR               (1 << 6)
 #define AES_REG_CTRL_CBC               (1 << 5)
 #define AES_REG_CTRL_KEY_SIZE          (3 << 3)
 #define AES_REG_CTRL_INPUT_READY       (1 << 1)
 #define AES_REG_CTRL_OUTPUT_READY      (1 << 0)
 
-#define AES_REG_DATA                   0x34
-#define AES_REG_DATA_N(x)              (0x34 + ((x) * 0x04))
+#define AES_REG_DATA_N(dd, x)          ((dd)->pdata->data_ofs + ((x) * 0x04))
 
-#define AES_REG_REV                    0x44
-#define AES_REG_REV_MAJOR              0xF0
-#define AES_REG_REV_MINOR              0x0F
+#define AES_REG_REV(dd)                        ((dd)->pdata->rev_ofs)
 
-#define AES_REG_MASK                   0x48
+#define AES_REG_MASK(dd)               ((dd)->pdata->mask_ofs)
 #define AES_REG_MASK_SIDLE             (1 << 6)
 #define AES_REG_MASK_START             (1 << 5)
 #define AES_REG_MASK_DMA_OUT_EN                (1 << 3)
@@ -63,8 +72,7 @@
 #define AES_REG_MASK_SOFTRESET         (1 << 1)
 #define AES_REG_AUTOIDLE               (1 << 0)
 
-#define AES_REG_SYSSTATUS              0x4C
-#define AES_REG_SYSSTATUS_RESETDONE    (1 << 0)
+#define AES_REG_LENGTH_N(x)            (0x54 + ((x) * 0x04))
 
 #define DEFAULT_TIMEOUT                (5*HZ)
 
@@ -72,6 +80,7 @@
 #define FLAGS_ENCRYPT          BIT(0)
 #define FLAGS_CBC              BIT(1)
 #define FLAGS_GIV              BIT(2)
+#define FLAGS_CTR              BIT(3)
 
 #define FLAGS_INIT             BIT(4)
 #define FLAGS_FAST             BIT(5)
@@ -92,11 +101,39 @@ struct omap_aes_reqctx {
 #define OMAP_AES_QUEUE_LENGTH  1
 #define OMAP_AES_CACHE_SIZE    0
 
+struct omap_aes_algs_info {
+       struct crypto_alg       *algs_list;
+       unsigned int            size;
+       unsigned int            registered;
+};
+
+struct omap_aes_pdata {
+       struct omap_aes_algs_info       *algs_info;
+       unsigned int    algs_info_size;
+
+       void            (*trigger)(struct omap_aes_dev *dd, int length);
+
+       u32             key_ofs;
+       u32             iv_ofs;
+       u32             ctrl_ofs;
+       u32             data_ofs;
+       u32             rev_ofs;
+       u32             mask_ofs;
+
+       u32             dma_enable_in;
+       u32             dma_enable_out;
+       u32             dma_start;
+
+       u32             major_mask;
+       u32             major_shift;
+       u32             minor_mask;
+       u32             minor_shift;
+};
+
 struct omap_aes_dev {
        struct list_head        list;
        unsigned long           phys_base;
        void __iomem            *io_base;
-       struct clk              *iclk;
        struct omap_aes_ctx     *ctx;
        struct device           *dev;
        unsigned long           flags;
@@ -111,20 +148,24 @@ struct omap_aes_dev {
        struct ablkcipher_request       *req;
        size_t                          total;
        struct scatterlist              *in_sg;
+       struct scatterlist              in_sgl;
        size_t                          in_offset;
        struct scatterlist              *out_sg;
+       struct scatterlist              out_sgl;
        size_t                          out_offset;
 
        size_t                  buflen;
        void                    *buf_in;
        size_t                  dma_size;
        int                     dma_in;
-       int                     dma_lch_in;
+       struct dma_chan         *dma_lch_in;
        dma_addr_t              dma_addr_in;
        void                    *buf_out;
        int                     dma_out;
-       int                     dma_lch_out;
+       struct dma_chan         *dma_lch_out;
        dma_addr_t              dma_addr_out;
+
+       const struct omap_aes_pdata     *pdata;
 };
 
 /* keep registered devices data here */
@@ -160,19 +201,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
                omap_aes_write(dd, offset, *value);
 }
 
-static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit)
-{
-       unsigned long timeout = jiffies + DEFAULT_TIMEOUT;
-
-       while (!(omap_aes_read(dd, offset) & bit)) {
-               if (time_is_before_jiffies(timeout)) {
-                       dev_err(dd->dev, "omap-aes timeout\n");
-                       return -ETIMEDOUT;
-               }
-       }
-       return 0;
-}
-
 static int omap_aes_hw_init(struct omap_aes_dev *dd)
 {
        /*
@@ -180,23 +208,9 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd)
         * It may be long delays between requests.
         * Device might go to off mode to save power.
         */
-       clk_enable(dd->iclk);
+       pm_runtime_get_sync(dd->dev);
 
        if (!(dd->flags & FLAGS_INIT)) {
-               /* is it necessary to reset before every operation? */
-               omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET,
-                                       AES_REG_MASK_SOFTRESET);
-               /*
-                * prevent OCP bus error (SRESP) in case an access to the module
-                * is performed while the module is coming out of soft reset
-                */
-               __asm__ __volatile__("nop");
-               __asm__ __volatile__("nop");
-
-               if (omap_aes_wait(dd, AES_REG_SYSSTATUS,
-                               AES_REG_SYSSTATUS_RESETDONE))
-                       return -ETIMEDOUT;
-
                dd->flags |= FLAGS_INIT;
                dd->err = 0;
        }
@@ -208,59 +222,75 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd)
 {
        unsigned int key32;
        int i, err;
-       u32 val, mask;
+       u32 val, mask = 0;
 
        err = omap_aes_hw_init(dd);
        if (err)
                return err;
 
-       val = 0;
-       if (dd->dma_lch_out >= 0)
-               val |= AES_REG_MASK_DMA_OUT_EN;
-       if (dd->dma_lch_in >= 0)
-               val |= AES_REG_MASK_DMA_IN_EN;
-
-       mask = AES_REG_MASK_DMA_IN_EN | AES_REG_MASK_DMA_OUT_EN;
-
-       omap_aes_write_mask(dd, AES_REG_MASK, val, mask);
-
        key32 = dd->ctx->keylen / sizeof(u32);
 
        /* it seems a key should always be set even if it has not changed */
        for (i = 0; i < key32; i++) {
-               omap_aes_write(dd, AES_REG_KEY(i),
+               omap_aes_write(dd, AES_REG_KEY(dd, i),
                        __le32_to_cpu(dd->ctx->key[i]));
        }
 
-       if ((dd->flags & FLAGS_CBC) && dd->req->info)
-               omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4);
+       if ((dd->flags & (FLAGS_CBC | FLAGS_CTR)) && dd->req->info)
+               omap_aes_write_n(dd, AES_REG_IV(dd, 0), dd->req->info, 4);
 
        val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3);
        if (dd->flags & FLAGS_CBC)
                val |= AES_REG_CTRL_CBC;
+       if (dd->flags & FLAGS_CTR) {
+               val |= AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_32;
+               mask = AES_REG_CTRL_CTR | AES_REG_CTRL_CTR_WIDTH_MASK;
+       }
        if (dd->flags & FLAGS_ENCRYPT)
                val |= AES_REG_CTRL_DIRECTION;
 
-       mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
+       mask |= AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION |
                        AES_REG_CTRL_KEY_SIZE;
 
-       omap_aes_write_mask(dd, AES_REG_CTRL, val, mask);
+       omap_aes_write_mask(dd, AES_REG_CTRL(dd), val, mask);
 
-       /* IN */
-       omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT,
-                                dd->phys_base + AES_REG_DATA, 0, 4);
+       return 0;
+}
 
-       omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
-       omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4);
+static void omap_aes_dma_trigger_omap2(struct omap_aes_dev *dd, int length)
+{
+       u32 mask, val;
 
-       /* OUT */
-       omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT,
-                               dd->phys_base + AES_REG_DATA, 0, 4);
+       val = dd->pdata->dma_start;
 
-       omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
-       omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4);
+       if (dd->dma_lch_out != NULL)
+               val |= dd->pdata->dma_enable_out;
+       if (dd->dma_lch_in != NULL)
+               val |= dd->pdata->dma_enable_in;
+
+       mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+              dd->pdata->dma_start;
+
+       omap_aes_write_mask(dd, AES_REG_MASK(dd), val, mask);
 
-       return 0;
+}
+
+static void omap_aes_dma_trigger_omap4(struct omap_aes_dev *dd, int length)
+{
+       omap_aes_write(dd, AES_REG_LENGTH_N(0), length);
+       omap_aes_write(dd, AES_REG_LENGTH_N(1), 0);
+
+       omap_aes_dma_trigger_omap2(dd, length);
+}
+
+static void omap_aes_dma_stop(struct omap_aes_dev *dd)
+{
+       u32 mask;
+
+       mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in |
+              dd->pdata->dma_start;
+
+       omap_aes_write_mask(dd, AES_REG_MASK(dd), 0, mask);
 }
 
 static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
@@ -284,18 +314,10 @@ static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx)
        return dd;
 }
 
-static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
+static void omap_aes_dma_out_callback(void *data)
 {
        struct omap_aes_dev *dd = data;
 
-       if (ch_status != OMAP_DMA_BLOCK_IRQ) {
-               pr_err("omap-aes DMA error status: 0x%hx\n", ch_status);
-               dd->err = -EIO;
-               dd->flags &= ~FLAGS_INIT; /* request to re-initialize */
-       } else if (lch == dd->dma_lch_in) {
-               return;
-       }
-
        /* dma_lch_out - completed */
        tasklet_schedule(&dd->done_task);
 }
@@ -303,9 +325,10 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data)
 static int omap_aes_dma_init(struct omap_aes_dev *dd)
 {
        int err = -ENOMEM;
+       dma_cap_mask_t mask;
 
-       dd->dma_lch_out = -1;
-       dd->dma_lch_in = -1;
+       dd->dma_lch_out = NULL;
+       dd->dma_lch_in = NULL;
 
        dd->buf_in = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
        dd->buf_out = (void *)__get_free_pages(GFP_KERNEL, OMAP_AES_CACHE_SIZE);
@@ -334,23 +357,31 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd)
                goto err_map_out;
        }
 
-       err = omap_request_dma(dd->dma_in, "omap-aes-rx",
-                              omap_aes_dma_callback, dd, &dd->dma_lch_in);
-       if (err) {
-               dev_err(dd->dev, "Unable to request DMA channel\n");
+       dma_cap_zero(mask);
+       dma_cap_set(DMA_SLAVE, mask);
+
+       dd->dma_lch_in = dma_request_slave_channel_compat(mask,
+                                                         omap_dma_filter_fn,
+                                                         &dd->dma_in,
+                                                         dd->dev, "rx");
+       if (!dd->dma_lch_in) {
+               dev_err(dd->dev, "Unable to request in DMA channel\n");
                goto err_dma_in;
        }
-       err = omap_request_dma(dd->dma_out, "omap-aes-tx",
-                              omap_aes_dma_callback, dd, &dd->dma_lch_out);
-       if (err) {
-               dev_err(dd->dev, "Unable to request DMA channel\n");
+
+       dd->dma_lch_out = dma_request_slave_channel_compat(mask,
+                                                          omap_dma_filter_fn,
+                                                          &dd->dma_out,
+                                                          dd->dev, "tx");
+       if (!dd->dma_lch_out) {
+               dev_err(dd->dev, "Unable to request out DMA channel\n");
                goto err_dma_out;
        }
 
        return 0;
 
 err_dma_out:
-       omap_free_dma(dd->dma_lch_in);
+       dma_release_channel(dd->dma_lch_in);
 err_dma_in:
        dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
                         DMA_FROM_DEVICE);
@@ -367,8 +398,8 @@ err_alloc:
 
 static void omap_aes_dma_cleanup(struct omap_aes_dev *dd)
 {
-       omap_free_dma(dd->dma_lch_out);
-       omap_free_dma(dd->dma_lch_in);
+       dma_release_channel(dd->dma_lch_out);
+       dma_release_channel(dd->dma_lch_in);
        dma_unmap_single(dd->dev, dd->dma_addr_out, dd->buflen,
                         DMA_FROM_DEVICE);
        dma_unmap_single(dd->dev, dd->dma_addr_in, dd->buflen, DMA_TO_DEVICE);
@@ -426,12 +457,15 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf,
        return off;
 }
 
-static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
-                              dma_addr_t dma_addr_out, int length)
+static int omap_aes_crypt_dma(struct crypto_tfm *tfm,
+               struct scatterlist *in_sg, struct scatterlist *out_sg)
 {
        struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm);
        struct omap_aes_dev *dd = ctx->dd;
-       int len32;
+       struct dma_async_tx_descriptor *tx_in, *tx_out;
+       struct dma_slave_config cfg;
+       dma_addr_t dma_addr_in = sg_dma_address(in_sg);
+       int ret, length = sg_dma_len(in_sg);
 
        pr_debug("len: %d\n", length);
 
@@ -441,30 +475,61 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in,
                dma_sync_single_for_device(dd->dev, dma_addr_in, length,
                                           DMA_TO_DEVICE);
 
-       len32 = DIV_ROUND_UP(length, sizeof(u32));
+       memset(&cfg, 0, sizeof(cfg));
+
+       cfg.src_addr = dd->phys_base + AES_REG_DATA_N(dd, 0);
+       cfg.dst_addr = dd->phys_base + AES_REG_DATA_N(dd, 0);
+       cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.src_maxburst = DST_MAXBURST;
+       cfg.dst_maxburst = DST_MAXBURST;
 
        /* IN */
-       omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32,
-                                    len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in,
-                                       OMAP_DMA_DST_SYNC);
+       ret = dmaengine_slave_config(dd->dma_lch_in, &cfg);
+       if (ret) {
+               dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
+                       ret);
+               return ret;
+       }
+
+       tx_in = dmaengine_prep_slave_sg(dd->dma_lch_in, in_sg, 1,
+                                       DMA_MEM_TO_DEV,
+                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       if (!tx_in) {
+               dev_err(dd->dev, "IN prep_slave_sg() failed\n");
+               return -EINVAL;
+       }
 
-       omap_set_dma_src_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_POST_INC,
-                               dma_addr_in, 0, 0);
+       /* No callback necessary */
+       tx_in->callback_param = dd;
 
        /* OUT */
-       omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32,
-                                    len32, 1, OMAP_DMA_SYNC_PACKET,
-                                       dd->dma_out, OMAP_DMA_SRC_SYNC);
+       ret = dmaengine_slave_config(dd->dma_lch_out, &cfg);
+       if (ret) {
+               dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
+                       ret);
+               return ret;
+       }
+
+       tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, 1,
+                                       DMA_DEV_TO_MEM,
+                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       if (!tx_out) {
+               dev_err(dd->dev, "OUT prep_slave_sg() failed\n");
+               return -EINVAL;
+       }
 
-       omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC,
-                                dma_addr_out, 0, 0);
+       tx_out->callback = omap_aes_dma_out_callback;
+       tx_out->callback_param = dd;
 
-       omap_start_dma(dd->dma_lch_in);
-       omap_start_dma(dd->dma_lch_out);
+       dmaengine_submit(tx_in);
+       dmaengine_submit(tx_out);
 
-       /* start DMA or disable idle mode */
-       omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START,
-                           AES_REG_MASK_START);
+       dma_async_issue_pending(dd->dma_lch_in);
+       dma_async_issue_pending(dd->dma_lch_out);
+
+       /* start DMA */
+       dd->pdata->trigger(dd, length);
 
        return 0;
 }
@@ -476,6 +541,8 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
        int err, fast = 0, in, out;
        size_t count;
        dma_addr_t addr_in, addr_out;
+       struct scatterlist *in_sg, *out_sg;
+       int len32;
 
        pr_debug("total: %d\n", dd->total);
 
@@ -514,6 +581,9 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
                addr_in = sg_dma_address(dd->in_sg);
                addr_out = sg_dma_address(dd->out_sg);
 
+               in_sg = dd->in_sg;
+               out_sg = dd->out_sg;
+
                dd->flags |= FLAGS_FAST;
 
        } else {
@@ -521,6 +591,27 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
                count = sg_copy(&dd->in_sg, &dd->in_offset, dd->buf_in,
                                 dd->buflen, dd->total, 0);
 
+               len32 = DIV_ROUND_UP(count, DMA_MIN) * DMA_MIN;
+
+               /*
+                * The data going into the AES module has been copied
+                * to a local buffer and the data coming out will go
+                * into a local buffer so set up local SG entries for
+                * both.
+                */
+               sg_init_table(&dd->in_sgl, 1);
+               dd->in_sgl.offset = dd->in_offset;
+               sg_dma_len(&dd->in_sgl) = len32;
+               sg_dma_address(&dd->in_sgl) = dd->dma_addr_in;
+
+               sg_init_table(&dd->out_sgl, 1);
+               dd->out_sgl.offset = dd->out_offset;
+               sg_dma_len(&dd->out_sgl) = len32;
+               sg_dma_address(&dd->out_sgl) = dd->dma_addr_out;
+
+               in_sg = &dd->in_sgl;
+               out_sg = &dd->out_sgl;
+
                addr_in = dd->dma_addr_in;
                addr_out = dd->dma_addr_out;
 
@@ -530,7 +621,7 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd)
 
        dd->total -= count;
 
-       err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count);
+       err = omap_aes_crypt_dma(tfm, in_sg, out_sg);
        if (err) {
                dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
                dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE);
@@ -545,7 +636,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
 
        pr_debug("err: %d\n", err);
 
-       clk_disable(dd->iclk);
+       pm_runtime_put_sync(dd->dev);
        dd->flags &= ~FLAGS_BUSY;
 
        req->base.complete(&req->base, err);
@@ -558,10 +649,10 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd)
 
        pr_debug("total: %d\n", dd->total);
 
-       omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START);
+       omap_aes_dma_stop(dd);
 
-       omap_stop_dma(dd->dma_lch_in);
-       omap_stop_dma(dd->dma_lch_out);
+       dmaengine_terminate_all(dd->dma_lch_in);
+       dmaengine_terminate_all(dd->dma_lch_out);
 
        if (dd->flags & FLAGS_FAST) {
                dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
@@ -734,6 +825,16 @@ static int omap_aes_cbc_decrypt(struct ablkcipher_request *req)
        return omap_aes_crypt(req, FLAGS_CBC);
 }
 
+static int omap_aes_ctr_encrypt(struct ablkcipher_request *req)
+{
+       return omap_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CTR);
+}
+
+static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
+{
+       return omap_aes_crypt(req, FLAGS_CTR);
+}
+
 static int omap_aes_cra_init(struct crypto_tfm *tfm)
 {
        pr_debug("enter\n");
@@ -750,7 +851,7 @@ static void omap_aes_cra_exit(struct crypto_tfm *tfm)
 
 /* ********************** ALGS ************************************ */
 
-static struct crypto_alg algs[] = {
+static struct crypto_alg algs_ecb_cbc[] = {
 {
        .cra_name               = "ecb(aes)",
        .cra_driver_name        = "ecb-aes-omap",
@@ -798,11 +899,213 @@ static struct crypto_alg algs[] = {
 }
 };
 
+static struct crypto_alg algs_ctr[] = {
+{
+       .cra_name               = "ctr(aes)",
+       .cra_driver_name        = "ctr-aes-omap",
+       .cra_priority           = 100,
+       .cra_flags              = CRYPTO_ALG_TYPE_ABLKCIPHER |
+                                 CRYPTO_ALG_KERN_DRIVER_ONLY |
+                                 CRYPTO_ALG_ASYNC,
+       .cra_blocksize          = AES_BLOCK_SIZE,
+       .cra_ctxsize            = sizeof(struct omap_aes_ctx),
+       .cra_alignmask          = 0,
+       .cra_type               = &crypto_ablkcipher_type,
+       .cra_module             = THIS_MODULE,
+       .cra_init               = omap_aes_cra_init,
+       .cra_exit               = omap_aes_cra_exit,
+       .cra_u.ablkcipher = {
+               .min_keysize    = AES_MIN_KEY_SIZE,
+               .max_keysize    = AES_MAX_KEY_SIZE,
+               .geniv          = "eseqiv",
+               .ivsize         = AES_BLOCK_SIZE,
+               .setkey         = omap_aes_setkey,
+               .encrypt        = omap_aes_ctr_encrypt,
+               .decrypt        = omap_aes_ctr_decrypt,
+       }
+},
+};
+
+static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc[] = {
+       {
+               .algs_list      = algs_ecb_cbc,
+               .size           = ARRAY_SIZE(algs_ecb_cbc),
+       },
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap2 = {
+       .algs_info      = omap_aes_algs_info_ecb_cbc,
+       .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc),
+       .trigger        = omap_aes_dma_trigger_omap2,
+       .key_ofs        = 0x1c,
+       .iv_ofs         = 0x20,
+       .ctrl_ofs       = 0x30,
+       .data_ofs       = 0x34,
+       .rev_ofs        = 0x44,
+       .mask_ofs       = 0x48,
+       .dma_enable_in  = BIT(2),
+       .dma_enable_out = BIT(3),
+       .dma_start      = BIT(5),
+       .major_mask     = 0xf0,
+       .major_shift    = 4,
+       .minor_mask     = 0x0f,
+       .minor_shift    = 0,
+};
+
+#ifdef CONFIG_OF
+static struct omap_aes_algs_info omap_aes_algs_info_ecb_cbc_ctr[] = {
+       {
+               .algs_list      = algs_ecb_cbc,
+               .size           = ARRAY_SIZE(algs_ecb_cbc),
+       },
+       {
+               .algs_list      = algs_ctr,
+               .size           = ARRAY_SIZE(algs_ctr),
+       },
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap3 = {
+       .algs_info      = omap_aes_algs_info_ecb_cbc_ctr,
+       .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+       .trigger        = omap_aes_dma_trigger_omap2,
+       .key_ofs        = 0x1c,
+       .iv_ofs         = 0x20,
+       .ctrl_ofs       = 0x30,
+       .data_ofs       = 0x34,
+       .rev_ofs        = 0x44,
+       .mask_ofs       = 0x48,
+       .dma_enable_in  = BIT(2),
+       .dma_enable_out = BIT(3),
+       .dma_start      = BIT(5),
+       .major_mask     = 0xf0,
+       .major_shift    = 4,
+       .minor_mask     = 0x0f,
+       .minor_shift    = 0,
+};
+
+static const struct omap_aes_pdata omap_aes_pdata_omap4 = {
+       .algs_info      = omap_aes_algs_info_ecb_cbc_ctr,
+       .algs_info_size = ARRAY_SIZE(omap_aes_algs_info_ecb_cbc_ctr),
+       .trigger        = omap_aes_dma_trigger_omap4,
+       .key_ofs        = 0x3c,
+       .iv_ofs         = 0x40,
+       .ctrl_ofs       = 0x50,
+       .data_ofs       = 0x60,
+       .rev_ofs        = 0x80,
+       .mask_ofs       = 0x84,
+       .dma_enable_in  = BIT(5),
+       .dma_enable_out = BIT(6),
+       .major_mask     = 0x0700,
+       .major_shift    = 8,
+       .minor_mask     = 0x003f,
+       .minor_shift    = 0,
+};
+
+static const struct of_device_id omap_aes_of_match[] = {
+       {
+               .compatible     = "ti,omap2-aes",
+               .data           = &omap_aes_pdata_omap2,
+       },
+       {
+               .compatible     = "ti,omap3-aes",
+               .data           = &omap_aes_pdata_omap3,
+       },
+       {
+               .compatible     = "ti,omap4-aes",
+               .data           = &omap_aes_pdata_omap4,
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, omap_aes_of_match);
+
+static int omap_aes_get_res_of(struct omap_aes_dev *dd,
+               struct device *dev, struct resource *res)
+{
+       struct device_node *node = dev->of_node;
+       const struct of_device_id *match;
+       int err = 0;
+
+       match = of_match_device(of_match_ptr(omap_aes_of_match), dev);
+       if (!match) {
+               dev_err(dev, "no compatible OF match\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       err = of_address_to_resource(node, 0, res);
+       if (err < 0) {
+               dev_err(dev, "can't translate OF node address\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       dd->dma_out = -1; /* Dummy value that's unused */
+       dd->dma_in = -1; /* Dummy value that's unused */
+
+       dd->pdata = match->data;
+
+err:
+       return err;
+}
+#else
+static const struct of_device_id omap_aes_of_match[] = {
+       {},
+};
+
+static int omap_aes_get_res_of(struct omap_aes_dev *dd,
+               struct device *dev, struct resource *res)
+{
+       return -EINVAL;
+}
+#endif
+
+static int omap_aes_get_res_pdev(struct omap_aes_dev *dd,
+               struct platform_device *pdev, struct resource *res)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *r;
+       int err = 0;
+
+       /* Get the base address */
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(dev, "no MEM resource info\n");
+               err = -ENODEV;
+               goto err;
+       }
+       memcpy(res, r, sizeof(*res));
+
+       /* Get the DMA out channel */
+       r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+       if (!r) {
+               dev_err(dev, "no DMA out resource info\n");
+               err = -ENODEV;
+               goto err;
+       }
+       dd->dma_out = r->start;
+
+       /* Get the DMA in channel */
+       r = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+       if (!r) {
+               dev_err(dev, "no DMA in resource info\n");
+               err = -ENODEV;
+               goto err;
+       }
+       dd->dma_in = r->start;
+
+       /* Only OMAP2/3 can be non-DT */
+       dd->pdata = &omap_aes_pdata_omap2;
+
+err:
+       return err;
+}
+
 static int omap_aes_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct omap_aes_dev *dd;
-       struct resource *res;
+       struct crypto_alg *algp;
+       struct resource res;
        int err = -ENOMEM, i, j;
        u32 reg;
 
@@ -817,49 +1120,31 @@ static int omap_aes_probe(struct platform_device *pdev)
        spin_lock_init(&dd->lock);
        crypto_init_queue(&dd->queue, OMAP_AES_QUEUE_LENGTH);
 
-       /* Get the base address */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(dev, "invalid resource type\n");
-               err = -ENODEV;
-               goto err_res;
-       }
-       dd->phys_base = res->start;
-
-       /* Get the DMA */
-       res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-       if (!res)
-               dev_info(dev, "no DMA info\n");
-       else
-               dd->dma_out = res->start;
-
-       /* Get the DMA */
-       res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-       if (!res)
-               dev_info(dev, "no DMA info\n");
-       else
-               dd->dma_in = res->start;
-
-       /* Initializing the clock */
-       dd->iclk = clk_get(dev, "ick");
-       if (IS_ERR(dd->iclk)) {
-               dev_err(dev, "clock intialization failed.\n");
-               err = PTR_ERR(dd->iclk);
+       err = (dev->of_node) ? omap_aes_get_res_of(dd, dev, &res) :
+                              omap_aes_get_res_pdev(dd, pdev, &res);
+       if (err)
                goto err_res;
-       }
 
-       dd->io_base = ioremap(dd->phys_base, SZ_4K);
+       dd->io_base = devm_request_and_ioremap(dev, &res);
        if (!dd->io_base) {
                dev_err(dev, "can't ioremap\n");
                err = -ENOMEM;
-               goto err_io;
+               goto err_res;
        }
+       dd->phys_base = res.start;
+
+       pm_runtime_enable(dev);
+       pm_runtime_get_sync(dev);
+
+       omap_aes_dma_stop(dd);
+
+       reg = omap_aes_read(dd, AES_REG_REV(dd));
+
+       pm_runtime_put_sync(dev);
 
-       clk_enable(dd->iclk);
-       reg = omap_aes_read(dd, AES_REG_REV);
        dev_info(dev, "OMAP AES hw accel rev: %u.%u\n",
-                (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR);
-       clk_disable(dd->iclk);
+                (reg & dd->pdata->major_mask) >> dd->pdata->major_shift,
+                (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
 
        tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd);
        tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd);
@@ -873,26 +1158,32 @@ static int omap_aes_probe(struct platform_device *pdev)
        list_add_tail(&dd->list, &dev_list);
        spin_unlock(&list_lock);
 
-       for (i = 0; i < ARRAY_SIZE(algs); i++) {
-               pr_debug("i: %d\n", i);
-               err = crypto_register_alg(&algs[i]);
-               if (err)
-                       goto err_algs;
-       }
+       for (i = 0; i < dd->pdata->algs_info_size; i++) {
+               for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
+                       algp = &dd->pdata->algs_info[i].algs_list[j];
+
+                       pr_debug("reg alg: %s\n", algp->cra_name);
+                       INIT_LIST_HEAD(&algp->cra_list);
+
+                       err = crypto_register_alg(algp);
+                       if (err)
+                               goto err_algs;
 
-       pr_info("probe() done\n");
+                       dd->pdata->algs_info[i].registered++;
+               }
+       }
 
        return 0;
 err_algs:
-       for (j = 0; j < i; j++)
-               crypto_unregister_alg(&algs[j]);
+       for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+               for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+                       crypto_unregister_alg(
+                                       &dd->pdata->algs_info[i].algs_list[j]);
        omap_aes_dma_cleanup(dd);
 err_dma:
        tasklet_kill(&dd->done_task);
        tasklet_kill(&dd->queue_task);
-       iounmap(dd->io_base);
-err_io:
-       clk_put(dd->iclk);
+       pm_runtime_disable(dev);
 err_res:
        kfree(dd);
        dd = NULL;
@@ -904,7 +1195,7 @@ err_data:
 static int omap_aes_remove(struct platform_device *pdev)
 {
        struct omap_aes_dev *dd = platform_get_drvdata(pdev);
-       int i;
+       int i, j;
 
        if (!dd)
                return -ENODEV;
@@ -913,33 +1204,52 @@ static int omap_aes_remove(struct platform_device *pdev)
        list_del(&dd->list);
        spin_unlock(&list_lock);
 
-       for (i = 0; i < ARRAY_SIZE(algs); i++)
-               crypto_unregister_alg(&algs[i]);
+       for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+               for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+                       crypto_unregister_alg(
+                                       &dd->pdata->algs_info[i].algs_list[j]);
 
        tasklet_kill(&dd->done_task);
        tasklet_kill(&dd->queue_task);
        omap_aes_dma_cleanup(dd);
-       iounmap(dd->io_base);
-       clk_put(dd->iclk);
+       pm_runtime_disable(dd->dev);
        kfree(dd);
        dd = NULL;
 
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int omap_aes_suspend(struct device *dev)
+{
+       pm_runtime_put_sync(dev);
+       return 0;
+}
+
+static int omap_aes_resume(struct device *dev)
+{
+       pm_runtime_get_sync(dev);
+       return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap_aes_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(omap_aes_suspend, omap_aes_resume)
+};
+
 static struct platform_driver omap_aes_driver = {
        .probe  = omap_aes_probe,
        .remove = omap_aes_remove,
        .driver = {
                .name   = "omap-aes",
                .owner  = THIS_MODULE,
+               .pm     = &omap_aes_pm_ops,
+               .of_match_table = omap_aes_of_match,
        },
 };
 
 static int __init omap_aes_mod_init(void)
 {
-       pr_info("loading %s driver\n", "omap-aes");
-
        return  platform_driver_register(&omap_aes_driver);
 }
 
index 9e6947b..3d1611f 100644 (file)
@@ -5,6 +5,7 @@
  *
  * Copyright (c) 2010 Nokia Corporation
  * Author: Dmitry Kasatkin <dmitry.kasatkin@nokia.com>
+ * Copyright (c) 2011 Texas Instruments Incorporated
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as published
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
-#include <linux/clk.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/omap-dma.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
 #include <linux/delay.h>
 #include <linux/crypto.h>
 #include <linux/cryptohash.h>
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
 
-#include <linux/omap-dma.h>
-
-#ifdef CONFIG_ARCH_OMAP1
-#include <mach/irqs.h>
-#endif
-
-#define SHA_REG_DIGEST(x)              (0x00 + ((x) * 0x04))
-#define SHA_REG_DIN(x)                 (0x1C + ((x) * 0x04))
-
 #define SHA1_MD5_BLOCK_SIZE            SHA1_BLOCK_SIZE
 #define MD5_DIGEST_SIZE                        16
 
-#define SHA_REG_DIGCNT                 0x14
+#define DST_MAXBURST                   16
+#define DMA_MIN                                (DST_MAXBURST * sizeof(u32))
+
+#define SHA_REG_IDIGEST(dd, x)         ((dd)->pdata->idigest_ofs + ((x)*0x04))
+#define SHA_REG_DIN(dd, x)             ((dd)->pdata->din_ofs + ((x) * 0x04))
+#define SHA_REG_DIGCNT(dd)             ((dd)->pdata->digcnt_ofs)
+
+#define SHA_REG_ODIGEST(x)             (0x00 + ((x) * 0x04))
 
 #define SHA_REG_CTRL                   0x18
 #define SHA_REG_CTRL_LENGTH            (0xFFFFFFFF << 5)
 #define SHA_REG_CTRL_INPUT_READY       (1 << 1)
 #define SHA_REG_CTRL_OUTPUT_READY      (1 << 0)
 
-#define SHA_REG_REV                    0x5C
-#define SHA_REG_REV_MAJOR              0xF0
-#define SHA_REG_REV_MINOR              0x0F
+#define SHA_REG_REV(dd)                        ((dd)->pdata->rev_ofs)
 
-#define SHA_REG_MASK                   0x60
+#define SHA_REG_MASK(dd)               ((dd)->pdata->mask_ofs)
 #define SHA_REG_MASK_DMA_EN            (1 << 3)
 #define SHA_REG_MASK_IT_EN             (1 << 2)
 #define SHA_REG_MASK_SOFTRESET         (1 << 1)
 #define SHA_REG_AUTOIDLE               (1 << 0)
 
-#define SHA_REG_SYSSTATUS              0x64
+#define SHA_REG_SYSSTATUS(dd)          ((dd)->pdata->sysstatus_ofs)
 #define SHA_REG_SYSSTATUS_RESETDONE    (1 << 0)
 
+#define SHA_REG_MODE                   0x44
+#define SHA_REG_MODE_HMAC_OUTER_HASH   (1 << 7)
+#define SHA_REG_MODE_HMAC_KEY_PROC     (1 << 5)
+#define SHA_REG_MODE_CLOSE_HASH                (1 << 4)
+#define SHA_REG_MODE_ALGO_CONSTANT     (1 << 3)
+#define SHA_REG_MODE_ALGO_MASK         (3 << 1)
+#define                SHA_REG_MODE_ALGO_MD5_128       (0 << 1)
+#define                SHA_REG_MODE_ALGO_SHA1_160      (1 << 1)
+#define                SHA_REG_MODE_ALGO_SHA2_224      (2 << 1)
+#define                SHA_REG_MODE_ALGO_SHA2_256      (3 << 1)
+
+#define SHA_REG_LENGTH                 0x48
+
+#define SHA_REG_IRQSTATUS              0x118
+#define SHA_REG_IRQSTATUS_CTX_RDY      (1 << 3)
+#define SHA_REG_IRQSTATUS_PARTHASH_RDY (1 << 2)
+#define SHA_REG_IRQSTATUS_INPUT_RDY    (1 << 1)
+#define SHA_REG_IRQSTATUS_OUTPUT_RDY   (1 << 0)
+
+#define SHA_REG_IRQENA                 0x11C
+#define SHA_REG_IRQENA_CTX_RDY         (1 << 3)
+#define SHA_REG_IRQENA_PARTHASH_RDY    (1 << 2)
+#define SHA_REG_IRQENA_INPUT_RDY       (1 << 1)
+#define SHA_REG_IRQENA_OUTPUT_RDY      (1 << 0)
+
 #define DEFAULT_TIMEOUT_INTERVAL       HZ
 
 /* mostly device flags */
 #define FLAGS_INIT             4
 #define FLAGS_CPU              5
 #define FLAGS_DMA_READY                6
+#define FLAGS_AUTO_XOR         7
+#define FLAGS_BE32_SHA1                8
 /* context flags */
 #define FLAGS_FINUP            16
 #define FLAGS_SG               17
-#define FLAGS_SHA1             18
-#define FLAGS_HMAC             19
-#define FLAGS_ERROR            20
 
-#define OP_UPDATE      1
-#define OP_FINAL       2
+#define FLAGS_MODE_SHIFT       18
+#define FLAGS_MODE_MASK                (SHA_REG_MODE_ALGO_MASK                 \
+                                       << (FLAGS_MODE_SHIFT - 1))
+#define                FLAGS_MODE_MD5          (SHA_REG_MODE_ALGO_MD5_128      \
+                                               << (FLAGS_MODE_SHIFT - 1))
+#define                FLAGS_MODE_SHA1         (SHA_REG_MODE_ALGO_SHA1_160     \
+                                               << (FLAGS_MODE_SHIFT - 1))
+#define                FLAGS_MODE_SHA224       (SHA_REG_MODE_ALGO_SHA2_224     \
+                                               << (FLAGS_MODE_SHIFT - 1))
+#define                FLAGS_MODE_SHA256       (SHA_REG_MODE_ALGO_SHA2_256     \
+                                               << (FLAGS_MODE_SHIFT - 1))
+#define FLAGS_HMAC             20
+#define FLAGS_ERROR            21
+
+#define OP_UPDATE              1
+#define OP_FINAL               2
 
 #define OMAP_ALIGN_MASK                (sizeof(u32)-1)
 #define OMAP_ALIGNED           __attribute__((aligned(sizeof(u32))))
 
-#define BUFLEN         PAGE_SIZE
+#define BUFLEN                 PAGE_SIZE
 
 struct omap_sham_dev;
 
@@ -104,7 +145,7 @@ struct omap_sham_reqctx {
        unsigned long           flags;
        unsigned long           op;
 
-       u8                      digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED;
+       u8                      digest[SHA256_DIGEST_SIZE] OMAP_ALIGNED;
        size_t                  digcnt;
        size_t                  bufcnt;
        size_t                  buflen;
@@ -112,6 +153,7 @@ struct omap_sham_reqctx {
 
        /* walk state */
        struct scatterlist      *sg;
+       struct scatterlist      sgl;
        unsigned int            offset; /* offset in current sg */
        unsigned int            total;  /* total request */
 
@@ -120,8 +162,8 @@ struct omap_sham_reqctx {
 
 struct omap_sham_hmac_ctx {
        struct crypto_shash     *shash;
-       u8                      ipad[SHA1_MD5_BLOCK_SIZE];
-       u8                      opad[SHA1_MD5_BLOCK_SIZE];
+       u8                      ipad[SHA1_MD5_BLOCK_SIZE] OMAP_ALIGNED;
+       u8                      opad[SHA1_MD5_BLOCK_SIZE] OMAP_ALIGNED;
 };
 
 struct omap_sham_ctx {
@@ -137,22 +179,56 @@ struct omap_sham_ctx {
 
 #define OMAP_SHAM_QUEUE_LENGTH 1
 
+struct omap_sham_algs_info {
+       struct ahash_alg        *algs_list;
+       unsigned int            size;
+       unsigned int            registered;
+};
+
+struct omap_sham_pdata {
+       struct omap_sham_algs_info      *algs_info;
+       unsigned int    algs_info_size;
+       unsigned long   flags;
+       int             digest_size;
+
+       void            (*copy_hash)(struct ahash_request *req, int out);
+       void            (*write_ctrl)(struct omap_sham_dev *dd, size_t length,
+                                     int final, int dma);
+       void            (*trigger)(struct omap_sham_dev *dd, size_t length);
+       int             (*poll_irq)(struct omap_sham_dev *dd);
+       irqreturn_t     (*intr_hdlr)(int irq, void *dev_id);
+
+       u32             odigest_ofs;
+       u32             idigest_ofs;
+       u32             din_ofs;
+       u32             digcnt_ofs;
+       u32             rev_ofs;
+       u32             mask_ofs;
+       u32             sysstatus_ofs;
+
+       u32             major_mask;
+       u32             major_shift;
+       u32             minor_mask;
+       u32             minor_shift;
+};
+
 struct omap_sham_dev {
        struct list_head        list;
        unsigned long           phys_base;
        struct device           *dev;
        void __iomem            *io_base;
        int                     irq;
-       struct clk              *iclk;
        spinlock_t              lock;
        int                     err;
-       int                     dma;
-       int                     dma_lch;
+       unsigned int            dma;
+       struct dma_chan         *dma_lch;
        struct tasklet_struct   done_task;
 
        unsigned long           flags;
        struct crypto_queue     queue;
        struct ahash_request    *req;
+
+       const struct omap_sham_pdata    *pdata;
 };
 
 struct omap_sham_drv {
@@ -200,21 +276,44 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit)
        return 0;
 }
 
-static void omap_sham_copy_hash(struct ahash_request *req, int out)
+static void omap_sham_copy_hash_omap2(struct ahash_request *req, int out)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       struct omap_sham_dev *dd = ctx->dd;
        u32 *hash = (u32 *)ctx->digest;
        int i;
 
-       /* MD5 is almost unused. So copy sha1 size to reduce code */
-       for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) {
+       for (i = 0; i < dd->pdata->digest_size / sizeof(u32); i++) {
                if (out)
-                       hash[i] = omap_sham_read(ctx->dd,
-                                               SHA_REG_DIGEST(i));
+                       hash[i] = omap_sham_read(dd, SHA_REG_IDIGEST(dd, i));
                else
-                       omap_sham_write(ctx->dd,
-                                       SHA_REG_DIGEST(i), hash[i]);
+                       omap_sham_write(dd, SHA_REG_IDIGEST(dd, i), hash[i]);
+       }
+}
+
+static void omap_sham_copy_hash_omap4(struct ahash_request *req, int out)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
+       struct omap_sham_dev *dd = ctx->dd;
+       int i;
+
+       if (ctx->flags & BIT(FLAGS_HMAC)) {
+               struct crypto_ahash *tfm = crypto_ahash_reqtfm(dd->req);
+               struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+               struct omap_sham_hmac_ctx *bctx = tctx->base;
+               u32 *opad = (u32 *)bctx->opad;
+
+               for (i = 0; i < dd->pdata->digest_size / sizeof(u32); i++) {
+                       if (out)
+                               opad[i] = omap_sham_read(dd,
+                                               SHA_REG_ODIGEST(i));
+                       else
+                               omap_sham_write(dd, SHA_REG_ODIGEST(i),
+                                               opad[i]);
+               }
        }
+
+       omap_sham_copy_hash_omap2(req, out);
 }
 
 static void omap_sham_copy_ready_hash(struct ahash_request *req)
@@ -222,34 +321,44 @@ static void omap_sham_copy_ready_hash(struct ahash_request *req)
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        u32 *in = (u32 *)ctx->digest;
        u32 *hash = (u32 *)req->result;
-       int i;
+       int i, d, big_endian = 0;
 
        if (!hash)
                return;
 
-       if (likely(ctx->flags & BIT(FLAGS_SHA1))) {
-               /* SHA1 results are in big endian */
-               for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
+       switch (ctx->flags & FLAGS_MODE_MASK) {
+       case FLAGS_MODE_MD5:
+               d = MD5_DIGEST_SIZE / sizeof(u32);
+               break;
+       case FLAGS_MODE_SHA1:
+               /* OMAP2 SHA1 is big endian */
+               if (test_bit(FLAGS_BE32_SHA1, &ctx->dd->flags))
+                       big_endian = 1;
+               d = SHA1_DIGEST_SIZE / sizeof(u32);
+               break;
+       case FLAGS_MODE_SHA224:
+               d = SHA224_DIGEST_SIZE / sizeof(u32);
+               break;
+       case FLAGS_MODE_SHA256:
+               d = SHA256_DIGEST_SIZE / sizeof(u32);
+               break;
+       default:
+               d = 0;
+       }
+
+       if (big_endian)
+               for (i = 0; i < d; i++)
                        hash[i] = be32_to_cpu(in[i]);
-       } else {
-               /* MD5 results are in little endian */
-               for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++)
+       else
+               for (i = 0; i < d; i++)
                        hash[i] = le32_to_cpu(in[i]);
-       }
 }
 
 static int omap_sham_hw_init(struct omap_sham_dev *dd)
 {
-       clk_enable(dd->iclk);
+       pm_runtime_get_sync(dd->dev);
 
        if (!test_bit(FLAGS_INIT, &dd->flags)) {
-               omap_sham_write_mask(dd, SHA_REG_MASK,
-                       SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET);
-
-               if (omap_sham_wait(dd, SHA_REG_SYSSTATUS,
-                                       SHA_REG_SYSSTATUS_RESETDONE))
-                       return -ETIMEDOUT;
-
                set_bit(FLAGS_INIT, &dd->flags);
                dd->err = 0;
        }
@@ -257,23 +366,23 @@ static int omap_sham_hw_init(struct omap_sham_dev *dd)
        return 0;
 }
 
-static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
+static void omap_sham_write_ctrl_omap2(struct omap_sham_dev *dd, size_t length,
                                 int final, int dma)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
        u32 val = length << 5, mask;
 
        if (likely(ctx->digcnt))
-               omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt);
+               omap_sham_write(dd, SHA_REG_DIGCNT(dd), ctx->digcnt);
 
-       omap_sham_write_mask(dd, SHA_REG_MASK,
+       omap_sham_write_mask(dd, SHA_REG_MASK(dd),
                SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0),
                SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN);
        /*
         * Setting ALGO_CONST only for the first iteration
         * and CLOSE_HASH only for the last one.
         */
-       if (ctx->flags & BIT(FLAGS_SHA1))
+       if ((ctx->flags & FLAGS_MODE_MASK) == FLAGS_MODE_SHA1)
                val |= SHA_REG_CTRL_ALGO;
        if (!ctx->digcnt)
                val |= SHA_REG_CTRL_ALGO_CONST;
@@ -286,6 +395,81 @@ static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length,
        omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask);
 }
 
+static void omap_sham_trigger_omap2(struct omap_sham_dev *dd, size_t length)
+{
+}
+
+static int omap_sham_poll_irq_omap2(struct omap_sham_dev *dd)
+{
+       return omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY);
+}
+
+static void omap_sham_write_n(struct omap_sham_dev *dd, u32 offset,
+                                   u32 *value, int count)
+{
+       for (; count--; value++, offset += 4)
+               omap_sham_write(dd, offset, *value);
+}
+
+static void omap_sham_write_ctrl_omap4(struct omap_sham_dev *dd, size_t length,
+                                int final, int dma)
+{
+       struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
+       u32 val, mask;
+
+       /*
+        * Setting ALGO_CONST only for the first iteration and
+        * CLOSE_HASH only for the last one. Note that flags mode bits
+        * correspond to algorithm encoding in mode register.
+        */
+       val = (ctx->flags & FLAGS_MODE_MASK) >> (FLAGS_MODE_SHIFT - 1);
+       if (!ctx->digcnt) {
+               struct crypto_ahash *tfm = crypto_ahash_reqtfm(dd->req);
+               struct omap_sham_ctx *tctx = crypto_ahash_ctx(tfm);
+               struct omap_sham_hmac_ctx *bctx = tctx->base;
+
+               val |= SHA_REG_MODE_ALGO_CONSTANT;
+
+               if (ctx->flags & BIT(FLAGS_HMAC)) {
+                       val |= SHA_REG_MODE_HMAC_KEY_PROC;
+                       omap_sham_write_n(dd, SHA_REG_ODIGEST(0),
+                                         (u32 *)bctx->ipad,
+                                         SHA1_BLOCK_SIZE / sizeof(u32));
+                       ctx->digcnt += SHA1_BLOCK_SIZE;
+               }
+       }
+
+       if (final) {
+               val |= SHA_REG_MODE_CLOSE_HASH;
+
+               if (ctx->flags & BIT(FLAGS_HMAC))
+                       val |= SHA_REG_MODE_HMAC_OUTER_HASH;
+       }
+
+       mask = SHA_REG_MODE_ALGO_CONSTANT | SHA_REG_MODE_CLOSE_HASH |
+              SHA_REG_MODE_ALGO_MASK | SHA_REG_MODE_HMAC_OUTER_HASH |
+              SHA_REG_MODE_HMAC_KEY_PROC;
+
+       dev_dbg(dd->dev, "ctrl: %08x, flags: %08lx\n", val, ctx->flags);
+       omap_sham_write_mask(dd, SHA_REG_MODE, val, mask);
+       omap_sham_write(dd, SHA_REG_IRQENA, SHA_REG_IRQENA_OUTPUT_RDY);
+       omap_sham_write_mask(dd, SHA_REG_MASK(dd),
+                            SHA_REG_MASK_IT_EN |
+                                    (dma ? SHA_REG_MASK_DMA_EN : 0),
+                            SHA_REG_MASK_IT_EN | SHA_REG_MASK_DMA_EN);
+}
+
+static void omap_sham_trigger_omap4(struct omap_sham_dev *dd, size_t length)
+{
+       omap_sham_write(dd, SHA_REG_LENGTH, length);
+}
+
+static int omap_sham_poll_irq_omap4(struct omap_sham_dev *dd)
+{
+       return omap_sham_wait(dd, SHA_REG_IRQSTATUS,
+                             SHA_REG_IRQSTATUS_INPUT_RDY);
+}
+
 static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
                              size_t length, int final)
 {
@@ -296,12 +480,13 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
        dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
                                                ctx->digcnt, length, final);
 
-       omap_sham_write_ctrl(dd, length, final, 0);
+       dd->pdata->write_ctrl(dd, length, final, 0);
+       dd->pdata->trigger(dd, length);
 
        /* should be non-zero before next lines to disable clocks later */
        ctx->digcnt += length;
 
-       if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY))
+       if (dd->pdata->poll_irq(dd))
                return -ETIMEDOUT;
 
        if (final)
@@ -312,30 +497,73 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf,
        len32 = DIV_ROUND_UP(length, sizeof(u32));
 
        for (count = 0; count < len32; count++)
-               omap_sham_write(dd, SHA_REG_DIN(count), buffer[count]);
+               omap_sham_write(dd, SHA_REG_DIN(dd, count), buffer[count]);
 
        return -EINPROGRESS;
 }
 
+static void omap_sham_dma_callback(void *param)
+{
+       struct omap_sham_dev *dd = param;
+
+       set_bit(FLAGS_DMA_READY, &dd->flags);
+       tasklet_schedule(&dd->done_task);
+}
+
 static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
-                             size_t length, int final)
+                             size_t length, int final, int is_sg)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
-       int len32;
+       struct dma_async_tx_descriptor *tx;
+       struct dma_slave_config cfg;
+       int len32, ret;
 
        dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n",
                                                ctx->digcnt, length, final);
 
-       len32 = DIV_ROUND_UP(length, sizeof(u32));
+       memset(&cfg, 0, sizeof(cfg));
+
+       cfg.dst_addr = dd->phys_base + SHA_REG_DIN(dd, 0);
+       cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+       cfg.dst_maxburst = DST_MAXBURST;
+
+       ret = dmaengine_slave_config(dd->dma_lch, &cfg);
+       if (ret) {
+               pr_err("omap-sham: can't configure dmaengine slave: %d\n", ret);
+               return ret;
+       }
+
+       len32 = DIV_ROUND_UP(length, DMA_MIN) * DMA_MIN;
+
+       if (is_sg) {
+               /*
+                * The SG entry passed in may not have the 'length' member
+                * set correctly so use a local SG entry (sgl) with the
+                * proper value for 'length' instead.  If this is not done,
+                * the dmaengine may try to DMA the incorrect amount of data.
+                */
+               sg_init_table(&ctx->sgl, 1);
+               ctx->sgl.page_link = ctx->sg->page_link;
+               ctx->sgl.offset = ctx->sg->offset;
+               sg_dma_len(&ctx->sgl) = len32;
+               sg_dma_address(&ctx->sgl) = sg_dma_address(ctx->sg);
+
+               tx = dmaengine_prep_slave_sg(dd->dma_lch, &ctx->sgl, 1,
+                       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       } else {
+               tx = dmaengine_prep_slave_single(dd->dma_lch, dma_addr, len32,
+                       DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       }
 
-       omap_set_dma_transfer_params(dd->dma_lch, OMAP_DMA_DATA_TYPE_S32, len32,
-                       1, OMAP_DMA_SYNC_PACKET, dd->dma,
-                               OMAP_DMA_DST_SYNC_PREFETCH);
+       if (!tx) {
+               dev_err(dd->dev, "prep_slave_sg/single() failed\n");
+               return -EINVAL;
+       }
 
-       omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC,
-                               dma_addr, 0, 0);
+       tx->callback = omap_sham_dma_callback;
+       tx->callback_param = dd;
 
-       omap_sham_write_ctrl(dd, length, final, 1);
+       dd->pdata->write_ctrl(dd, length, final, 1);
 
        ctx->digcnt += length;
 
@@ -344,7 +572,10 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr,
 
        set_bit(FLAGS_DMA_ACTIVE, &dd->flags);
 
-       omap_start_dma(dd->dma_lch);
+       dmaengine_submit(tx);
+       dma_async_issue_pending(dd->dma_lch);
+
+       dd->pdata->trigger(dd, length);
 
        return -EINPROGRESS;
 }
@@ -391,6 +622,8 @@ static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
                                        struct omap_sham_reqctx *ctx,
                                        size_t length, int final)
 {
+       int ret;
+
        ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen,
                                       DMA_TO_DEVICE);
        if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
@@ -400,8 +633,12 @@ static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd,
 
        ctx->flags &= ~BIT(FLAGS_SG);
 
-       /* next call does not fail... so no unmap in the case of error */
-       return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final);
+       ret = omap_sham_xmit_dma(dd, ctx->dma_addr, length, final, 0);
+       if (ret != -EINPROGRESS)
+               dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen,
+                                DMA_TO_DEVICE);
+
+       return ret;
 }
 
 static int omap_sham_update_dma_slow(struct omap_sham_dev *dd)
@@ -436,6 +673,7 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
        unsigned int length, final, tail;
        struct scatterlist *sg;
+       int ret;
 
        if (!ctx->total)
                return 0;
@@ -443,6 +681,15 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
        if (ctx->bufcnt || ctx->offset)
                return omap_sham_update_dma_slow(dd);
 
+       /*
+        * Don't use the sg interface when the transfer size is less
+        * than the number of elements in a DMA frame.  Otherwise,
+        * the dmaengine infrastructure will calculate that it needs
+        * to transfer 0 frames which ultimately fails.
+        */
+       if (ctx->total < (DST_MAXBURST * sizeof(u32)))
+               return omap_sham_update_dma_slow(dd);
+
        dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
                        ctx->digcnt, ctx->bufcnt, ctx->total);
 
@@ -480,8 +727,11 @@ static int omap_sham_update_dma_start(struct omap_sham_dev *dd)
 
        final = (ctx->flags & BIT(FLAGS_FINUP)) && !ctx->total;
 
-       /* next call does not fail... so no unmap in the case of error */
-       return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final);
+       ret = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final, 1);
+       if (ret != -EINPROGRESS)
+               dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
+
+       return ret;
 }
 
 static int omap_sham_update_cpu(struct omap_sham_dev *dd)
@@ -500,7 +750,8 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd)
 {
        struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req);
 
-       omap_stop_dma(dd->dma_lch);
+       dmaengine_terminate_all(dd->dma_lch);
+
        if (ctx->flags & BIT(FLAGS_SG)) {
                dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
                if (ctx->sg->length == ctx->offset) {
@@ -542,18 +793,33 @@ static int omap_sham_init(struct ahash_request *req)
        dev_dbg(dd->dev, "init: digest size: %d\n",
                crypto_ahash_digestsize(tfm));
 
-       if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
-               ctx->flags |= BIT(FLAGS_SHA1);
+       switch (crypto_ahash_digestsize(tfm)) {
+       case MD5_DIGEST_SIZE:
+               ctx->flags |= FLAGS_MODE_MD5;
+               break;
+       case SHA1_DIGEST_SIZE:
+               ctx->flags |= FLAGS_MODE_SHA1;
+               break;
+       case SHA224_DIGEST_SIZE:
+               ctx->flags |= FLAGS_MODE_SHA224;
+               break;
+       case SHA256_DIGEST_SIZE:
+               ctx->flags |= FLAGS_MODE_SHA256;
+               break;
+       }
 
        ctx->bufcnt = 0;
        ctx->digcnt = 0;
        ctx->buflen = BUFLEN;
 
        if (tctx->flags & BIT(FLAGS_HMAC)) {
-               struct omap_sham_hmac_ctx *bctx = tctx->base;
+               if (!test_bit(FLAGS_AUTO_XOR, &dd->flags)) {
+                       struct omap_sham_hmac_ctx *bctx = tctx->base;
+
+                       memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE);
+                       ctx->bufcnt = SHA1_MD5_BLOCK_SIZE;
+               }
 
-               memcpy(ctx->buffer, bctx->ipad, SHA1_MD5_BLOCK_SIZE);
-               ctx->bufcnt = SHA1_MD5_BLOCK_SIZE;
                ctx->flags |= BIT(FLAGS_HMAC);
        }
 
@@ -587,7 +853,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd)
        struct omap_sham_reqctx *ctx = ahash_request_ctx(req);
        int err = 0, use_dma = 1;
 
-       if (ctx->bufcnt <= 64)
+       if (ctx->bufcnt <= DMA_MIN)
                /* faster to handle last block with cpu */
                use_dma = 0;
 
@@ -630,7 +896,8 @@ static int omap_sham_finish(struct ahash_request *req)
 
        if (ctx->digcnt) {
                omap_sham_copy_ready_hash(req);
-               if (ctx->flags & BIT(FLAGS_HMAC))
+               if ((ctx->flags & BIT(FLAGS_HMAC)) &&
+                               !test_bit(FLAGS_AUTO_XOR, &dd->flags))
                        err = omap_sham_finish_hmac(req);
        }
 
@@ -645,7 +912,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
        struct omap_sham_dev *dd = ctx->dd;
 
        if (!err) {
-               omap_sham_copy_hash(req, 1);
+               dd->pdata->copy_hash(req, 1);
                if (test_bit(FLAGS_FINAL, &dd->flags))
                        err = omap_sham_finish(req);
        } else {
@@ -655,7 +922,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err)
        /* atomic operation is not needed here */
        dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) |
                        BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY));
-       clk_disable(dd->iclk);
+
+       pm_runtime_put_sync(dd->dev);
 
        if (req->base.complete)
                req->base.complete(&req->base, err);
@@ -702,19 +970,9 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd,
        if (err)
                goto err1;
 
-       omap_set_dma_dest_params(dd->dma_lch, 0,
-                       OMAP_DMA_AMODE_CONSTANT,
-                       dd->phys_base + SHA_REG_DIN(0), 0, 16);
-
-       omap_set_dma_dest_burst_mode(dd->dma_lch,
-                       OMAP_DMA_DATA_BURST_16);
-
-       omap_set_dma_src_burst_mode(dd->dma_lch,
-                       OMAP_DMA_DATA_BURST_4);
-
        if (ctx->digcnt)
                /* request has changed - restore hash */
-               omap_sham_copy_hash(req, 0);
+               dd->pdata->copy_hash(req, 0);
 
        if (ctx->op == OP_UPDATE) {
                err = omap_sham_update_req(dd);
@@ -853,7 +1111,21 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
        struct omap_sham_hmac_ctx *bctx = tctx->base;
        int bs = crypto_shash_blocksize(bctx->shash);
        int ds = crypto_shash_digestsize(bctx->shash);
+       struct omap_sham_dev *dd = NULL, *tmp;
        int err, i;
+
+       spin_lock_bh(&sham.lock);
+       if (!tctx->dd) {
+               list_for_each_entry(tmp, &sham.dev_list, list) {
+                       dd = tmp;
+                       break;
+               }
+               tctx->dd = dd;
+       } else {
+               dd = tctx->dd;
+       }
+       spin_unlock_bh(&sham.lock);
+
        err = crypto_shash_setkey(tctx->fallback, key, keylen);
        if (err)
                return err;
@@ -870,11 +1142,14 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
        }
 
        memset(bctx->ipad + keylen, 0, bs - keylen);
-       memcpy(bctx->opad, bctx->ipad, bs);
 
-       for (i = 0; i < bs; i++) {
-               bctx->ipad[i] ^= 0x36;
-               bctx->opad[i] ^= 0x5c;
+       if (!test_bit(FLAGS_AUTO_XOR, &dd->flags)) {
+               memcpy(bctx->opad, bctx->ipad, bs);
+
+               for (i = 0; i < bs; i++) {
+                       bctx->ipad[i] ^= 0x36;
+                       bctx->opad[i] ^= 0x5c;
+               }
        }
 
        return err;
@@ -924,6 +1199,16 @@ static int omap_sham_cra_sha1_init(struct crypto_tfm *tfm)
        return omap_sham_cra_init_alg(tfm, "sha1");
 }
 
+static int omap_sham_cra_sha224_init(struct crypto_tfm *tfm)
+{
+       return omap_sham_cra_init_alg(tfm, "sha224");
+}
+
+static int omap_sham_cra_sha256_init(struct crypto_tfm *tfm)
+{
+       return omap_sham_cra_init_alg(tfm, "sha256");
+}
+
 static int omap_sham_cra_md5_init(struct crypto_tfm *tfm)
 {
        return omap_sham_cra_init_alg(tfm, "md5");
@@ -942,7 +1227,7 @@ static void omap_sham_cra_exit(struct crypto_tfm *tfm)
        }
 }
 
-static struct ahash_alg algs[] = {
+static struct ahash_alg algs_sha1_md5[] = {
 {
        .init           = omap_sham_init,
        .update         = omap_sham_update,
@@ -1041,6 +1326,102 @@ static struct ahash_alg algs[] = {
 }
 };
 
+/* OMAP4 has some algs in addition to what OMAP2 has */
+static struct ahash_alg algs_sha224_sha256[] = {
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .halg.digestsize        = SHA224_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "sha224",
+               .cra_driver_name        = "omap-sha224",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA224_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .halg.digestsize        = SHA256_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "sha256",
+               .cra_driver_name        = "omap-sha256",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA256_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx),
+               .cra_alignmask          = 0,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .setkey         = omap_sham_setkey,
+       .halg.digestsize        = SHA224_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "hmac(sha224)",
+               .cra_driver_name        = "omap-hmac-sha224",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA224_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx) +
+                                       sizeof(struct omap_sham_hmac_ctx),
+               .cra_alignmask          = OMAP_ALIGN_MASK,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_sha224_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+{
+       .init           = omap_sham_init,
+       .update         = omap_sham_update,
+       .final          = omap_sham_final,
+       .finup          = omap_sham_finup,
+       .digest         = omap_sham_digest,
+       .setkey         = omap_sham_setkey,
+       .halg.digestsize        = SHA256_DIGEST_SIZE,
+       .halg.base      = {
+               .cra_name               = "hmac(sha256)",
+               .cra_driver_name        = "omap-hmac-sha256",
+               .cra_priority           = 100,
+               .cra_flags              = CRYPTO_ALG_TYPE_AHASH |
+                                               CRYPTO_ALG_ASYNC |
+                                               CRYPTO_ALG_NEED_FALLBACK,
+               .cra_blocksize          = SHA256_BLOCK_SIZE,
+               .cra_ctxsize            = sizeof(struct omap_sham_ctx) +
+                                       sizeof(struct omap_sham_hmac_ctx),
+               .cra_alignmask          = OMAP_ALIGN_MASK,
+               .cra_module             = THIS_MODULE,
+               .cra_init               = omap_sham_cra_sha256_init,
+               .cra_exit               = omap_sham_cra_exit,
+       }
+},
+};
+
 static void omap_sham_done_task(unsigned long data)
 {
        struct omap_sham_dev *dd = (struct omap_sham_dev *)data;
@@ -1079,7 +1460,19 @@ finish:
        omap_sham_finish_req(dd->req, err);
 }
 
-static irqreturn_t omap_sham_irq(int irq, void *dev_id)
+static irqreturn_t omap_sham_irq_common(struct omap_sham_dev *dd)
+{
+       if (!test_bit(FLAGS_BUSY, &dd->flags)) {
+               dev_warn(dd->dev, "Interrupt when no active requests.\n");
+       } else {
+               set_bit(FLAGS_OUTPUT_READY, &dd->flags);
+               tasklet_schedule(&dd->done_task);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t omap_sham_irq_omap2(int irq, void *dev_id)
 {
        struct omap_sham_dev *dd = dev_id;
 
@@ -1091,61 +1484,188 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id)
                                 SHA_REG_CTRL_OUTPUT_READY);
        omap_sham_read(dd, SHA_REG_CTRL);
 
-       if (!test_bit(FLAGS_BUSY, &dd->flags)) {
-               dev_warn(dd->dev, "Interrupt when no active requests.\n");
-               return IRQ_HANDLED;
-       }
+       return omap_sham_irq_common(dd);
+}
 
-       set_bit(FLAGS_OUTPUT_READY, &dd->flags);
-       tasklet_schedule(&dd->done_task);
+static irqreturn_t omap_sham_irq_omap4(int irq, void *dev_id)
+{
+       struct omap_sham_dev *dd = dev_id;
 
-       return IRQ_HANDLED;
+       omap_sham_write_mask(dd, SHA_REG_MASK(dd), 0, SHA_REG_MASK_IT_EN);
+
+       return omap_sham_irq_common(dd);
 }
 
-static void omap_sham_dma_callback(int lch, u16 ch_status, void *data)
+static struct omap_sham_algs_info omap_sham_algs_info_omap2[] = {
+       {
+               .algs_list      = algs_sha1_md5,
+               .size           = ARRAY_SIZE(algs_sha1_md5),
+       },
+};
+
+static const struct omap_sham_pdata omap_sham_pdata_omap2 = {
+       .algs_info      = omap_sham_algs_info_omap2,
+       .algs_info_size = ARRAY_SIZE(omap_sham_algs_info_omap2),
+       .flags          = BIT(FLAGS_BE32_SHA1),
+       .digest_size    = SHA1_DIGEST_SIZE,
+       .copy_hash      = omap_sham_copy_hash_omap2,
+       .write_ctrl     = omap_sham_write_ctrl_omap2,
+       .trigger        = omap_sham_trigger_omap2,
+       .poll_irq       = omap_sham_poll_irq_omap2,
+       .intr_hdlr      = omap_sham_irq_omap2,
+       .idigest_ofs    = 0x00,
+       .din_ofs        = 0x1c,
+       .digcnt_ofs     = 0x14,
+       .rev_ofs        = 0x5c,
+       .mask_ofs       = 0x60,
+       .sysstatus_ofs  = 0x64,
+       .major_mask     = 0xf0,
+       .major_shift    = 4,
+       .minor_mask     = 0x0f,
+       .minor_shift    = 0,
+};
+
+#ifdef CONFIG_OF
+static struct omap_sham_algs_info omap_sham_algs_info_omap4[] = {
+       {
+               .algs_list      = algs_sha1_md5,
+               .size           = ARRAY_SIZE(algs_sha1_md5),
+       },
+       {
+               .algs_list      = algs_sha224_sha256,
+               .size           = ARRAY_SIZE(algs_sha224_sha256),
+       },
+};
+
+static const struct omap_sham_pdata omap_sham_pdata_omap4 = {
+       .algs_info      = omap_sham_algs_info_omap4,
+       .algs_info_size = ARRAY_SIZE(omap_sham_algs_info_omap4),
+       .flags          = BIT(FLAGS_AUTO_XOR),
+       .digest_size    = SHA256_DIGEST_SIZE,
+       .copy_hash      = omap_sham_copy_hash_omap4,
+       .write_ctrl     = omap_sham_write_ctrl_omap4,
+       .trigger        = omap_sham_trigger_omap4,
+       .poll_irq       = omap_sham_poll_irq_omap4,
+       .intr_hdlr      = omap_sham_irq_omap4,
+       .idigest_ofs    = 0x020,
+       .din_ofs        = 0x080,
+       .digcnt_ofs     = 0x040,
+       .rev_ofs        = 0x100,
+       .mask_ofs       = 0x110,
+       .sysstatus_ofs  = 0x114,
+       .major_mask     = 0x0700,
+       .major_shift    = 8,
+       .minor_mask     = 0x003f,
+       .minor_shift    = 0,
+};
+
+static const struct of_device_id omap_sham_of_match[] = {
+       {
+               .compatible     = "ti,omap2-sham",
+               .data           = &omap_sham_pdata_omap2,
+       },
+       {
+               .compatible     = "ti,omap4-sham",
+               .data           = &omap_sham_pdata_omap4,
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, omap_sham_of_match);
+
+static int omap_sham_get_res_of(struct omap_sham_dev *dd,
+               struct device *dev, struct resource *res)
 {
-       struct omap_sham_dev *dd = data;
+       struct device_node *node = dev->of_node;
+       const struct of_device_id *match;
+       int err = 0;
 
-       if (ch_status != OMAP_DMA_BLOCK_IRQ) {
-               pr_err("omap-sham DMA error status: 0x%hx\n", ch_status);
-               dd->err = -EIO;
-               clear_bit(FLAGS_INIT, &dd->flags);/* request to re-initialize */
+       match = of_match_device(of_match_ptr(omap_sham_of_match), dev);
+       if (!match) {
+               dev_err(dev, "no compatible OF match\n");
+               err = -EINVAL;
+               goto err;
        }
 
-       set_bit(FLAGS_DMA_READY, &dd->flags);
-       tasklet_schedule(&dd->done_task);
+       err = of_address_to_resource(node, 0, res);
+       if (err < 0) {
+               dev_err(dev, "can't translate OF node address\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       dd->irq = of_irq_to_resource(node, 0, NULL);
+       if (!dd->irq) {
+               dev_err(dev, "can't translate OF irq value\n");
+               err = -EINVAL;
+               goto err;
+       }
+
+       dd->dma = -1; /* Dummy value that's unused */
+       dd->pdata = match->data;
+
+err:
+       return err;
 }
+#else
+static const struct of_device_id omap_sham_of_match[] = {
+       {},
+};
 
-static int omap_sham_dma_init(struct omap_sham_dev *dd)
+static int omap_sham_get_res_of(struct omap_sham_dev *dd,
+               struct device *dev, struct resource *res)
 {
-       int err;
+       return -EINVAL;
+}
+#endif
 
-       dd->dma_lch = -1;
+static int omap_sham_get_res_pdev(struct omap_sham_dev *dd,
+               struct platform_device *pdev, struct resource *res)
+{
+       struct device *dev = &pdev->dev;
+       struct resource *r;
+       int err = 0;
 
-       err = omap_request_dma(dd->dma, dev_name(dd->dev),
-                       omap_sham_dma_callback, dd, &dd->dma_lch);
-       if (err) {
-               dev_err(dd->dev, "Unable to request DMA channel\n");
-               return err;
+       /* Get the base address */
+       r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!r) {
+               dev_err(dev, "no MEM resource info\n");
+               err = -ENODEV;
+               goto err;
        }
+       memcpy(res, r, sizeof(*res));
 
-       return 0;
-}
+       /* Get the IRQ */
+       dd->irq = platform_get_irq(pdev, 0);
+       if (dd->irq < 0) {
+               dev_err(dev, "no IRQ resource info\n");
+               err = dd->irq;
+               goto err;
+       }
 
-static void omap_sham_dma_cleanup(struct omap_sham_dev *dd)
-{
-       if (dd->dma_lch >= 0) {
-               omap_free_dma(dd->dma_lch);
-               dd->dma_lch = -1;
+       /* Get the DMA */
+       r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+       if (!r) {
+               dev_err(dev, "no DMA resource info\n");
+               err = -ENODEV;
+               goto err;
        }
+       dd->dma = r->start;
+
+       /* Only OMAP2/3 can be non-DT */
+       dd->pdata = &omap_sham_pdata_omap2;
+
+err:
+       return err;
 }
 
 static int omap_sham_probe(struct platform_device *pdev)
 {
        struct omap_sham_dev *dd;
        struct device *dev = &pdev->dev;
-       struct resource *res;
+       struct resource res;
+       dma_cap_mask_t mask;
        int err, i, j;
+       u32 rev;
 
        dd = kzalloc(sizeof(struct omap_sham_dev), GFP_KERNEL);
        if (dd == NULL) {
@@ -1161,89 +1681,75 @@ static int omap_sham_probe(struct platform_device *pdev)
        tasklet_init(&dd->done_task, omap_sham_done_task, (unsigned long)dd);
        crypto_init_queue(&dd->queue, OMAP_SHAM_QUEUE_LENGTH);
 
-       dd->irq = -1;
-
-       /* Get the base address */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(dev, "no MEM resource info\n");
-               err = -ENODEV;
-               goto res_err;
-       }
-       dd->phys_base = res->start;
-
-       /* Get the DMA */
-       res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-       if (!res) {
-               dev_err(dev, "no DMA resource info\n");
-               err = -ENODEV;
+       err = (dev->of_node) ? omap_sham_get_res_of(dd, dev, &res) :
+                              omap_sham_get_res_pdev(dd, pdev, &res);
+       if (err)
                goto res_err;
-       }
-       dd->dma = res->start;
 
-       /* Get the IRQ */
-       dd->irq = platform_get_irq(pdev,  0);
-       if (dd->irq < 0) {
-               dev_err(dev, "no IRQ resource info\n");
-               err = dd->irq;
+       dd->io_base = devm_request_and_ioremap(dev, &res);
+       if (!dd->io_base) {
+               dev_err(dev, "can't ioremap\n");
+               err = -ENOMEM;
                goto res_err;
        }
+       dd->phys_base = res.start;
 
-       err = request_irq(dd->irq, omap_sham_irq,
-                       IRQF_TRIGGER_LOW, dev_name(dev), dd);
+       err = request_irq(dd->irq, dd->pdata->intr_hdlr, IRQF_TRIGGER_LOW,
+                         dev_name(dev), dd);
        if (err) {
                dev_err(dev, "unable to request irq.\n");
                goto res_err;
        }
 
-       err = omap_sham_dma_init(dd);
-       if (err)
-               goto dma_err;
+       dma_cap_zero(mask);
+       dma_cap_set(DMA_SLAVE, mask);
 
-       /* Initializing the clock */
-       dd->iclk = clk_get(dev, "ick");
-       if (IS_ERR(dd->iclk)) {
-               dev_err(dev, "clock intialization failed.\n");
-               err = PTR_ERR(dd->iclk);
-               goto clk_err;
+       dd->dma_lch = dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
+                                                      &dd->dma, dev, "rx");
+       if (!dd->dma_lch) {
+               dev_err(dev, "unable to obtain RX DMA engine channel %u\n",
+                       dd->dma);
+               err = -ENXIO;
+               goto dma_err;
        }
 
-       dd->io_base = ioremap(dd->phys_base, SZ_4K);
-       if (!dd->io_base) {
-               dev_err(dev, "can't ioremap\n");
-               err = -ENOMEM;
-               goto io_err;
-       }
+       dd->flags |= dd->pdata->flags;
+
+       pm_runtime_enable(dev);
+       pm_runtime_get_sync(dev);
+       rev = omap_sham_read(dd, SHA_REG_REV(dd));
+       pm_runtime_put_sync(&pdev->dev);
 
-       clk_enable(dd->iclk);
        dev_info(dev, "hw accel on OMAP rev %u.%u\n",
-               (omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MAJOR) >> 4,
-               omap_sham_read(dd, SHA_REG_REV) & SHA_REG_REV_MINOR);
-       clk_disable(dd->iclk);
+               (rev & dd->pdata->major_mask) >> dd->pdata->major_shift,
+               (rev & dd->pdata->minor_mask) >> dd->pdata->minor_shift);
 
        spin_lock(&sham.lock);
        list_add_tail(&dd->list, &sham.dev_list);
        spin_unlock(&sham.lock);
 
-       for (i = 0; i < ARRAY_SIZE(algs); i++) {
-               err = crypto_register_ahash(&algs[i]);
-               if (err)
-                       goto err_algs;
+       for (i = 0; i < dd->pdata->algs_info_size; i++) {
+               for (j = 0; j < dd->pdata->algs_info[i].size; j++) {
+                       err = crypto_register_ahash(
+                                       &dd->pdata->algs_info[i].algs_list[j]);
+                       if (err)
+                               goto err_algs;
+
+                       dd->pdata->algs_info[i].registered++;
+               }
        }
 
        return 0;
 
 err_algs:
-       for (j = 0; j < i; j++)
-               crypto_unregister_ahash(&algs[j]);
-       iounmap(dd->io_base);
-io_err:
-       clk_put(dd->iclk);
-clk_err:
-       omap_sham_dma_cleanup(dd);
+       for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+               for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+                       crypto_unregister_ahash(
+                                       &dd->pdata->algs_info[i].algs_list[j]);
+       pm_runtime_disable(dev);
+       dma_release_channel(dd->dma_lch);
 dma_err:
-       if (dd->irq >= 0)
-               free_irq(dd->irq, dd);
+       free_irq(dd->irq, dd);
 res_err:
        kfree(dd);
        dd = NULL;
@@ -1256,7 +1762,7 @@ data_err:
 static int omap_sham_remove(struct platform_device *pdev)
 {
        static struct omap_sham_dev *dd;
-       int i;
+       int i, j;
 
        dd = platform_get_drvdata(pdev);
        if (!dd)
@@ -1264,33 +1770,51 @@ static int omap_sham_remove(struct platform_device *pdev)
        spin_lock(&sham.lock);
        list_del(&dd->list);
        spin_unlock(&sham.lock);
-       for (i = 0; i < ARRAY_SIZE(algs); i++)
-               crypto_unregister_ahash(&algs[i]);
+       for (i = dd->pdata->algs_info_size - 1; i >= 0; i--)
+               for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--)
+                       crypto_unregister_ahash(
+                                       &dd->pdata->algs_info[i].algs_list[j]);
        tasklet_kill(&dd->done_task);
-       iounmap(dd->io_base);
-       clk_put(dd->iclk);
-       omap_sham_dma_cleanup(dd);
-       if (dd->irq >= 0)
-               free_irq(dd->irq, dd);
+       pm_runtime_disable(&pdev->dev);
+       dma_release_channel(dd->dma_lch);
+       free_irq(dd->irq, dd);
        kfree(dd);
        dd = NULL;
 
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int omap_sham_suspend(struct device *dev)
+{
+       pm_runtime_put_sync(dev);
+       return 0;
+}
+
+static int omap_sham_resume(struct device *dev)
+{
+       pm_runtime_get_sync(dev);
+       return 0;
+}
+#endif
+
+static const struct dev_pm_ops omap_sham_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(omap_sham_suspend, omap_sham_resume)
+};
+
 static struct platform_driver omap_sham_driver = {
        .probe  = omap_sham_probe,
        .remove = omap_sham_remove,
        .driver = {
                .name   = "omap-sham",
                .owner  = THIS_MODULE,
+               .pm     = &omap_sham_pm_ops,
+               .of_match_table = omap_sham_of_match,
        },
 };
 
 static int __init omap_sham_mod_init(void)
 {
-       pr_info("loading %s driver\n", "omap-sham");
-
        return platform_driver_register(&omap_sham_driver);
 }
 
index 49ad8cb..4b31432 100644 (file)
@@ -580,7 +580,7 @@ static int s5p_aes_probe(struct platform_device *pdev)
                                     resource_size(res), pdev->name))
                return -EBUSY;
 
-       pdata->clk = clk_get(dev, "secss");
+       pdata->clk = devm_clk_get(dev, "secss");
        if (IS_ERR(pdata->clk)) {
                dev_err(dev, "failed to find secss clock source\n");
                return -ENOENT;
@@ -645,7 +645,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
 
  err_irq:
        clk_disable(pdata->clk);
-       clk_put(pdata->clk);
 
        s5p_dev = NULL;
        platform_set_drvdata(pdev, NULL);
@@ -667,7 +666,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
        tasklet_kill(&pdata->tasklet);
 
        clk_disable(pdata->clk);
-       clk_put(pdata->clk);
 
        s5p_dev = NULL;
        platform_set_drvdata(pdev, NULL);