summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 10:42:15 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-15 10:42:15 -0700
commitcb906953d2c3fd450655d9fa833f03690ad50c23 (patch)
tree06c5665afb24baee3ac49f62db61ca97918079b4
parent6c373ca89399c5a3f7ef210ad8f63dc3437da345 (diff)
parent3abafaf2192b1712079edfd4232b19877d6f41a5 (diff)
downloadkernel-cb906953d2c3fd450655d9fa833f03690ad50c23.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "Here is the crypto update for 4.1: New interfaces: - user-space interface for AEAD - user-space interface for RNG (i.e., pseudo RNG) New hashes: - ARMv8 SHA1/256 - ARMv8 AES - ARMv8 GHASH - ARM assembler and NEON SHA256 - MIPS OCTEON SHA1/256/512 - MIPS img-hash SHA1/256 and MD5 - Power 8 VMX AES/CBC/CTR/GHASH - PPC assembler AES, SHA1/256 and MD5 - Broadcom IPROC RNG driver Cleanups/fixes: - prevent internal helper algos from being exposed to user-space - merge common code from assembly/C SHA implementations - misc fixes" * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (169 commits) crypto: arm - workaround for building with old binutils crypto: arm/sha256 - avoid sha256 code on ARMv7-M crypto: x86/sha512_ssse3 - move SHA-384/512 SSSE3 implementation to base layer crypto: x86/sha256_ssse3 - move SHA-224/256 SSSE3 implementation to base layer crypto: x86/sha1_ssse3 - move SHA-1 SSSE3 implementation to base layer crypto: arm64/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer crypto: arm/sha2-ce - move SHA-224/256 ARMv8 implementation to base layer crypto: arm/sha256 - move SHA-224/256 ASM/NEON implementation to base layer crypto: arm/sha1-ce - move SHA-1 ARMv8 implementation to base layer crypto: arm/sha1_neon - move SHA-1 NEON implementation to base layer crypto: arm/sha1 - move SHA-1 ARM asm implementation to base layer crypto: sha512-generic - move to generic glue implementation crypto: sha256-generic - move to generic glue implementation crypto: sha1-generic - move to generic glue implementation crypto: sha512 - implement base layer for SHA-512 crypto: sha256 - implement base layer for SHA-256 crypto: sha1 - implement base layer for SHA-1 crypto: api - remove instance when test failed crypto: api - Move alg ref count init to crypto_check_alg ...
-rw-r--r--Documentation/DocBook/crypto-API.tmpl860
-rw-r--r--Documentation/crypto/crypto-API-userspace.txt205
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt12
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm/Kconfig3
-rw-r--r--arch/arm/crypto/Kconfig130
-rw-r--r--arch/arm/crypto/Makefile27
-rw-r--r--arch/arm/crypto/aes-ce-core.S518
-rw-r--r--arch/arm/crypto/aes-ce-glue.c524
-rw-r--r--arch/arm/crypto/aesbs-glue.c9
-rw-r--r--arch/arm/crypto/ghash-ce-core.S94
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c320
-rw-r--r--arch/arm/crypto/sha1-ce-core.S125
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c96
-rw-r--r--arch/arm/crypto/sha1.h (renamed from arch/arm/include/asm/crypto/sha1.h)3
-rw-r--r--arch/arm/crypto/sha1_glue.c112
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c137
-rw-r--r--arch/arm/crypto/sha2-ce-core.S125
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c114
-rw-r--r--arch/arm/crypto/sha256-armv4.pl716
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2808
-rw-r--r--arch/arm/crypto/sha256_glue.c128
-rw-r--r--arch/arm/crypto/sha256_glue.h14
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c101
-rw-r--r--arch/arm64/crypto/aes-glue.c12
-rw-r--r--arch/arm64/crypto/sha1-ce-core.S33
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c151
-rw-r--r--arch/arm64/crypto/sha2-ce-core.S29
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c227
-rw-r--r--arch/mips/cavium-octeon/crypto/Makefile5
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.c4
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.h83
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c8
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha1.c241
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha256.c280
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-sha512.c277
-rw-r--r--arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h14
-rw-r--r--arch/powerpc/crypto/Makefile8
-rw-r--r--arch/powerpc/crypto/aes-spe-core.S351
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c512
-rw-r--r--arch/powerpc/crypto/aes-spe-keys.S283
-rw-r--r--arch/powerpc/crypto/aes-spe-modes.S630
-rw-r--r--arch/powerpc/crypto/aes-spe-regs.h42
-rw-r--r--arch/powerpc/crypto/aes-tab-4k.S331
-rw-r--r--arch/powerpc/crypto/md5-asm.S243
-rw-r--r--arch/powerpc/crypto/md5-glue.c165
-rw-r--r--arch/powerpc/crypto/sha1-spe-asm.S299
-rw-r--r--arch/powerpc/crypto/sha1-spe-glue.c210
-rw-r--r--arch/powerpc/crypto/sha256-spe-asm.S323
-rw-r--r--arch/powerpc/crypto/sha256-spe-glue.c275
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c187
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c15
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c15
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c9
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c15
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c7
-rw-r--r--arch/x86/crypto/glue_helper.c1
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c15
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c15
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c15
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb.c9
-rw-r--r--arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c2
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c139
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S10
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S10
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S10
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c193
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S6
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S6
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S6
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c202
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c15
-rw-r--r--crypto/Kconfig142
-rw-r--r--crypto/Makefile1
-rw-r--r--crypto/ablk_helper.c3
-rw-r--r--crypto/algapi.c42
-rw-r--r--crypto/algif_aead.c666
-rw-r--r--crypto/algif_rng.c2
-rw-r--r--crypto/ansi_cprng.c6
-rw-r--r--crypto/api.c10
-rw-r--r--crypto/cryptd.c49
-rw-r--r--crypto/crypto_user.c39
-rw-r--r--crypto/drbg.c64
-rw-r--r--crypto/mcryptd.c25
-rw-r--r--crypto/proc.c3
-rw-r--r--crypto/sha1_generic.c102
-rw-r--r--crypto/sha256_generic.c133
-rw-r--r--crypto/sha512_generic.c123
-rw-r--r--crypto/tcrypt.c4
-rw-r--r--crypto/testmgr.c24
-rw-r--r--drivers/char/hw_random/Kconfig13
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/bcm63xx-rng.c120
-rw-r--r--drivers/char/hw_random/core.c45
-rw-r--r--drivers/char/hw_random/exynos-rng.c12
-rw-r--r--drivers/char/hw_random/iproc-rng200.c239
-rw-r--r--drivers/char/hw_random/msm-rng.c11
-rw-r--r--drivers/char/hw_random/octeon-rng.c4
-rw-r--r--drivers/char/hw_random/omap-rng.c23
-rw-r--r--drivers/char/hw_random/pseries-rng.c4
-rw-r--r--drivers/char/hw_random/xgene-rng.c10
-rw-r--r--drivers/crypto/Kconfig24
-rw-r--r--drivers/crypto/Makefile2
-rw-r--r--drivers/crypto/amcc/crypto4xx_core.c6
-rw-r--r--drivers/crypto/atmel-aes.c26
-rw-r--r--drivers/crypto/atmel-sha.c37
-rw-r--r--drivers/crypto/atmel-tdes.c3
-rw-r--r--drivers/crypto/caam/caamhash.c1
-rw-r--r--drivers/crypto/caam/caamrng.c6
-rw-r--r--drivers/crypto/ccp/Makefile9
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-cmac.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-xts.c4
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes.c3
-rw-r--r--drivers/crypto/ccp/ccp-crypto-main.c5
-rw-r--r--drivers/crypto/ccp/ccp-crypto-sha.c12
-rw-r--r--drivers/crypto/ccp/ccp-crypto.h3
-rw-r--r--drivers/crypto/ccp/ccp-dev.c7
-rw-r--r--drivers/crypto/ccp/ccp-dev.h12
-rw-r--r--drivers/crypto/ccp/ccp-ops.c24
-rw-r--r--drivers/crypto/ccp/ccp-pci.c21
-rw-r--r--drivers/crypto/ccp/ccp-platform.c111
-rw-r--r--drivers/crypto/img-hash.c1029
-rw-r--r--drivers/crypto/mxs-dcp.c2
-rw-r--r--drivers/crypto/omap-aes.c14
-rw-r--r--drivers/crypto/omap-sham.c2
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_devices.h1
-rw-r--r--drivers/crypto/qat/qat_common/adf_accel_engine.c35
-rw-r--r--drivers/crypto/qat/qat_common/adf_aer.c21
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg.c5
-rw-r--r--drivers/crypto/qat/qat_common/adf_cfg_strings.h10
-rw-r--r--drivers/crypto/qat/qat_common/adf_common_drv.h2
-rw-r--r--drivers/crypto/qat/qat_common/adf_ctl_drv.c66
-rw-r--r--drivers/crypto/qat/qat_common/adf_dev_mgr.c3
-rw-r--r--drivers/crypto/qat/qat_common/adf_init.c88
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport.c31
-rw-r--r--drivers/crypto/qat/qat_common/adf_transport_debug.c2
-rw-r--r--drivers/crypto/qat/qat_common/icp_qat_hw.h2
-rw-r--r--drivers/crypto/qat/qat_common/qat_crypto.c9
-rw-r--r--drivers/crypto/qat/qat_common/qat_hal.c6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_admin.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c3
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.h6
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_drv.c4
-rw-r--r--drivers/crypto/qat/qat_dh895xcc/adf_isr.c15
-rw-r--r--drivers/crypto/sahara.c51
-rw-r--r--drivers/crypto/talitos.c17
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c2
-rw-r--r--drivers/crypto/vmx/Kconfig8
-rw-r--r--drivers/crypto/vmx/Makefile19
-rw-r--r--drivers/crypto/vmx/aes.c139
-rw-r--r--drivers/crypto/vmx/aes_cbc.c184
-rw-r--r--drivers/crypto/vmx/aes_ctr.c167
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.h20
-rw-r--r--drivers/crypto/vmx/aesp8-ppc.pl1930
-rw-r--r--drivers/crypto/vmx/ghash.c214
-rw-r--r--drivers/crypto/vmx/ghashp8-ppc.pl228
-rw-r--r--drivers/crypto/vmx/ppc-xlate.pl207
-rw-r--r--drivers/crypto/vmx/vmx.c88
-rw-r--r--include/crypto/algapi.h2
-rw-r--r--include/crypto/rng.h3
-rw-r--r--include/crypto/sha.h15
-rw-r--r--include/crypto/sha1_base.h106
-rw-r--r--include/crypto/sha256_base.h128
-rw-r--r--include/crypto/sha512_base.h131
-rw-r--r--include/linux/crypto.h6
-rw-r--r--include/linux/hw_random.h4
-rw-r--r--lib/string.c2
168 files changed, 18223 insertions, 2202 deletions
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
index 04a8c24ead47..efc8d90a9a3f 100644
--- a/Documentation/DocBook/crypto-API.tmpl
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -509,6 +509,270 @@
select it due to the used type and mask field.
</para>
</sect1>
+
+ <sect1><title>Internal Structure of Kernel Crypto API</title>
+
+ <para>
+ The kernel crypto API has an internal structure where a cipher
+ implementation may use many layers and indirections. This section
+ shall help to clarify how the kernel crypto API uses
+ various components to implement the complete cipher.
+ </para>
+
+ <para>
+ The following subsections explain the internal structure based
+ on existing cipher implementations. The first section addresses
+ the most complex scenario where all other scenarios form a logical
+ subset.
+ </para>
+
+ <sect2><title>Generic AEAD Cipher Structure</title>
+
+ <para>
+ The following ASCII art decomposes the kernel crypto API layers
+ when using the AEAD cipher with the automated IV generation. The
+ shown example is used by the IPSEC layer.
+ </para>
+
+ <para>
+ For other use cases of AEAD ciphers, the ASCII art applies as
+ well, but the caller may not use the GIVCIPHER interface. In
+ this case, the caller must generate the IV.
+ </para>
+
+ <para>
+ The depicted example decomposes the AEAD cipher of GCM(AES) based
+ on the generic C implementations (gcm.c, aes-generic.c, ctr.c,
+ ghash-generic.c, seqiv.c). The generic implementation serves as an
+ example showing the complete logic of the kernel crypto API.
+ </para>
+
+ <para>
+ It is possible that some streamlined cipher implementations (like
+ AES-NI) provide implementations merging aspects which in the view
+ of the kernel crypto API cannot be decomposed into layers any more.
+ In case of the AES-NI implementation, the CTR mode, the GHASH
+ implementation and the AES cipher are all merged into one cipher
+ implementation registered with the kernel crypto API. In this case,
+ the concept described by the following ASCII art applies too. However,
+ the decomposition of GCM into the individual sub-components
+ by the kernel crypto API is not done any more.
+ </para>
+
+ <para>
+ Each block in the following ASCII art is an independent cipher
+ instance obtained from the kernel crypto API. Each block
+ is accessed by the caller or by other blocks using the API functions
+ defined by the kernel crypto API for the cipher implementation type.
+ </para>
+
+ <para>
+ The blocks below indicate the cipher type as well as the specific
+ logic implemented in the cipher.
+ </para>
+
+ <para>
+ The ASCII art picture also indicates the call structure, i.e. who
+ calls which component. The arrows point to the invoked block
+ where the caller uses the API applicable to the cipher type
+ specified for the block.
+ </para>
+
+ <programlisting>
+<![CDATA[
+kernel crypto API | IPSEC Layer
+ |
++-----------+ |
+| | (1)
+| givcipher | <----------------------------------- esp_output
+| (seqiv) | ---+
++-----------+ |
+ | (2)
++-----------+ |
+| | <--+ (2)
+| aead | <----------------------------------- esp_input
+| (gcm) | ------------+
++-----------+ |
+ | (3) | (5)
+ v v
++-----------+ +-----------+
+| | | |
+| ablkcipher| | ahash |
+| (ctr) | ---+ | (ghash) |
++-----------+ | +-----------+
+ |
++-----------+ | (4)
+| | <--+
+| cipher |
+| (aes) |
++-----------+
+]]>
+ </programlisting>
+
+ <para>
+ The following call sequence is applicable when the IPSEC layer
+ triggers an encryption operation with the esp_output function. During
+ configuration, the administrator set up the use of rfc4106(gcm(aes)) as
+ the cipher for ESP. The following call sequence is now depicted in the
+ ASCII art above:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
+ operation of the GIVCIPHER implementation.
+ </para>
+
+ <para>
+ In case of GCM, the SEQIV implementation is registered as GIVCIPHER
+ in crypto_rfc4106_alloc().
+ </para>
+
+ <para>
+ The SEQIV performs its operation to generate an IV where the core
+ function is seqiv_geniv().
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Now, SEQIV uses the AEAD API function calls to invoke the associated
+ AEAD cipher. In our case, during the instantiation of SEQIV, the
+ cipher handle for GCM is provided to SEQIV. This means that SEQIV
+ invokes AEAD cipher operations with the GCM cipher handle.
+ </para>
+
+ <para>
+ During instantiation of the GCM handle, the CTR(AES) and GHASH
+ ciphers are instantiated. The cipher handles for CTR(AES) and GHASH
+ are retained for later use.
+ </para>
+
+ <para>
+ The GCM implementation is responsible to invoke the CTR mode AES and
+ the GHASH cipher in the right manner to implement the GCM
+ specification.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The GCM AEAD cipher type implementation now invokes the ABLKCIPHER API
+ with the instantiated CTR(AES) cipher handle.
+ </para>
+
+ <para>
+ During instantiation of the CTR(AES) cipher, the CIPHER type
+ implementation of AES is instantiated. The cipher handle for AES is
+ retained.
+ </para>
+
+ <para>
+ That means that the ABLKCIPHER implementation of CTR(AES) only
+ implements the CTR block chaining mode. After performing the block
+ chaining operation, the CIPHER implementation of AES is invoked.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The ABLKCIPHER of CTR(AES) now invokes the CIPHER API with the AES
+ cipher handle to encrypt one block.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The GCM AEAD implementation also invokes the GHASH cipher
+ implementation via the AHASH API.
+ </para>
+ </listitem>
+ </orderedlist>
+
+ <para>
+ When the IPSEC layer triggers the esp_input() function, the same call
+ sequence is followed with the only difference that the operation starts
+ with step (2).
+ </para>
+ </sect2>
+
+ <sect2><title>Generic Block Cipher Structure</title>
+ <para>
+ Generic block ciphers follow the same concept as depicted with the ASCII
+ art picture above.
+ </para>
+
+ <para>
+ For example, CBC(AES) is implemented with cbc.c, and aes-generic.c. The
+ ASCII art picture above applies as well with the difference that only
+ step (4) is used and the ABLKCIPHER block chaining mode is CBC.
+ </para>
+ </sect2>
+
+ <sect2><title>Generic Keyed Message Digest Structure</title>
+ <para>
+ Keyed message digest implementations again follow the same concept as
+ depicted in the ASCII art picture above.
+ </para>
+
+ <para>
+ For example, HMAC(SHA256) is implemented with hmac.c and
+ sha256_generic.c. The following ASCII art illustrates the
+ implementation:
+ </para>
+
+ <programlisting>
+<![CDATA[
+kernel crypto API | Caller
+ |
++-----------+ (1) |
+| | <------------------ some_function
+| ahash |
+| (hmac) | ---+
++-----------+ |
+ | (2)
++-----------+ |
+| | <--+
+| shash |
+| (sha256) |
++-----------+
+]]>
+ </programlisting>
+
+ <para>
+ The following call sequence is applicable when a caller triggers
+ an HMAC operation:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ The AHASH API functions are invoked by the caller. The HMAC
+ implementation performs its operation as needed.
+ </para>
+
+ <para>
+ During initialization of the HMAC cipher, the SHASH cipher type of
+ SHA256 is instantiated. The cipher handle for the SHA256 instance is
+ retained.
+ </para>
+
+ <para>
+ At one time, the HMAC implementation requires a SHA256 operation
+ where the SHA256 cipher handle is used.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The HMAC instance now invokes the SHASH API with the SHA256
+ cipher handle to calculate the message digest.
+ </para>
+ </listitem>
+ </orderedlist>
+ </sect2>
+ </sect1>
</chapter>
<chapter id="Development"><title>Developing Cipher Algorithms</title>
@@ -808,6 +1072,602 @@
</sect1>
</chapter>
+ <chapter id="User"><title>User Space Interface</title>
+ <sect1><title>Introduction</title>
+ <para>
+ The concepts of the kernel crypto API visible to kernel space is fully
+ applicable to the user space interface as well. Therefore, the kernel
+ crypto API high level discussion for the in-kernel use cases applies
+ here as well.
+ </para>
+
+ <para>
+ The major difference, however, is that user space can only act as a
+ consumer and never as a provider of a transformation or cipher algorithm.
+ </para>
+
+ <para>
+ The following covers the user space interface exported by the kernel
+ crypto API. A working example of this description is libkcapi that
+ can be obtained from [1]. That library can be used by user space
+ applications that require cryptographic services from the kernel.
+ </para>
+
+ <para>
+ Some details of the in-kernel kernel crypto API aspects do not
+ apply to user space, however. This includes the difference between
+ synchronous and asynchronous invocations. The user space API call
+ is fully synchronous.
+ </para>
+
+ <para>
+ [1] http://www.chronox.de/libkcapi.html
+ </para>
+
+ </sect1>
+
+ <sect1><title>User Space API General Remarks</title>
+ <para>
+ The kernel crypto API is accessible from user space. Currently,
+ the following ciphers are accessible:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>Message digest including keyed message digest (HMAC, CMAC)</para>
+ </listitem>
+
+ <listitem>
+ <para>Symmetric ciphers</para>
+ </listitem>
+
+ <listitem>
+ <para>AEAD ciphers</para>
+ </listitem>
+
+ <listitem>
+ <para>Random Number Generators</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The interface is provided via socket type using the type AF_ALG.
+ In addition, the setsockopt option type is SOL_ALG. In case the
+ user space header files do not export these flags yet, use the
+ following macros:
+ </para>
+
+ <programlisting>
+#ifndef AF_ALG
+#define AF_ALG 38
+#endif
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+ </programlisting>
+
+ <para>
+ A cipher is accessed with the same name as done for the in-kernel
+ API calls. This includes the generic vs. unique naming schema for
+ ciphers as well as the enforcement of priorities for generic names.
+ </para>
+
+ <para>
+ To interact with the kernel crypto API, a socket must be
+ created by the user space application. User space invokes the cipher
+ operation with the send()/write() system call family. The result of the
+ cipher operation is obtained with the read()/recv() system call family.
+ </para>
+
+ <para>
+ The following API calls assume that the socket descriptor
+ is already opened by the user space application and discusses only
+ the kernel crypto API specific invocations.
+ </para>
+
+ <para>
+ To initialize the socket interface, the following sequence has to
+ be performed by the consumer:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ Create a socket of type AF_ALG with the struct sockaddr_alg
+ parameter specified below for the different cipher types.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Invoke bind with the socket descriptor
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Invoke accept with the socket descriptor. The accept system call
+ returns a new file descriptor that is to be used to interact with
+ the particular cipher instance. When invoking send/write or recv/read
+ system calls to send data to the kernel or obtain data from the
+ kernel, the file descriptor returned by accept must be used.
+ </para>
+ </listitem>
+ </orderedlist>
+ </sect1>
+
+ <sect1><title>In-place Cipher operation</title>
+ <para>
+ Just like the in-kernel operation of the kernel crypto API, the user
+ space interface allows the cipher operation in-place. That means that
+ the input buffer used for the send/write system call and the output
+ buffer used by the read/recv system call may be one and the same.
+ This is of particular interest for symmetric cipher operations where a
+ copying of the output data to its final destination can be avoided.
+ </para>
+
+ <para>
+ If a consumer on the other hand wants to maintain the plaintext and
+ the ciphertext in different memory locations, all a consumer needs
+ to do is to provide different memory pointers for the encryption and
+ decryption operation.
+ </para>
+ </sect1>
+
+ <sect1><title>Message Digest API</title>
+ <para>
+ The message digest type to be used for the cipher operation is
+ selected when invoking the bind syscall. bind requires the caller
+ to provide a filled struct sockaddr data structure. This data
+ structure must be filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash", /* this selects the hash logic in the kernel */
+ .salg_name = "sha1" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ The salg_type value "hash" applies to message digests and keyed
+ message digests. Though, a keyed message digest is referenced by
+ the appropriate salg_name. Please see below for the setsockopt
+ interface that explains how the key can be set for a keyed message
+ digest.
+ </para>
+
+ <para>
+ Using the send() system call, the application provides the data that
+ should be processed with the message digest. The send system call
+ allows the following flags to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ message digest update function where the final hash is not
+ yet calculated. If the flag is not set, the send system call
+ calculates the final message digest immediately.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ With the recv() system call, the application can read the message
+ digest from the kernel crypto API. If the buffer is too small for the
+ message digest, the flag MSG_TRUNC is set by the kernel.
+ </para>
+
+ <para>
+ In order to set a message digest key, the calling application must use
+ the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
+ operation is performed without the initial HMAC state change caused by
+ the key.
+ </para>
+ </sect1>
+
+ <sect1><title>Symmetric Cipher API</title>
+ <para>
+ The operation is very similar to the message digest discussion.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher", /* this selects the symmetric cipher */
+ .salg_name = "cbc(aes)" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Before data can be sent to the kernel using the write/send system
+ call family, the consumer must set the key. The key setting is
+ described with the setsockopt invocation below.
+ </para>
+
+ <para>
+ Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
+ specified with the data structure provided by the sendmsg() system call.
+ </para>
+
+ <para>
+ The sendmsg system call parameter of struct msghdr is embedded into the
+ struct cmsghdr data structure. See recv(2) and cmsg(3) for more
+ information on how the cmsghdr data structure is used together with the
+ send/recv system call family. That cmsghdr data structure holds the
+ following information specified with a separate header instances:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ specification of the cipher operation type with one of these flags:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>ALG_OP_ENCRYPT - encryption of data</para>
+ </listitem>
+ <listitem>
+ <para>ALG_OP_DECRYPT - decryption of data</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the IV information marked with the flag ALG_SET_IV
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The send system call family allows the following flag to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ cipher update function where more input data is expected
+ with a subsequent invocation of the send system call.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note: The kernel reports -EINVAL for any unexpected data. The caller
+ must make sure that all data matches the constraints given in
+ /proc/crypto for the selected cipher.
+ </para>
+
+ <para>
+ With the recv() system call, the application can read the result of
+ the cipher operation from the kernel crypto API. The output buffer
+ must be at least as large as to hold all blocks of the encrypted or
+ decrypted data. If the output data size is smaller, only as many
+ blocks are returned that fit into that output buffer size.
+ </para>
+ </sect1>
+
+ <sect1><title>AEAD Cipher API</title>
+ <para>
+ The operation is very similar to the symmetric cipher discussion.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "aead", /* this selects the symmetric cipher */
+ .salg_name = "gcm(aes)" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Before data can be sent to the kernel using the write/send system
+ call family, the consumer must set the key. The key setting is
+ described with the setsockopt invocation below.
+ </para>
+
+ <para>
+ In addition, before data can be sent to the kernel using the
+ write/send system call family, the consumer must set the authentication
+ tag size. To set the authentication tag size, the caller must use the
+ setsockopt invocation described below.
+ </para>
+
+ <para>
+ Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
+ specified with the data structure provided by the sendmsg() system call.
+ </para>
+
+ <para>
+ The sendmsg system call parameter of struct msghdr is embedded into the
+ struct cmsghdr data structure. See recv(2) and cmsg(3) for more
+ information on how the cmsghdr data structure is used together with the
+ send/recv system call family. That cmsghdr data structure holds the
+ following information specified with a separate header instances:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ specification of the cipher operation type with one of these flags:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>ALG_OP_ENCRYPT - encryption of data</para>
+ </listitem>
+ <listitem>
+ <para>ALG_OP_DECRYPT - decryption of data</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the IV information marked with the flag ALG_SET_IV
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the associated authentication data (AAD) with the
+ flag ALG_SET_AEAD_ASSOCLEN. The AAD is sent to the kernel together
+ with the plaintext / ciphertext. See below for the memory structure.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The send system call family allows the following flag to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ cipher update function where more input data is expected
+ with a subsequent invocation of the send system call.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note: The kernel reports -EINVAL for any unexpected data. The caller
+ must make sure that all data matches the constraints given in
+ /proc/crypto for the selected cipher.
+ </para>
+
+ <para>
+ With the recv() system call, the application can read the result of
+ the cipher operation from the kernel crypto API. The output buffer
+ must be at least as large as defined with the memory structure below.
+ If the output data size is smaller, the cipher operation is not performed.
+ </para>
+
+ <para>
+ The authenticated decryption operation may indicate an integrity error.
+ Such breach in integrity is marked with the -EBADMSG error code.
+ </para>
+
+ <sect2><title>AEAD Memory Structure</title>
+ <para>
+ The AEAD cipher operates with the following information that
+ is communicated between user and kernel space as one data stream:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>plaintext or ciphertext</para>
+ </listitem>
+
+ <listitem>
+ <para>associated authentication data (AAD)</para>
+ </listitem>
+
+ <listitem>
+ <para>authentication tag</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The sizes of the AAD and the authentication tag are provided with
+ the sendmsg and setsockopt calls (see there). As the kernel knows
+ the size of the entire data stream, the kernel is now able to
+ calculate the right offsets of the data components in the data
+ stream.
+ </para>
+
+ <para>
+ The user space caller must arrange the aforementioned information
+ in the following order:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ AEAD encryption input: AAD || plaintext
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ AEAD decryption input: AAD || ciphertext || authentication tag
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The output buffer the user space caller provides must be at least as
+ large to hold the following data:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ AEAD encryption output: ciphertext || authentication tag
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ AEAD decryption output: plaintext
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ </sect1>
+
+ <sect1><title>Random Number Generator API</title>
+ <para>
+ Again, the operation is very similar to the other APIs.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "rng", /* this selects the symmetric cipher */
+ .salg_name = "drbg_nopr_sha256" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Depending on the RNG type, the RNG must be seeded. The seed is provided
+ using the setsockopt interface to set the key. For example, the
+ ansi_cprng requires a seed. The DRBGs do not require a seed, but
+ may be seeded.
+ </para>
+
+ <para>
+ Using the read()/recvmsg() system calls, random numbers can be obtained.
+ The kernel generates at most 128 bytes in one call. If user space
+ requires more data, multiple calls to read()/recvmsg() must be made.
+ </para>
+
+ <para>
+ WARNING: The user space caller may invoke the initially mentioned
+ accept system call multiple times. In this case, the returned file
+ descriptors have the same state.
+ </para>
+
+ </sect1>
+
+ <sect1><title>Zero-Copy Interface</title>
+ <para>
+ In addition to the send/write/read/recv system call familty, the AF_ALG
+ interface can be accessed with the zero-copy interface of splice/vmsplice.
+ As the name indicates, the kernel tries to avoid a copy operation into
+ kernel space.
+ </para>
+
+ <para>
+ The zero-copy operation requires data to be aligned at the page boundary.
+ Non-aligned data can be used as well, but may require more operations of
+ the kernel which would defeat the speed gains obtained from the zero-copy
+ interface.
+ </para>
+
+ <para>
+ The system-interent limit for the size of one zero-copy operation is
+ 16 pages. If more data is to be sent to AF_ALG, user space must slice
+ the input into segments with a maximum size of 16 pages.
+ </para>
+
+ <para>
+ Zero-copy can be used with the following code example (a complete working
+ example is provided with libkcapi):
+ </para>
+
+ <programlisting>
+int pipes[2];
+
+pipe(pipes);
+/* input data in iov */
+vmsplice(pipes[1], iov, iovlen, SPLICE_F_GIFT);
+/* opfd is the file descriptor returned from accept() system call */
+splice(pipes[0], NULL, opfd, NULL, ret, 0);
+read(opfd, out, outlen);
+ </programlisting>
+
+ </sect1>
+
+ <sect1><title>Setsockopt Interface</title>
+ <para>
+ In addition to the read/recv and send/write system call handling
+ to send and retrieve data subject to the cipher operation, a consumer
+ also needs to set the additional information for the cipher operation.
+ This additional information is set using the setsockopt system call
+ that must be invoked with the file descriptor of the open cipher
+ (i.e. the file descriptor returned by the accept system call).
+ </para>
+
+ <para>
+ Each setsockopt invocation must use the level SOL_ALG.
+ </para>
+
+ <para>
+ The setsockopt interface allows setting the following data using
+ the mentioned optname:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ ALG_SET_KEY -- Setting the key. Key setting is applicable to:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>the skcipher cipher type (symmetric ciphers)</para>
+ </listitem>
+ <listitem>
+ <para>the hash cipher type (keyed message digests)</para>
+ </listitem>
+ <listitem>
+ <para>the AEAD cipher type</para>
+ </listitem>
+ <listitem>
+ <para>the RNG cipher type to provide the seed</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size
+ for AEAD ciphers. For a encryption operation, the authentication
+ tag of the given size will be generated. For a decryption operation,
+ the provided ciphertext is assumed to contain an authentication tag
+ of the given size (see section about AEAD memory layout below).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ </sect1>
+
+ <sect1><title>User space API example</title>
+ <para>
+ Please see [1] for libkcapi which provides an easy-to-use wrapper
+ around the aforementioned Netlink kernel interface. [1] also contains
+ a test application that invokes all libkcapi API calls.
+ </para>
+
+ <para>
+ [1] http://www.chronox.de/libkcapi.html
+ </para>
+
+ </sect1>
+
+ </chapter>
+
<chapter id="API"><title>Programming Interface</title>
<sect1><title>Block Cipher Context Data Structures</title>
!Pinclude/linux/crypto.h Block Cipher Context Data Structures
diff --git a/Documentation/crypto/crypto-API-userspace.txt b/Documentation/crypto/crypto-API-userspace.txt
deleted file mode 100644
index ac619cd90300..000000000000
--- a/Documentation/crypto/crypto-API-userspace.txt
+++ /dev/null
@@ -1,205 +0,0 @@
-Introduction
-============
-
-The concepts of the kernel crypto API visible to kernel space is fully
-applicable to the user space interface as well. Therefore, the kernel crypto API
-high level discussion for the in-kernel use cases applies here as well.
-
-The major difference, however, is that user space can only act as a consumer
-and never as a provider of a transformation or cipher algorithm.
-
-The following covers the user space interface exported by the kernel crypto
-API. A working example of this description is libkcapi that can be obtained from
-[1]. That library can be used by user space applications that require
-cryptographic services from the kernel.
-
-Some details of the in-kernel kernel crypto API aspects do not
-apply to user space, however. This includes the difference between synchronous
-and asynchronous invocations. The user space API call is fully synchronous.
-In addition, only a subset of all cipher types are available as documented
-below.
-
-
-User space API general remarks
-==============================
-
-The kernel crypto API is accessible from user space. Currently, the following
-ciphers are accessible:
-
- * Message digest including keyed message digest (HMAC, CMAC)
-
- * Symmetric ciphers
-
-Note, AEAD ciphers are currently not supported via the symmetric cipher
-interface.
-
-The interface is provided via Netlink using the type AF_ALG. In addition, the
-setsockopt option type is SOL_ALG. In case the user space header files do not
-export these flags yet, use the following macros:
-
-#ifndef AF_ALG
-#define AF_ALG 38
-#endif
-#ifndef SOL_ALG
-#define SOL_ALG 279
-#endif
-
-A cipher is accessed with the same name as done for the in-kernel API calls.
-This includes the generic vs. unique naming schema for ciphers as well as the
-enforcement of priorities for generic names.
-
-To interact with the kernel crypto API, a Netlink socket must be created by
-the user space application. User space invokes the cipher operation with the
-send/write system call family. The result of the cipher operation is obtained
-with the read/recv system call family.
-
-The following API calls assume that the Netlink socket descriptor is already
-opened by the user space application and discusses only the kernel crypto API
-specific invocations.
-
-To initialize a Netlink interface, the following sequence has to be performed
-by the consumer:
-
- 1. Create a socket of type AF_ALG with the struct sockaddr_alg parameter
- specified below for the different cipher types.
-
- 2. Invoke bind with the socket descriptor
-
- 3. Invoke accept with the socket descriptor. The accept system call
- returns a new file descriptor that is to be used to interact with
- the particular cipher instance. When invoking send/write or recv/read
- system calls to send data to the kernel or obtain data from the
- kernel, the file descriptor returned by accept must be used.
-
-In-place cipher operation
-=========================
-
-Just like the in-kernel operation of the kernel crypto API, the user space
-interface allows the cipher operation in-place. That means that the input buffer
-used for the send/write system call and the output buffer used by the read/recv
-system call may be one and the same. This is of particular interest for
-symmetric cipher operations where a copying of the output data to its final
-destination can be avoided.
-
-If a consumer on the other hand wants to maintain the plaintext and the
-ciphertext in different memory locations, all a consumer needs to do is to
-provide different memory pointers for the encryption and decryption operation.
-
-Message digest API
-==================
-
-The message digest type to be used for the cipher operation is selected when
-invoking the bind syscall. bind requires the caller to provide a filled
-struct sockaddr data structure. This data structure must be filled as follows:
-
-struct sockaddr_alg sa = {
- .salg_family = AF_ALG,
- .salg_type = "hash", /* this selects the hash logic in the kernel */
- .salg_name = "sha1" /* this is the cipher name */
-};
-
-The salg_type value "hash" applies to message digests and keyed message digests.
-Though, a keyed message digest is referenced by the appropriate salg_name.
-Please see below for the setsockopt interface that explains how the key can be
-set for a keyed message digest.
-
-Using the send() system call, the application provides the data that should be
-processed with the message digest. The send system call allows the following
-flags to be specified:
-
- * MSG_MORE: If this flag is set, the send system call acts like a
- message digest update function where the final hash is not
- yet calculated. If the flag is not set, the send system call
- calculates the final message digest immediately.
-
-With the recv() system call, the application can read the message digest from
-the kernel crypto API. If the buffer is too small for the message digest, the
-flag MSG_TRUNC is set by the kernel.
-
-In order to set a message digest key, the calling application must use the
-setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC operation is
-performed without the initial HMAC state change caused by the key.
-
-
-Symmetric cipher API
-====================
-
-The operation is very similar to the message digest discussion. During
-initialization, the struct sockaddr data structure must be filled as follows:
-
-struct sockaddr_alg sa = {
- .salg_family = AF_ALG,
- .salg_type = "skcipher", /* this selects the symmetric cipher */
- .salg_name = "cbc(aes)" /* this is the cipher name */
-};
-
-Before data can be sent to the kernel using the write/send system call family,
-the consumer must set the key. The key setting is described with the setsockopt
-invocation below.
-
-Using the sendmsg() system call, the application provides the data that should
-be processed for encryption or decryption. In addition, the IV is specified
-with the data structure provided by the sendmsg() system call.
-
-The sendmsg system call parameter of struct msghdr is embedded into the
-struct cmsghdr data structure. See recv(2) and cmsg(3) for more information
-on how the cmsghdr data structure is used together with the send/recv system
-call family. That cmsghdr data structure holds the following information
-specified with a separate header instances:
-
- * specification of the cipher operation type with one of these flags:
- ALG_OP_ENCRYPT - encryption of data
- ALG_OP_DECRYPT - decryption of data
-
- * specification of the IV information marked with the flag ALG_SET_IV
-
-The send system call family allows the following flag to be specified:
-
- * MSG_MORE: If this flag is set, the send system call acts like a
- cipher update function where more input data is expected
- with a subsequent invocation of the send system call.
-
-Note: The kernel reports -EINVAL for any unexpected data. The caller must
-make sure that all data matches the constraints given in /proc/crypto for the
-selected cipher.
-
-With the recv() system call, the application can read the result of the
-cipher operation from the kernel crypto API. The output buffer must be at least
-as large as to hold all blocks of the encrypted or decrypted data. If the output
-data size is smaller, only as many blocks are returned that fit into that
-output buffer size.
-
-Setsockopt interface
-====================
-
-In addition to the read/recv and send/write system call handling to send and
-retrieve data subject to the cipher operation, a consumer also needs to set
-the additional information for the cipher operation. This additional information
-is set using the setsockopt system call that must be invoked with the file
-descriptor of the open cipher (i.e. the file descriptor returned by the
-accept system call).
-
-Each setsockopt invocation must use the level SOL_ALG.
-
-The setsockopt interface allows setting the following data using the mentioned
-optname:
-
- * ALG_SET_KEY -- Setting the key. Key setting is applicable to:
-
- - the skcipher cipher type (symmetric ciphers)
-
- - the hash cipher type (keyed message digests)
-
-User space API example
-======================
-
-Please see [1] for libkcapi which provides an easy-to-use wrapper around the
-aforementioned Netlink kernel interface. [1] also contains a test application
-that invokes all libkcapi API calls.
-
-[1] http://www.chronox.de/libkcapi.html
-
-Author
-======
-
-Stephan Mueller <smueller@chronox.de>
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 000000000000..91a3d757d641
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
+Imagination Technologies hardware hash accelerator
+
+The hash accelerator provides hardware hashing acceleration for
+SHA1, SHA224, SHA256 and MD5 hashes
+
+Required properties:
+
+- compatible : "img,hash-accelerator"
+- reg : Offset and length of the register set for the module, and the DMA port
+- interrupts : The designated IRQ line for the hashing module.
+- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : Should be "tx"
+- clocks : Clock specifiers
+- clock-names : "sys" Used to clock the hash block registers
+ "hash" Used to clock data through the accelerator
+
+Example:
+
+ hash: hash@18149600 {
+ compatible = "img,hash-accelerator";
+ reg = <0x18149600 0x100>, <0x18101100 0x4>;
+ interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 8 0xffffffff 0>;
+ dma-names = "tx";
+ clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+ clock-names = "sys", "hash";
+ };
diff --git a/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
new file mode 100644
index 000000000000..e25a456664b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
@@ -0,0 +1,12 @@
+HWRNG support for the iproc-rng200 driver
+
+Required properties:
+- compatible : "brcm,iproc-rng200"
+- reg : base address and size of control register block
+
+Example:
+
+rng {
+ compatible = "brcm,iproc-rng200";
+ reg = <0x18032000 0x28>;
+};
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ee1e79ea16b..7a8f367b4ebc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2825,6 +2825,7 @@ L: linux-crypto@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
S: Maintained
F: Documentation/crypto/
+F: Documentation/DocBook/crypto-API.tmpl
F: arch/*/crypto/
F: crypto/
F: drivers/crypto/
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index da1266c53c13..7cbf4ef5c6fd 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2175,6 +2175,9 @@ source "arch/arm/Kconfig.debug"
source "security/Kconfig"
source "crypto/Kconfig"
+if CRYPTO
+source "arch/arm/crypto/Kconfig"
+endif
source "lib/Kconfig"
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
new file mode 100644
index 000000000000..8da2207b0072
--- /dev/null
+++ b/arch/arm/crypto/Kconfig
@@ -0,0 +1,130 @@
+
+menuconfig ARM_CRYPTO
+ bool "ARM Accelerated Cryptographic Algorithms"
+ depends on ARM
+ help
+ Say Y here to choose from a selection of cryptographic algorithms
+ implemented using ARM specific CPU features or instructions.
+
+if ARM_CRYPTO
+
+config CRYPTO_SHA1_ARM
+ tristate "SHA1 digest algorithm (ARM-asm)"
+ select CRYPTO_SHA1
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using optimized ARM assembler.
+
+config CRYPTO_SHA1_ARM_NEON
+ tristate "SHA1 digest algorithm (ARM NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SHA1_ARM
+ select CRYPTO_SHA1
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using optimized ARM NEON assembly, when NEON instructions are
+ available.
+
+config CRYPTO_SHA1_ARM_CE
+ tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SHA1_ARM
+ select CRYPTO_HASH
+ help
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
+ using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA2_ARM_CE
+ tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SHA256_ARM
+ select CRYPTO_HASH
+ help
+ SHA-256 secure hash standard (DFIPS 180-2) implemented
+ using special ARMv8 Crypto Extensions.
+
+config CRYPTO_SHA256_ARM
+ tristate "SHA-224/256 digest algorithm (ARM-asm and NEON)"
+ select CRYPTO_HASH
+ depends on !CPU_V7M
+ help
+ SHA-256 secure hash standard (DFIPS 180-2) implemented
+ using optimized ARM assembler and NEON, when available.
+
+config CRYPTO_SHA512_ARM_NEON
+ tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_SHA512
+ select CRYPTO_HASH
+ help
+ SHA-512 secure hash standard (DFIPS 180-2) implemented
+ using ARM NEON instructions, when available.
+
+ This version of SHA implements a 512 bit hash with 256 bits of
+ security against collision attacks.
+
+ This code also includes SHA-384, a 384 bit hash with 192 bits
+ of security against collision attacks.
+
+config CRYPTO_AES_ARM
+ tristate "AES cipher algorithms (ARM-asm)"
+ depends on ARM
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES
+ help
+ Use optimized AES assembler routines for ARM platforms.
+
+ AES cipher algorithms (FIPS-197). AES uses the Rijndael
+ algorithm.
+
+ Rijndael appears to be consistently a very good performer in
+ both hardware and software across a wide range of computing
+ environments regardless of its use in feedback or non-feedback
+ modes. Its key setup time is excellent, and its key agility is
+ good. Rijndael's very low memory requirements make it very well
+ suited for restricted-space environments, in which it also
+ demonstrates excellent performance. Rijndael's operations are
+ among the easiest to defend against power and timing attacks.
+
+ The AES specifies three key sizes: 128, 192 and 256 bits
+
+ See <http://csrc.nist.gov/encryption/aes/> for more information.
+
+config CRYPTO_AES_ARM_BS
+ tristate "Bit sliced AES using NEON instructions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_AES_ARM
+ select CRYPTO_ABLK_HELPER
+ help
+ Use a faster and more secure NEON based implementation of AES in CBC,
+ CTR and XTS modes
+
+ Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
+ and for XTS mode encryption, CBC and XTS mode decryption speedup is
+ around 25%. (CBC encryption speed is not affected by this driver.)
+ This implementation does not rely on any lookup tables so it is
+ believed to be invulnerable to cache timing attacks.
+
+config CRYPTO_AES_ARM_CE
+ tristate "Accelerated AES using ARMv8 Crypto Extensions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_ABLK_HELPER
+ help
+ Use an implementation of AES in CBC, CTR and XTS modes that uses
+ ARMv8 Crypto Extensions
+
+config CRYPTO_GHASH_ARM_CE
+ tristate "PMULL-accelerated GHASH using ARMv8 Crypto Extensions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_HASH
+ select CRYPTO_CRYPTD
+ help
+ Use an implementation of GHASH (used by the GCM AEAD chaining mode)
+ that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
+ that is part of the ARMv8 Crypto Extensions
+
+endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index b48fa341648d..6ea828241fcb 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -6,13 +6,35 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
+ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+
+ifneq ($(ce-obj-y)$(ce-obj-m),)
+ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
+obj-y += $(ce-obj-y)
+obj-m += $(ce-obj-m)
+else
+$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
+$(warning $(ce-obj-y) $(ce-obj-m))
+endif
+endif
+
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
+sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
+sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+sha2-arm-ce-y := sha2-ce-core.o sha2-ce-glue.o
+aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
+ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
@@ -20,4 +42,7 @@ quiet_cmd_perl = PERL $@
$(src)/aesbs-core.S_shipped: $(src)/bsaes-armv7.pl
$(call cmd,perl)
-.PRECIOUS: $(obj)/aesbs-core.S
+$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
+ $(call cmd,perl)
+
+.PRECIOUS: $(obj)/aesbs-core.S $(obj)/sha256-core.S
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
new file mode 100644
index 000000000000..8cfa468ee570
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -0,0 +1,518 @@
+/*
+ * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .fpu crypto-neon-fp-armv8
+ .align 3
+
+ .macro enc_round, state, key
+ aese.8 \state, \key
+ aesmc.8 \state, \state
+ .endm
+
+ .macro dec_round, state, key
+ aesd.8 \state, \key
+ aesimc.8 \state, \state
+ .endm
+
+ .macro enc_dround, key1, key2
+ enc_round q0, \key1
+ enc_round q0, \key2
+ .endm
+
+ .macro dec_dround, key1, key2
+ dec_round q0, \key1
+ dec_round q0, \key2
+ .endm
+
+ .macro enc_fround, key1, key2, key3
+ enc_round q0, \key1
+ aese.8 q0, \key2
+ veor q0, q0, \key3
+ .endm
+
+ .macro dec_fround, key1, key2, key3
+ dec_round q0, \key1
+ aesd.8 q0, \key2
+ veor q0, q0, \key3
+ .endm
+
+ .macro enc_dround_3x, key1, key2
+ enc_round q0, \key1
+ enc_round q1, \key1
+ enc_round q2, \key1
+ enc_round q0, \key2
+ enc_round q1, \key2
+ enc_round q2, \key2
+ .endm
+
+ .macro dec_dround_3x, key1, key2
+ dec_round q0, \key1
+ dec_round q1, \key1
+ dec_round q2, \key1
+ dec_round q0, \key2
+ dec_round q1, \key2
+ dec_round q2, \key2
+ .endm
+
+ .macro enc_fround_3x, key1, key2, key3
+ enc_round q0, \key1
+ enc_round q1, \key1
+ enc_round q2, \key1
+ aese.8 q0, \key2
+ aese.8 q1, \key2
+ aese.8 q2, \key2
+ veor q0, q0, \key3
+ veor q1, q1, \key3
+ veor q2, q2, \key3
+ .endm
+
+ .macro dec_fround_3x, key1, key2, key3
+ dec_round q0, \key1
+ dec_round q1, \key1
+ dec_round q2, \key1
+ aesd.8 q0, \key2
+ aesd.8 q1, \key2
+ aesd.8 q2, \key2
+ veor q0, q0, \key3
+ veor q1, q1, \key3
+ veor q2, q2, \key3
+ .endm
+
+ .macro do_block, dround, fround
+ cmp r3, #12 @ which key size?
+ vld1.8 {q10-q11}, [ip]!
+ \dround q8, q9
+ vld1.8 {q12-q13}, [ip]!
+ \dround q10, q11
+ vld1.8 {q10-q11}, [ip]!
+ \dround q12, q13
+ vld1.8 {q12-q13}, [ip]!
+ \dround q10, q11
+ blo 0f @ AES-128: 10 rounds
+ vld1.8 {q10-q11}, [ip]!
+ beq 1f @ AES-192: 12 rounds
+ \dround q12, q13
+ vld1.8 {q12-q13}, [ip]
+ \dround q10, q11
+0: \fround q12, q13, q14
+ bx lr
+
+1: \dround q12, q13
+ \fround q10, q11, q14
+ bx lr
+ .endm
+
+ /*
+ * Internal, non-AAPCS compliant functions that implement the core AES
+ * transforms. These should preserve all registers except q0 - q2 and ip
+ * Arguments:
+ * q0 : first in/output block
+ * q1 : second in/output block (_3x version only)
+ * q2 : third in/output block (_3x version only)
+ * q8 : first round key
+ * q9 : secound round key
+ * ip : address of 3rd round key
+ * q14 : final round key
+ * r3 : number of rounds
+ */
+ .align 6
+aes_encrypt:
+ add ip, r2, #32 @ 3rd round key
+.Laes_encrypt_tweak:
+ do_block enc_dround, enc_fround
+ENDPROC(aes_encrypt)
+
+ .align 6
+aes_decrypt:
+ add ip, r2, #32 @ 3rd round key
+ do_block dec_dround, dec_fround
+ENDPROC(aes_decrypt)
+
+ .align 6
+aes_encrypt_3x:
+ add ip, r2, #32 @ 3rd round key
+ do_block enc_dround_3x, enc_fround_3x
+ENDPROC(aes_encrypt_3x)
+
+ .align 6
+aes_decrypt_3x:
+ add ip, r2, #32 @ 3rd round key
+ do_block dec_dround_3x, dec_fround_3x
+ENDPROC(aes_decrypt_3x)
+
+ .macro prepare_key, rk, rounds
+ add ip, \rk, \rounds, lsl #4
+ vld1.8 {q8-q9}, [\rk] @ load first 2 round keys
+ vld1.8 {q14}, [ip] @ load last round key
+ .endm
+
+ /*
+ * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks)
+ */
+ENTRY(ce_aes_ecb_encrypt)
+ push {r4, lr}
+ ldr r4, [sp, #8]
+ prepare_key r2, r3
+.Lecbencloop3x:
+ subs r4, r4, #3
+ bmi .Lecbenc1x
+ vld1.8 {q0-q1}, [r1, :64]!
+ vld1.8 {q2}, [r1, :64]!
+ bl aes_encrypt_3x
+ vst1.8 {q0-q1}, [r0, :64]!
+ vst1.8 {q2}, [r0, :64]!
+ b .Lecbencloop3x
+.Lecbenc1x:
+ adds r4, r4, #3
+ beq .Lecbencout
+.Lecbencloop:
+ vld1.8 {q0}, [r1, :64]!
+ bl aes_encrypt
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ bne .Lecbencloop
+.Lecbencout:
+ pop {r4, pc}
+ENDPROC(ce_aes_ecb_encrypt)
+
+ENTRY(ce_aes_ecb_decrypt)
+ push {r4, lr}
+ ldr r4, [sp, #8]
+ prepare_key r2, r3
+.Lecbdecloop3x:
+ subs r4, r4, #3
+ bmi .Lecbdec1x
+ vld1.8 {q0-q1}, [r1, :64]!
+ vld1.8 {q2}, [r1, :64]!
+ bl aes_decrypt_3x
+ vst1.8 {q0-q1}, [r0, :64]!
+ vst1.8 {q2}, [r0, :64]!
+ b .Lecbdecloop3x
+.Lecbdec1x:
+ adds r4, r4, #3
+ beq .Lecbdecout
+.Lecbdecloop:
+ vld1.8 {q0}, [r1, :64]!
+ bl aes_decrypt
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ bne .Lecbdecloop
+.Lecbdecout:
+ pop {r4, pc}
+ENDPROC(ce_aes_ecb_decrypt)
+
+ /*
+ * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 iv[])
+ */
+ENTRY(ce_aes_cbc_encrypt)
+ push {r4-r6, lr}
+ ldrd r4, r5, [sp, #16]
+ vld1.8 {q0}, [r5]
+ prepare_key r2, r3
+.Lcbcencloop:
+ vld1.8 {q1}, [r1, :64]! @ get next pt block
+ veor q0, q0, q1 @ ..and xor with iv
+ bl aes_encrypt
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ bne .Lcbcencloop
+ vst1.8 {q0}, [r5]
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_cbc_encrypt)
+
+ENTRY(ce_aes_cbc_decrypt)
+ push {r4-r6, lr}
+ ldrd r4, r5, [sp, #16]
+ vld1.8 {q6}, [r5] @ keep iv in q6
+ prepare_key r2, r3
+.Lcbcdecloop3x:
+ subs r4, r4, #3
+ bmi .Lcbcdec1x
+ vld1.8 {q0-q1}, [r1, :64]!
+ vld1.8 {q2}, [r1, :64]!
+ vmov q3, q0
+ vmov q4, q1
+ vmov q5, q2
+ bl aes_decrypt_3x
+ veor q0, q0, q6
+ veor q1, q1, q3
+ veor q2, q2, q4
+ vmov q6, q5
+ vst1.8 {q0-q1}, [r0, :64]!
+ vst1.8 {q2}, [r0, :64]!
+ b .Lcbcdecloop3x
+.Lcbcdec1x:
+ adds r4, r4, #3
+ beq .Lcbcdecout
+ vmov q15, q14 @ preserve last round key
+.Lcbcdecloop:
+ vld1.8 {q0}, [r1, :64]! @ get next ct block
+ veor q14, q15, q6 @ combine prev ct with last key
+ vmov q6, q0
+ bl aes_decrypt
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ bne .Lcbcdecloop
+.Lcbcdecout:
+ vst1.8 {q6}, [r5] @ keep iv in q6
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_cbc_decrypt)
+
+ /*
+ * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+ * int blocks, u8 ctr[])
+ */
+ENTRY(ce_aes_ctr_encrypt)
+ push {r4-r6, lr}
+ ldrd r4, r5, [sp, #16]
+ vld1.8 {q6}, [r5] @ load ctr
+ prepare_key r2, r3
+ vmov r6, s27 @ keep swabbed ctr in r6
+ rev r6, r6
+ cmn r6, r4 @ 32 bit overflow?
+ bcs .Lctrloop
+.Lctrloop3x:
+ subs r4, r4, #3
+ bmi .Lctr1x
+ add r6, r6, #1
+ vmov q0, q6
+ vmov q1, q6
+ rev ip, r6
+ add r6, r6, #1
+ vmov q2, q6
+ vmov s7, ip
+ rev ip, r6
+ add r6, r6, #1
+ vmov s11, ip
+ vld1.8 {q3-q4}, [r1, :64]!
+ vld1.8 {q5}, [r1, :64]!
+ bl aes_encrypt_3x
+ veor q0, q0, q3
+ veor q1, q1, q4
+ veor q2, q2, q5
+ rev ip, r6
+ vst1.8 {q0-q1}, [r0, :64]!
+ vst1.8 {q2}, [r0, :64]!
+ vmov s27, ip
+ b .Lctrloop3x
+.Lctr1x:
+ adds r4, r4, #3
+ beq .Lctrout
+.Lctrloop:
+ vmov q0, q6
+ bl aes_encrypt
+ subs r4, r4, #1
+ bmi .Lctrhalfblock @ blocks < 0 means 1/2 block
+ vld1.8 {q3}, [r1, :64]!
+ veor q3, q0, q3
+ vst1.8 {q3}, [r0, :64]!
+
+ adds r6, r6, #1 @ increment BE ctr
+ rev ip, r6
+ vmov s27, ip
+ bcs .Lctrcarry
+ teq r4, #0
+ bne .Lctrloop
+.Lctrout:
+ vst1.8 {q6}, [r5]
+ pop {r4-r6, pc}
+
+.Lctrhalfblock:
+ vld1.8 {d1}, [r1, :64]
+ veor d0, d0, d1
+ vst1.8 {d0}, [r0, :64]
+ pop {r4-r6, pc}
+
+.Lctrcarry:
+ .irp sreg, s26, s25, s24
+ vmov ip, \sreg @ load next word of ctr
+ rev ip, ip @ ... to handle the carry
+ adds ip, ip, #1
+ rev ip, ip
+ vmov \sreg, ip
+ bcc 0f
+ .endr
+0: teq r4, #0
+ beq .Lctrout
+ b .Lctrloop
+ENDPROC(ce_aes_ctr_encrypt)
+
+ /*
+ * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 iv[], u8 const rk2[], int first)
+ * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int blocks, u8 iv[], u8 const rk2[], int first)
+ */
+
+ .macro next_tweak, out, in, const, tmp
+ vshr.s64 \tmp, \in, #63
+ vand \tmp, \tmp, \const
+ vadd.u64 \out, \in, \in
+ vext.8 \tmp, \tmp, \tmp, #8
+ veor \out, \out, \tmp
+ .endm
+
+ .align 3
+.Lxts_mul_x:
+ .quad 1, 0x87
+
+ce_aes_xts_init:
+ vldr d14, .Lxts_mul_x
+ vldr d15, .Lxts_mul_x + 8
+
+ ldrd r4, r5, [sp, #16] @ load args
+ ldr r6, [sp, #28]
+ vld1.8 {q0}, [r5] @ load iv
+ teq r6, #1 @ start of a block?
+ bxne lr
+
+ @ Encrypt the IV in q0 with the second AES key. This should only
+ @ be done at the start of a block.
+ ldr r6, [sp, #24] @ load AES key 2
+ prepare_key r6, r3
+ add ip, r6, #32 @ 3rd round key of key 2
+ b .Laes_encrypt_tweak @ tail call
+ENDPROC(ce_aes_xts_init)
+
+ENTRY(ce_aes_xts_encrypt)
+ push {r4-r6, lr}
+
+ bl ce_aes_xts_init @ run shared prologue
+ prepare_key r2, r3
+ vmov q3, q0
+
+ teq r6, #0 @ start of a block?
+ bne .Lxtsenc3x
+
+.Lxtsencloop3x:
+ next_tweak q3, q3, q7, q6
+.Lxtsenc3x:
+ subs r4, r4, #3
+ bmi .Lxtsenc1x
+ vld1.8 {q0-q1}, [r1, :64]! @ get 3 pt blocks
+ vld1.8 {q2}, [r1, :64]!
+ next_tweak q4, q3, q7, q6
+ veor q0, q0, q3
+ next_tweak q5, q4, q7, q6
+ veor q1, q1, q4
+ veor q2, q2, q5
+ bl aes_encrypt_3x
+ veor q0, q0, q3
+ veor q1, q1, q4
+ veor q2, q2, q5
+ vst1.8 {q0-q1}, [r0, :64]! @ write 3 ct blocks
+ vst1.8 {q2}, [r0, :64]!
+ vmov q3, q5
+ teq r4, #0
+ beq .Lxtsencout
+ b .Lxtsencloop3x
+.Lxtsenc1x:
+ adds r4, r4, #3
+ beq .Lxtsencout
+.Lxtsencloop:
+ vld1.8 {q0}, [r1, :64]!
+ veor q0, q0, q3
+ bl aes_encrypt
+ veor q0, q0, q3
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ beq .Lxtsencout
+ next_tweak q3, q3, q7, q6
+ b .Lxtsencloop
+.Lxtsencout:
+ vst1.8 {q3}, [r5]
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_xts_encrypt)
+
+
+ENTRY(ce_aes_xts_decrypt)
+ push {r4-r6, lr}
+
+ bl ce_aes_xts_init @ run shared prologue
+ prepare_key r2, r3
+ vmov q3, q0
+
+ teq r6, #0 @ start of a block?
+ bne .Lxtsdec3x
+
+.Lxtsdecloop3x:
+ next_tweak q3, q3, q7, q6
+.Lxtsdec3x:
+ subs r4, r4, #3
+ bmi .Lxtsdec1x
+ vld1.8 {q0-q1}, [r1, :64]! @ get 3 ct blocks
+ vld1.8 {q2}, [r1, :64]!
+ next_tweak q4, q3, q7, q6
+ veor q0, q0, q3
+ next_tweak q5, q4, q7, q6
+ veor q1, q1, q4
+ veor q2, q2, q5
+ bl aes_decrypt_3x
+ veor q0, q0, q3
+ veor q1, q1, q4
+ veor q2, q2, q5
+ vst1.8 {q0-q1}, [r0, :64]! @ write 3 pt blocks
+ vst1.8 {q2}, [r0, :64]!
+ vmov q3, q5
+ teq r4, #0
+ beq .Lxtsdecout
+ b .Lxtsdecloop3x
+.Lxtsdec1x:
+ adds r4, r4, #3
+ beq .Lxtsdecout
+.Lxtsdecloop:
+ vld1.8 {q0}, [r1, :64]!
+ veor q0, q0, q3
+ add ip, r2, #32 @ 3rd round key
+ bl aes_decrypt
+ veor q0, q0, q3
+ vst1.8 {q0}, [r0, :64]!
+ subs r4, r4, #1
+ beq .Lxtsdecout
+ next_tweak q3, q3, q7, q6
+ b .Lxtsdecloop
+.Lxtsdecout:
+ vst1.8 {q3}, [r5]
+ pop {r4-r6, pc}
+ENDPROC(ce_aes_xts_decrypt)
+
+ /*
+ * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
+ * AES sbox substitution on each byte in
+ * 'input'
+ */
+ENTRY(ce_aes_sub)
+ vdup.32 q1, r0
+ veor q0, q0, q0
+ aese.8 q0, q1
+ vmov r0, s0
+ bx lr
+ENDPROC(ce_aes_sub)
+
+ /*
+ * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
+ * operation on round key *src
+ */
+ENTRY(ce_aes_invert)
+ vld1.8 {q0}, [r1]
+ aesimc.8 q0, q0
+ vst1.8 {q0}, [r0]
+ bx lr
+ENDPROC(ce_aes_invert)
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
new file mode 100644
index 000000000000..b445a5d56f43
--- /dev/null
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -0,0 +1,524 @@
+/*
+ * aes-ce-glue.c - wrapper code for ARMv8 AES
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/hwcap.h>
+#include <crypto/aes.h>
+#include <crypto/ablk_helper.h>
+#include <crypto/algapi.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+/* defined in aes-ce-core.S */
+asmlinkage u32 ce_aes_sub(u32 input);
+asmlinkage void ce_aes_invert(void *dst, void *src);
+
+asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks);
+
+asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 iv[]);
+
+asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+ int rounds, int blocks, u8 ctr[]);
+
+asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 iv[],
+ u8 const rk2[], int first);
+asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
+ int rounds, int blocks, u8 iv[],
+ u8 const rk2[], int first);
+
+struct aes_block {
+ u8 b[AES_BLOCK_SIZE];
+};
+
+static int num_rounds(struct crypto_aes_ctx *ctx)
+{
+ /*
+ * # of rounds specified by AES:
+ * 128 bit key 10 rounds
+ * 192 bit key 12 rounds
+ * 256 bit key 14 rounds
+ * => n byte key => 6 + (n/4) rounds
+ */
+ return 6 + ctx->key_length / 4;
+}
+
+static int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+ unsigned int key_len)
+{
+ /*
+ * The AES key schedule round constants
+ */
+ static u8 const rcon[] = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
+ };
+
+ u32 kwords = key_len / sizeof(u32);
+ struct aes_block *key_enc, *key_dec;
+ int i, j;
+
+ if (key_len != AES_KEYSIZE_128 &&
+ key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256)
+ return -EINVAL;
+
+ memcpy(ctx->key_enc, in_key, key_len);
+ ctx->key_length = key_len;
+
+ kernel_neon_begin();
+ for (i = 0; i < sizeof(rcon); i++) {
+ u32 *rki = ctx->key_enc + (i * kwords);
+ u32 *rko = rki + kwords;
+
+ rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
+ rko[0] = rko[0] ^ rki[0] ^ rcon[i];
+ rko[1] = rko[0] ^ rki[1];
+ rko[2] = rko[1] ^ rki[2];
+ rko[3] = rko[2] ^ rki[3];
+
+ if (key_len == AES_KEYSIZE_192) {
+ if (i >= 7)
+ break;
+ rko[4] = rko[3] ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ } else if (key_len == AES_KEYSIZE_256) {
+ if (i >= 6)
+ break;
+ rko[4] = ce_aes_sub(rko[3]) ^ rki[4];
+ rko[5] = rko[4] ^ rki[5];
+ rko[6] = rko[5] ^ rki[6];
+ rko[7] = rko[6] ^ rki[7];
+ }
+ }
+
+ /*
+ * Generate the decryption keys for the Equivalent Inverse Cipher.
+ * This involves reversing the order of the round keys, and applying
+ * the Inverse Mix Columns transformation on all but the first and
+ * the last one.
+ */
+ key_enc = (struct aes_block *)ctx->key_enc;
+ key_dec = (struct aes_block *)ctx->key_dec;
+ j = num_rounds(ctx);
+
+ key_dec[0] = key_enc[j];
+ for (i = 1, j--; j > 0; i++, j--)
+ ce_aes_invert(key_dec + i, key_enc + j);
+ key_dec[i] = key_enc[0];
+
+ kernel_neon_end();
+ return 0;
+}
+
+static int ce_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = ce_aes_expandkey(ctx, in_key, key_len);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+struct crypto_aes_xts_ctx {
+ struct crypto_aes_ctx key1;
+ struct crypto_aes_ctx __aligned(8) key2;
+};
+
+static int xts_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ ret = ce_aes_expandkey(&ctx->key1, in_key, key_len / 2);
+ if (!ret)
+ ret = ce_aes_expandkey(&ctx->key2, &in_key[key_len / 2],
+ key_len / 2);
+ if (!ret)
+ return 0;
+
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+ walk.iv);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
+ walk.iv);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+ return err;
+}
+
+static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ int err, blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ kernel_neon_begin();
+ while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+ ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+ walk.iv);
+ nbytes -= blocks * AES_BLOCK_SIZE;
+ if (nbytes && nbytes == walk.nbytes % AES_BLOCK_SIZE)
+ break;
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ if (nbytes) {
+ u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
+ u8 __aligned(8) tail[AES_BLOCK_SIZE];
+
+ /*
+ * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
+ * to tell aes_ctr_encrypt() to only read half a block.
+ */
+ blocks = (nbytes <= 8) ? -1 : 1;
+
+ ce_aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc,
+ num_rounds(ctx), blocks, walk.iv);
+ memcpy(tdst, tail, nbytes);
+ err = blkcipher_walk_done(desc, &walk, 0);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = num_rounds(&ctx->key1);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_enc, rounds, blocks,
+ walk.iv, (u8 *)ctx->key2.key_enc, first);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct crypto_aes_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ int err, first, rounds = num_rounds(&ctx->key1);
+ struct blkcipher_walk walk;
+ unsigned int blocks;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ kernel_neon_begin();
+ for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+ ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+ (u8 *)ctx->key1.key_dec, rounds, blocks,
+ walk.iv, (u8 *)ctx->key2.key_enc, first);
+ err = blkcipher_walk_done(desc, &walk,
+ walk.nbytes % AES_BLOCK_SIZE);
+ }
+ kernel_neon_end();
+
+ return err;
+}
+
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "__ecb-aes-ce",
+ .cra_driver_name = "__driver-ecb-aes-ce",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ce_aes_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+}, {
+ .cra_name = "__cbc-aes-ce",
+ .cra_driver_name = "__driver-cbc-aes-ce",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ce_aes_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+}, {
+ .cra_name = "__ctr-aes-ce",
+ .cra_driver_name = "__driver-ctr-aes-ce",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ce_aes_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+ },
+}, {
+ .cra_name = "__xts-aes-ce",
+ .cra_driver_name = "__driver-xts-aes-ce",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_blkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_set_key,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-aes-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 7,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_ablkcipher = {
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ }
+} };
+
+static int __init aes_init(void)
+{
+ if (!(elf_hwcap2 & HWCAP2_AES))
+ return -ENODEV;
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit aes_exit(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(aes_init);
+module_exit(aes_exit);
diff --git a/arch/arm/crypto/aesbs-glue.c b/arch/arm/crypto/aesbs-glue.c
index 15468fbbdea3..6d685298690e 100644
--- a/arch/arm/crypto/aesbs-glue.c
+++ b/arch/arm/crypto/aesbs-glue.c
@@ -301,7 +301,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__cbc-aes-neonbs",
.cra_driver_name = "__driver-cbc-aes-neonbs",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.cra_alignmask = 7,
@@ -319,7 +320,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__ctr-aes-neonbs",
.cra_driver_name = "__driver-ctr-aes-neonbs",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesbs_ctr_ctx),
.cra_alignmask = 7,
@@ -337,7 +339,8 @@ static struct crypto_alg aesbs_algs[] = { {
.cra_name = "__xts-aes-neonbs",
.cra_driver_name = "__driver-xts-aes-neonbs",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesbs_xts_ctx),
.cra_alignmask = 7,
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
new file mode 100644
index 000000000000..f6ab8bcc9efe
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -0,0 +1,94 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ SHASH .req q0
+ SHASH2 .req q1
+ T1 .req q2
+ T2 .req q3
+ MASK .req q4
+ XL .req q5
+ XM .req q6
+ XH .req q7
+ IN1 .req q7
+
+ SHASH_L .req d0
+ SHASH_H .req d1
+ SHASH2_L .req d2
+ T1_L .req d4
+ MASK_L .req d8
+ XL_L .req d10
+ XL_H .req d11
+ XM_L .req d12
+ XM_H .req d13
+ XH_L .req d14
+
+ .text
+ .fpu crypto-neon-fp-armv8
+
+ /*
+ * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ * struct ghash_key const *k, const char *head)
+ */
+ENTRY(pmull_ghash_update)
+ vld1.64 {SHASH}, [r3]
+ vld1.64 {XL}, [r1]
+ vmov.i8 MASK, #0xe1
+ vext.8 SHASH2, SHASH, SHASH, #8
+ vshl.u64 MASK, MASK, #57
+ veor SHASH2, SHASH2, SHASH
+
+ /* do the head block first, if supplied */
+ ldr ip, [sp]
+ teq ip, #0
+ beq 0f
+ vld1.64 {T1}, [ip]
+ teq r0, #0
+ b 1f
+
+0: vld1.64 {T1}, [r2]!
+ subs r0, r0, #1
+
+1: /* multiply XL by SHASH in GF(2^128) */
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ vrev64.8 T1, T1
+#endif
+ vext.8 T2, XL, XL, #8
+ vext.8 IN1, T1, T1, #8
+ veor T1, T1, T2
+ veor XL, XL, IN1
+
+ vmull.p64 XH, SHASH_H, XL_H @ a1 * b1
+ veor T1, T1, XL
+ vmull.p64 XL, SHASH_L, XL_L @ a0 * b0
+ vmull.p64 XM, SHASH2_L, T1_L @ (a1 + a0)(b1 + b0)
+
+ vext.8 T1, XL, XH, #8
+ veor T2, XL, XH
+ veor XM, XM, T1
+ veor XM, XM, T2
+ vmull.p64 T2, XL_L, MASK_L
+
+ vmov XH_L, XM_H
+ vmov XM_H, XL_L
+
+ veor XL, XM, T2
+ vext.8 T2, XL, XL, #8
+ vmull.p64 XL, XL_L, MASK_L
+ veor T2, T2, XH
+ veor XL, XL, T2
+
+ bne 0b
+
+ vst1.64 {XL}, [r1]
+ bx lr
+ENDPROC(pmull_ghash_update)
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
new file mode 100644
index 000000000000..03a39fe29246
--- /dev/null
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -0,0 +1,320 @@
+/*
+ * Accelerated GHASH implementation with ARMv8 vmull.p64 instructions.
+ *
+ * Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/cryptd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/gf128mul.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+#define GHASH_BLOCK_SIZE 16
+#define GHASH_DIGEST_SIZE 16
+
+struct ghash_key {
+ u64 a;
+ u64 b;
+};
+
+struct ghash_desc_ctx {
+ u64 digest[GHASH_DIGEST_SIZE/sizeof(u64)];
+ u8 buf[GHASH_BLOCK_SIZE];
+ u32 count;
+};
+
+struct ghash_async_ctx {
+ struct cryptd_ahash *cryptd_tfm;
+};
+
+asmlinkage void pmull_ghash_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key const *k, const char *head);
+
+static int ghash_init(struct shash_desc *desc)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ ctx->count += len;
+
+ if ((partial + len) >= GHASH_BLOCK_SIZE) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+ int blocks;
+
+ if (partial) {
+ int p = GHASH_BLOCK_SIZE - partial;
+
+ memcpy(ctx->buf + partial, src, p);
+ src += p;
+ len -= p;
+ }
+
+ blocks = len / GHASH_BLOCK_SIZE;
+ len %= GHASH_BLOCK_SIZE;
+
+ kernel_neon_begin();
+ pmull_ghash_update(blocks, ctx->digest, src, key,
+ partial ? ctx->buf : NULL);
+ kernel_neon_end();
+ src += blocks * GHASH_BLOCK_SIZE;
+ partial = 0;
+ }
+ if (len)
+ memcpy(ctx->buf + partial, src, len);
+ return 0;
+}
+
+static int ghash_final(struct shash_desc *desc, u8 *dst)
+{
+ struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
+ unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
+
+ if (partial) {
+ struct ghash_key *key = crypto_shash_ctx(desc->tfm);
+
+ memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
+ kernel_neon_begin();
+ pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
+ kernel_neon_end();
+ }
+ put_unaligned_be64(ctx->digest[1], dst);
+ put_unaligned_be64(ctx->digest[0], dst + 8);
+
+ *ctx = (struct ghash_desc_ctx){};
+ return 0;
+}
+
+static int ghash_setkey(struct crypto_shash *tfm,
+ const u8 *inkey, unsigned int keylen)
+{
+ struct ghash_key *key = crypto_shash_ctx(tfm);
+ u64 a, b;
+
+ if (keylen != GHASH_BLOCK_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ /* perform multiplication by 'x' in GF(2^128) */
+ b = get_unaligned_be64(inkey);
+ a = get_unaligned_be64(inkey + 8);
+
+ key->a = (a << 1) | (b >> 63);
+ key->b = (b << 1) | (a >> 63);
+
+ if (b >> 63)
+ key->b ^= 0xc200000000000000UL;
+
+ return 0;
+}
+
+static struct shash_alg ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = ghash_init,
+ .update = ghash_update,
+ .final = ghash_final,
+ .setkey = ghash_setkey,
+ .descsize = sizeof(struct ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "__driver-ghash-ce",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ghash_key),
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static int ghash_async_init(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ if (!may_use_simd()) {
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_init(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+ desc->tfm = child;
+ desc->flags = req->base.flags;
+ return crypto_shash_init(desc);
+ }
+}
+
+static int ghash_async_update(struct ahash_request *req)
+{
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+ if (!may_use_simd()) {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_update(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ return shash_ahash_update(req, desc);
+ }
+}
+
+static int ghash_async_final(struct ahash_request *req)
+{
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+
+ if (!may_use_simd()) {
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_final(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ return crypto_shash_final(desc, req->result);
+ }
+}
+
+static int ghash_async_digest(struct ahash_request *req)
+{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct ahash_request *cryptd_req = ahash_request_ctx(req);
+ struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
+
+ if (!may_use_simd()) {
+ memcpy(cryptd_req, req, sizeof(*req));
+ ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
+ return crypto_ahash_digest(cryptd_req);
+ } else {
+ struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
+ struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
+
+ desc->tfm = child;
+ desc->flags = req->base.flags;
+ return shash_ahash_digest(req, desc);
+ }
+}
+
+static int ghash_async_setkey(struct crypto_ahash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
+ struct crypto_ahash *child = &ctx->cryptd_tfm->base;
+ int err;
+
+ crypto_ahash_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+ crypto_ahash_set_flags(child, crypto_ahash_get_flags(tfm)
+ & CRYPTO_TFM_REQ_MASK);
+ err = crypto_ahash_setkey(child, key, keylen);
+ crypto_ahash_set_flags(tfm, crypto_ahash_get_flags(child)
+ & CRYPTO_TFM_RES_MASK);
+
+ return err;
+}
+
+static int ghash_async_init_tfm(struct crypto_tfm *tfm)
+{
+ struct cryptd_ahash *cryptd_tfm;
+ struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
+ if (IS_ERR(cryptd_tfm))
+ return PTR_ERR(cryptd_tfm);
+ ctx->cryptd_tfm = cryptd_tfm;
+ crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+ sizeof(struct ahash_request) +
+ crypto_ahash_reqsize(&cryptd_tfm->base));
+
+ return 0;
+}
+
+static void ghash_async_exit_tfm(struct crypto_tfm *tfm)
+{
+ struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ cryptd_free_ahash(ctx->cryptd_tfm);
+}
+
+static struct ahash_alg ghash_async_alg = {
+ .init = ghash_async_init,
+ .update = ghash_async_update,
+ .final = ghash_async_final,
+ .setkey = ghash_async_setkey,
+ .digest = ghash_async_digest,
+ .halg.digestsize = GHASH_DIGEST_SIZE,
+ .halg.base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "ghash-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ .cra_type = &crypto_ahash_type,
+ .cra_ctxsize = sizeof(struct ghash_async_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_init = ghash_async_init_tfm,
+ .cra_exit = ghash_async_exit_tfm,
+ },
+};
+
+static int __init ghash_ce_mod_init(void)
+{
+ int err;
+
+ if (!(elf_hwcap2 & HWCAP2_PMULL))
+ return -ENODEV;
+
+ err = crypto_register_shash(&ghash_alg);
+ if (err)
+ return err;
+ err = crypto_register_ahash(&ghash_async_alg);
+ if (err)
+ goto err_shash;
+
+ return 0;
+
+err_shash:
+ crypto_unregister_shash(&ghash_alg);
+ return err;
+}
+
+static void __exit ghash_ce_mod_exit(void)
+{
+ crypto_unregister_ahash(&ghash_async_alg);
+ crypto_unregister_shash(&ghash_alg);
+}
+
+module_init(ghash_ce_mod_init);
+module_exit(ghash_ce_mod_exit);
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
new file mode 100644
index 000000000000..b623f51ccbcf
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -0,0 +1,125 @@
+/*
+ * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .fpu crypto-neon-fp-armv8
+
+ k0 .req q0
+ k1 .req q1
+ k2 .req q2
+ k3 .req q3
+
+ ta0 .req q4
+ ta1 .req q5
+ tb0 .req q5
+ tb1 .req q4
+
+ dga .req q6
+ dgb .req q7
+ dgbs .req s28
+
+ dg0 .req q12
+ dg1a0 .req q13
+ dg1a1 .req q14
+ dg1b0 .req q14
+ dg1b1 .req q13
+
+ .macro add_only, op, ev, rc, s0, dg1
+ .ifnb \s0
+ vadd.u32 tb\ev, q\s0, \rc
+ .endif
+ sha1h.32 dg1b\ev, dg0
+ .ifb \dg1
+ sha1\op\().32 dg0, dg1a\ev, ta\ev
+ .else
+ sha1\op\().32 dg0, \dg1, ta\ev
+ .endif
+ .endm
+
+ .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
+ sha1su0.32 q\s0, q\s1, q\s2
+ add_only \op, \ev, \rc, \s1, \dg1
+ sha1su1.32 q\s0, q\s3
+ .endm
+
+ .align 6
+.Lsha1_rcon:
+ .word 0x5a827999, 0x5a827999, 0x5a827999, 0x5a827999
+ .word 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1, 0x6ed9eba1
+ .word 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc, 0x8f1bbcdc
+ .word 0xca62c1d6, 0xca62c1d6, 0xca62c1d6, 0xca62c1d6
+
+ /*
+ * void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+ * int blocks);
+ */
+ENTRY(sha1_ce_transform)
+ /* load round constants */
+ adr ip, .Lsha1_rcon
+ vld1.32 {k0-k1}, [ip, :128]!
+ vld1.32 {k2-k3}, [ip, :128]
+
+ /* load state */
+ vld1.32 {dga}, [r0]
+ vldr dgbs, [r0, #16]
+
+ /* load input */
+0: vld1.32 {q8-q9}, [r1]!
+ vld1.32 {q10-q11}, [r1]!
+ subs r2, r2, #1
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ vrev32.8 q8, q8
+ vrev32.8 q9, q9
+ vrev32.8 q10, q10
+ vrev32.8 q11, q11
+#endif
+
+ vadd.u32 ta0, q8, k0
+ vmov dg0, dga
+
+ add_update c, 0, k0, 8, 9, 10, 11, dgb
+ add_update c, 1, k0, 9, 10, 11, 8
+ add_update c, 0, k0, 10, 11, 8, 9
+ add_update c, 1, k0, 11, 8, 9, 10
+ add_update c, 0, k1, 8, 9, 10, 11
+
+ add_update p, 1, k1, 9, 10, 11, 8
+ add_update p, 0, k1, 10, 11, 8, 9
+ add_update p, 1, k1, 11, 8, 9, 10
+ add_update p, 0, k1, 8, 9, 10, 11
+ add_update p, 1, k2, 9, 10, 11, 8
+
+ add_update m, 0, k2, 10, 11, 8, 9
+ add_update m, 1, k2, 11, 8, 9, 10
+ add_update m, 0, k2, 8, 9, 10, 11
+ add_update m, 1, k2, 9, 10, 11, 8
+ add_update m, 0, k3, 10, 11, 8, 9
+
+ add_update p, 1, k3, 11, 8, 9, 10
+ add_only p, 0, k3, 9
+ add_only p, 1, k3, 10
+ add_only p, 0, k3, 11
+ add_only p, 1
+
+ /* update state */
+ vadd.u32 dga, dga, dg0
+ vadd.u32 dgb, dgb, dg1a0
+ bne 0b
+
+ /* store new state */
+ vst1.32 {dga}, [r0]
+ vstr dgbs, [r0, #16]
+ bx lr
+ENDPROC(sha1_ce_transform)
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
new file mode 100644
index 000000000000..80bc2fcd241a
--- /dev/null
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -0,0 +1,96 @@
+/*
+ * sha1-ce-glue.c - SHA-1 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#include "sha1.h"
+
+MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha1_ce_transform(struct sha1_state *sst, u8 const *src,
+ int blocks);
+
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ if (!may_use_simd() ||
+ (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+ return sha1_update_arm(desc, data, len);
+
+ kernel_neon_begin();
+ sha1_base_do_update(desc, data, len, sha1_ce_transform);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (!may_use_simd())
+ return sha1_finup_arm(desc, data, len, out);
+
+ kernel_neon_begin();
+ if (len)
+ sha1_base_do_update(desc, data, len, sha1_ce_transform);
+ sha1_base_do_finalize(desc, sha1_ce_transform);
+ kernel_neon_end();
+
+ return sha1_base_finish(desc, out);
+}
+
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
+{
+ return sha1_ce_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg alg = {
+ .init = sha1_base_init,
+ .update = sha1_ce_update,
+ .final = sha1_ce_final,
+ .finup = sha1_ce_finup,
+ .descsize = sizeof(struct sha1_state),
+ .digestsize = SHA1_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name = "sha1-ce",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init sha1_ce_mod_init(void)
+{
+ if (!(elf_hwcap2 & HWCAP2_SHA1))
+ return -ENODEV;
+ return crypto_register_shash(&alg);
+}
+
+static void __exit sha1_ce_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(sha1_ce_mod_init);
+module_exit(sha1_ce_mod_fini);
diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/crypto/sha1.h
index 75e6a417416b..ffd8bd08b1a7 100644
--- a/arch/arm/include/asm/crypto/sha1.h
+++ b/arch/arm/crypto/sha1.h
@@ -7,4 +7,7 @@
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
+extern int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out);
+
#endif
diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c
index e31b0440c613..6fc73bf8766d 100644
--- a/arch/arm/crypto/sha1_glue.c
+++ b/arch/arm/crypto/sha1_glue.c
@@ -22,127 +22,47 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
#include <asm/byteorder.h>
-#include <asm/crypto/sha1.h>
+#include "sha1.h"
asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
-
-static int sha1_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
-
-
-static int __sha1_update(struct sha1_state *sctx, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- unsigned int done = 0;
-
- sctx->count += len;
-
- if (partial) {
- done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->buffer + partial, data, done);
- sha1_block_data_order(sctx->state, sctx->buffer, 1);
- }
-
- if (len - done >= SHA1_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
- sha1_block_data_order(sctx->state, data + done, rounds);
- done += rounds * SHA1_BLOCK_SIZE;
- }
-
- memcpy(sctx->buffer, data + done, len - done);
- return 0;
-}
-
-
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
- int res;
+ /* make sure casting to sha1_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- /* Handle the fast case right here */
- if (partial + len < SHA1_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buffer + partial, data, len);
- return 0;
- }
- res = __sha1_update(sctx, data, len, partial);
- return res;
+ return sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_block_data_order);
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
-
-/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA1_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
- /* We need to fill a whole block for __sha1_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buffer + index, padding, padlen);
- } else {
- __sha1_update(sctx, padding, padlen, index);
- }
- __sha1_update(sctx, (const u8 *)&bits, sizeof(bits), 56);
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
- return 0;
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_block_data_order);
+ return sha1_base_finish(desc, out);
}
-
-static int sha1_export(struct shash_desc *desc, void *out)
+int sha1_finup_arm(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_block_data_order);
+ return sha1_final(desc, out);
}
-
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
-}
-
+EXPORT_SYMBOL_GPL(sha1_finup_arm);
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
+ .init = sha1_base_init,
.update = sha1_update_arm,
.final = sha1_final,
- .export = sha1_export,
- .import = sha1_import,
+ .finup = sha1_finup_arm,
.descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index 0b0083757d47..4e22f122f966 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -25,147 +25,60 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
#include <asm/neon.h>
#include <asm/simd.h>
-#include <asm/crypto/sha1.h>
+#include "sha1.h"
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
-
-static int sha1_neon_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
-
-static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count += len;
-
- if (partial) {
- done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->buffer + partial, data, done);
- sha1_transform_neon(sctx->state, sctx->buffer, 1);
- }
-
- if (len - done >= SHA1_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
- sha1_transform_neon(sctx->state, data + done, rounds);
- done += rounds * SHA1_BLOCK_SIZE;
- }
-
- memcpy(sctx->buffer, data + done, len - done);
-
- return 0;
-}
-
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+ unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
- int res;
- /* Handle the fast case right here */
- if (partial + len < SHA1_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buffer + partial, data, len);
+ if (!may_use_simd() ||
+ (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+ return sha1_update_arm(desc, data, len);
- return 0;
- }
-
- if (!may_use_simd()) {
- res = sha1_update_arm(desc, data, len);
- } else {
- kernel_neon_begin();
- res = __sha1_neon_update(desc, data, len, partial);
- kernel_neon_end();
- }
-
- return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_neon_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA1_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
- if (!may_use_simd()) {
- sha1_update_arm(desc, padding, padlen);
- sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_neon_begin();
- /* We need to fill a whole block for __sha1_neon_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buffer + index, padding, padlen);
- } else {
- __sha1_neon_update(desc, padding, padlen, index);
- }
- __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
- kernel_neon_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
+ kernel_neon_begin();
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_transform_neon);
+ kernel_neon_end();
return 0;
}
-static int sha1_neon_export(struct shash_desc *desc, void *out)
+static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
+ if (!may_use_simd())
+ return sha1_finup_arm(desc, data, len, out);
- memcpy(out, sctx, sizeof(*sctx));
+ kernel_neon_begin();
+ if (len)
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_transform_neon);
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_neon);
+ kernel_neon_end();
- return 0;
+ return sha1_base_finish(desc, out);
}
-static int sha1_neon_import(struct shash_desc *desc, const void *in)
+static int sha1_neon_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
+ return sha1_neon_finup(desc, NULL, 0, out);
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_neon_init,
+ .init = sha1_base_init,
.update = sha1_neon_update,
.final = sha1_neon_final,
- .export = sha1_neon_export,
- .import = sha1_neon_import,
+ .finup = sha1_neon_finup,
.descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
new file mode 100644
index 000000000000..87ec11a5f405
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -0,0 +1,125 @@
+/*
+ * sha2-ce-core.S - SHA-224/256 secure hash using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd.
+ * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+ .fpu crypto-neon-fp-armv8
+
+ k0 .req q7
+ k1 .req q8
+ rk .req r3
+
+ ta0 .req q9
+ ta1 .req q10
+ tb0 .req q10
+ tb1 .req q9
+
+ dga .req q11
+ dgb .req q12
+
+ dg0 .req q13
+ dg1 .req q14
+ dg2 .req q15
+
+ .macro add_only, ev, s0
+ vmov dg2, dg0
+ .ifnb \s0
+ vld1.32 {k\ev}, [rk, :128]!
+ .endif
+ sha256h.32 dg0, dg1, tb\ev
+ sha256h2.32 dg1, dg2, tb\ev
+ .ifnb \s0
+ vadd.u32 ta\ev, q\s0, k\ev
+ .endif
+ .endm
+
+ .macro add_update, ev, s0, s1, s2, s3
+ sha256su0.32 q\s0, q\s1
+ add_only \ev, \s1
+ sha256su1.32 q\s0, q\s2, q\s3
+ .endm
+
+ .align 6
+.Lsha256_rcon:
+ .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+ .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+ .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+ .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+ .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+ .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+ .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+ .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+ .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+ .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+ .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+ .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+ .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+ .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+ .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+ /*
+ * void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
+ int blocks);
+ */
+ENTRY(sha2_ce_transform)
+ /* load state */
+ vld1.32 {dga-dgb}, [r0]
+
+ /* load input */
+0: vld1.32 {q0-q1}, [r1]!
+ vld1.32 {q2-q3}, [r1]!
+ subs r2, r2, #1
+
+#ifndef CONFIG_CPU_BIG_ENDIAN
+ vrev32.8 q0, q0
+ vrev32.8 q1, q1
+ vrev32.8 q2, q2
+ vrev32.8 q3, q3
+#endif
+
+ /* load first round constant */
+ adr rk, .Lsha256_rcon
+ vld1.32 {k0}, [rk, :128]!
+
+ vadd.u32 ta0, q0, k0
+ vmov dg0, dga
+ vmov dg1, dgb
+
+ add_update 1, 0, 1, 2, 3
+ add_update 0, 1, 2, 3, 0
+ add_update 1, 2, 3, 0, 1
+ add_update 0, 3, 0, 1, 2
+ add_update 1, 0, 1, 2, 3
+ add_update 0, 1, 2, 3, 0
+ add_update 1, 2, 3, 0, 1
+ add_update 0, 3, 0, 1, 2
+ add_update 1, 0, 1, 2, 3
+ add_update 0, 1, 2, 3, 0
+ add_update 1, 2, 3, 0, 1
+ add_update 0, 3, 0, 1, 2
+
+ add_only 1, 1
+ add_only 0, 2
+ add_only 1, 3
+ add_only 0
+
+ /* update state */
+ vadd.u32 dga, dga, dg0
+ vadd.u32 dgb, dgb, dg1
+ bne 0b
+
+ /* store new state */
+ vst1.32 {dga-dgb}, [r0]
+ bx lr
+ENDPROC(sha2_ce_transform)
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
new file mode 100644
index 000000000000..0755b2d657f3
--- /dev/null
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -0,0 +1,114 @@
+/*
+ * sha2-ce-glue.c - SHA-224/SHA-256 using ARMv8 Crypto Extensions
+ *
+ * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+#include <asm/unaligned.h>
+
+#include "sha256_glue.h"
+
+MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+
+asmlinkage void sha2_ce_transform(struct sha256_state *sst, u8 const *src,
+ int blocks);
+
+static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ if (!may_use_simd() ||
+ (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+ return crypto_sha256_arm_update(desc, data, len);
+
+ kernel_neon_begin();
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (!may_use_simd())
+ return crypto_sha256_arm_finup(desc, data, len, out);
+
+ kernel_neon_begin();
+ if (len)
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
+
+ return sha256_base_finish(desc, out);
+}
+
+static int sha2_ce_final(struct shash_desc *desc, u8 *out)
+{
+ return sha2_ce_finup(desc, NULL, 0, out);
+}
+
+static struct shash_alg algs[] = { {
+ .init = sha224_base_init,
+ .update = sha2_ce_update,
+ .final = sha2_ce_final,
+ .finup = sha2_ce_finup,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA224_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .init = sha256_base_init,
+ .update = sha2_ce_update,
+ .final = sha2_ce_final,
+ .finup = sha2_ce_finup,
+ .descsize = sizeof(struct sha256_state),
+ .digestsize = SHA256_DIGEST_SIZE,
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-ce",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha2_ce_mod_init(void)
+{
+ if (!(elf_hwcap2 & HWCAP2_SHA2))
+ return -ENODEV;
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit sha2_ce_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(sha2_ce_mod_init);
+module_exit(sha2_ce_mod_fini);
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
new file mode 100644
index 000000000000..fac0533ea633
--- /dev/null
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -0,0 +1,716 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
+# ====================================================================
+
+# SHA256 block procedure for ARMv4. May 2007.
+
+# Performance is ~2x better than gcc 3.4 generated code and in "abso-
+# lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+# byte [on single-issue Xscale PXA250 core].
+
+# July 2010.
+#
+# Rescheduling for dual-issue pipeline resulted in 22% improvement on
+# Cortex A8 core and ~20 cycles per processed byte.
+
+# February 2011.
+#
+# Profiler-assisted and platform-specific optimization resulted in 16%
+# improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+# September 2013.
+#
+# Add NEON implementation. On Cortex A8 it was measured to process one
+# byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+# S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+# code (meaning that latter performs sub-optimally, nothing was done
+# about it).
+
+# May 2014.
+#
+# Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$ctx="r0"; $t0="r0";
+$inp="r1"; $t4="r1";
+$len="r2"; $t1="r2";
+$T1="r3"; $t3="r3";
+$A="r4";
+$B="r5";
+$C="r6";
+$D="r7";
+$E="r8";
+$F="r9";
+$G="r10";
+$H="r11";
+@V=($A,$B,$C,$D,$E,$F,$G,$H);
+$t2="r12";
+$Ktbl="r14";
+
+@Sigma0=( 2,13,22);
+@Sigma1=( 6,11,25);
+@sigma0=( 7,18, 3);
+@sigma1=(17,19,10);
+
+sub BODY_00_15 {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___ if ($i<16);
+#if __ARM_ARCH__>=7
+ @ ldr $t1,[$inp],#4 @ $i
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+# ifndef __ARMEB__
+ rev $t1,$t1
+# endif
+#else
+ @ ldrb $t1,[$inp,#3] @ $i
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ ldrb $t2,[$inp,#2]
+ ldrb $t0,[$inp,#1]
+ orr $t1,$t1,$t2,lsl#8
+ ldrb $t2,[$inp],#4
+ orr $t1,$t1,$t0,lsl#16
+# if $i==15
+ str $inp,[sp,#17*4] @ make room for $t4
+# endif
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
+ orr $t1,$t1,$t2,lsl#24
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+#endif
+___
+$code.=<<___;
+ ldr $t2,[$Ktbl],#4 @ *K256++
+ add $h,$h,$t1 @ h+=X[i]
+ str $t1,[sp,#`$i%16`*4]
+ eor $t1,$f,$g
+ add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
+ and $t1,$t1,$e
+ add $h,$h,$t2 @ h+=K256[i]
+ eor $t1,$t1,$g @ Ch(e,f,g)
+ eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
+ add $h,$h,$t1 @ h+=Ch(e,f,g)
+#if $i==31
+ and $t2,$t2,#0xff
+ cmp $t2,#0xf2 @ done?
+#endif
+#if $i<15
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4 @ prefetch
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t2,$a,$b @ a^b, b^c in next round
+#else
+ ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
+ eor $t2,$a,$b @ a^b, b^c in next round
+ ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
+#endif
+ eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
+ and $t3,$t3,$t2 @ (b^c)&=(a^b)
+ add $d,$d,$h @ d+=h
+ eor $t3,$t3,$b @ Maj(a,b,c)
+ add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
+ @ add $h,$h,$t3 @ h+=Maj(a,b,c)
+___
+ ($t2,$t3)=($t3,$t2);
+}
+
+sub BODY_16_XX {
+my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
+
+$code.=<<___;
+ @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
+ @ ldr $t4,[sp,#`($i+14)%16`*4]
+ mov $t0,$t1,ror#$sigma0[0]
+ add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
+ mov $t2,$t4,ror#$sigma1[0]
+ eor $t0,$t0,$t1,ror#$sigma0[1]
+ eor $t2,$t2,$t4,ror#$sigma1[1]
+ eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
+ ldr $t1,[sp,#`($i+0)%16`*4]
+ eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
+ ldr $t4,[sp,#`($i+9)%16`*4]
+
+ add $t2,$t2,$t0
+ eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
+ add $t1,$t1,$t2
+ eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
+ add $t1,$t1,$t4 @ X[i]
+___
+ &BODY_00_15(@_);
+}
+
+$code=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
+ ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
+ sub $Ktbl,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr $t1,[$inp],#4
+# else
+ ldrb $t1,[$inp,#3]
+# endif
+ eor $t3,$B,$C @ magic
+ eor $t2,$t2,$t2
+___
+for($i=0;$i<16;$i++) { &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
+$code.=".Lrounds_16_xx:\n";
+for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq $t3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t0,[$t3,#0]
+ ldr $t1,[$t3,#4]
+ ldr $t2,[$t3,#8]
+ add $A,$A,$t0
+ ldr $t0,[$t3,#12]
+ add $B,$B,$t1
+ ldr $t1,[$t3,#16]
+ add $C,$C,$t2
+ ldr $t2,[$t3,#20]
+ add $D,$D,$t0
+ ldr $t0,[$t3,#24]
+ add $E,$E,$t1
+ ldr $t1,[$t3,#28]
+ add $F,$F,$t2
+ ldr $inp,[sp,#17*4] @ pull inp
+ ldr $t2,[sp,#18*4] @ pull inp+len
+ add $G,$G,$t0
+ add $H,$H,$t1
+ stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
+ cmp $inp,$t2
+ sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#`16+3`*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+___
+######################################################################
+# NEON stuff
+#
+{{{
+my @X=map("q$_",(0..3));
+my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
+my $Xfer=$t4;
+my $j=0;
+
+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
+
+sub AUTOLOAD() # thunk [simplified] x86-style perlasm
+{ my $opcode = $AUTOLOAD; $opcode =~ s/.*:://; $opcode =~ s/_/\./;
+ my $arg = pop;
+ $arg = "#$arg" if ($arg*1 eq $arg);
+ $code .= "\t$opcode\t".join(',',@_,$arg)."\n";
+}
+
+sub Xupdate()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T2,$T0,$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T1,$T0,$sigma0[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T2,$T0,32-$sigma0[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T3,$T0,$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T2);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T3,$T0,32-$sigma0[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T1,$T1,$T3); # sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &veor ($T5,$T5,$T4); # sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ while($#insns>=2) { eval(shift(@insns)); }
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub Xpreload()
+{ use integer;
+ my $body = shift;
+ my @insns = (&$body,&$body,&$body,&$body);
+ my ($a,$b,$c,$d,$e,$f,$g,$h);
+
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vld1_32 ("{$T0}","[$Ktbl,:128]!");
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vrev32_8 (@X[0],@X[0]);
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ eval(shift(@insns));
+ &vadd_i32 ($T0,$T0,@X[0]);
+ foreach (@insns) { eval; } # remaining instructions
+ &vst1_32 ("{$T0}","[$Xfer,:128]!");
+
+ push(@X,shift(@X)); # "rotate" X[]
+}
+
+sub body_00_15 () {
+ (
+ '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
+ '&add ($h,$h,$t1)', # h+=X[i]+K[i]
+ '&eor ($t1,$f,$g)',
+ '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
+ '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
+ '&and ($t1,$t1,$e)',
+ '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
+ '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
+ '&eor ($t1,$t1,$g)', # Ch(e,f,g)
+ '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
+ '&eor ($t2,$a,$b)', # a^b, b^c in next round
+ '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
+ '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
+ '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
+ '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
+ '&ldr ($t1,"[sp,#64]") if ($j==31)',
+ '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
+ '&add ($d,$d,$h)', # d+=h
+ '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
+ '&eor ($t3,$t3,$b)', # Maj(a,b,c)
+ '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
+ )
+}
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub $H,sp,#16*4+16
+ adrl $Ktbl,K256
+ bic $H,$H,#15 @ align for 128-bit stores
+ mov $t2,sp
+ mov sp,$H @ alloca
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {@X[0]},[$inp]!
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ vld1.32 {$T0},[$Ktbl,:128]!
+ vld1.32 {$T1},[$Ktbl,:128]!
+ vld1.32 {$T2},[$Ktbl,:128]!
+ vld1.32 {$T3},[$Ktbl,:128]!
+ vrev32.8 @X[0],@X[0] @ yes, even on
+ str $ctx,[sp,#64]
+ vrev32.8 @X[1],@X[1] @ big-endian
+ str $inp,[sp,#68]
+ mov $Xfer,sp
+ vrev32.8 @X[2],@X[2]
+ str $len,[sp,#72]
+ vrev32.8 @X[3],@X[3]
+ str $t2,[sp,#76] @ save original sp
+ vadd.i32 $T0,$T0,@X[0]
+ vadd.i32 $T1,$T1,@X[1]
+ vst1.32 {$T0},[$Xfer,:128]!
+ vadd.i32 $T2,$T2,@X[2]
+ vst1.32 {$T1},[$Xfer,:128]!
+ vadd.i32 $T3,$T3,@X[3]
+ vst1.32 {$T2},[$Xfer,:128]!
+ vst1.32 {$T3},[$Xfer,:128]!
+
+ ldmia $ctx,{$A-$H}
+ sub $Xfer,$Xfer,#64
+ ldr $t1,[sp,#0]
+ eor $t2,$t2,$t2
+ eor $t3,$B,$C
+ b .L_00_48
+
+.align 4
+.L_00_48:
+___
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+ &Xupdate(\&body_00_15);
+$code.=<<___;
+ teq $t1,#0 @ check for K256 terminator
+ ldr $t1,[sp,#0]
+ sub $Xfer,$Xfer,#64
+ bne .L_00_48
+
+ ldr $inp,[sp,#68]
+ ldr $t0,[sp,#72]
+ sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
+ teq $inp,$t0
+ it eq
+ subeq $inp,$inp,#64 @ avoid SEGV
+ vld1.8 {@X[0]},[$inp]! @ load next input block
+ vld1.8 {@X[1]},[$inp]!
+ vld1.8 {@X[2]},[$inp]!
+ vld1.8 {@X[3]},[$inp]!
+ it ne
+ strne $inp,[sp,#68]
+ mov $Xfer,sp
+___
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+ &Xpreload(\&body_00_15);
+$code.=<<___;
+ ldr $t0,[$t1,#0]
+ add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
+ ldr $t2,[$t1,#4]
+ ldr $t3,[$t1,#8]
+ ldr $t4,[$t1,#12]
+ add $A,$A,$t0 @ accumulate
+ ldr $t0,[$t1,#16]
+ add $B,$B,$t2
+ ldr $t2,[$t1,#20]
+ add $C,$C,$t3
+ ldr $t3,[$t1,#24]
+ add $D,$D,$t4
+ ldr $t4,[$t1,#28]
+ add $E,$E,$t0
+ str $A,[$t1],#4
+ add $F,$F,$t2
+ str $B,[$t1],#4
+ add $G,$G,$t3
+ str $C,[$t1],#4
+ add $H,$H,$t4
+ str $D,[$t1],#4
+ stmia $t1,{$E-$H}
+
+ ittte ne
+ movne $Xfer,sp
+ ldrne $t1,[sp,#0]
+ eorne $t2,$t2,$t2
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne $t3,$B,$C
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+___
+}}}
+######################################################################
+# ARMv8 stuff
+#
+{{{
+my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
+my @MSG=map("q$_",(8..11));
+my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
+my $Ktbl="r3";
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {$ABCD,$EFGH},[$ctx]
+# ifdef __thumb2__
+ adr $Ktbl,.LARMv8
+ sub $Ktbl,$Ktbl,#.LARMv8-K256
+# else
+ adrl $Ktbl,K256
+# endif
+ add $len,$inp,$len,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
+ vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
+ vld1.32 {$W0},[$Ktbl]!
+ vrev32.8 @MSG[0],@MSG[0]
+ vrev32.8 @MSG[1],@MSG[1]
+ vrev32.8 @MSG[2],@MSG[2]
+ vrev32.8 @MSG[3],@MSG[3]
+ vmov $ABCD_SAVE,$ABCD @ offload
+ vmov $EFGH_SAVE,$EFGH
+ teq $inp,$len
+___
+for($i=0;$i<12;$i++) {
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ sha256su0 @MSG[0],@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+ sha256su1 @MSG[0],@MSG[2],@MSG[3]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+}
+$code.=<<___;
+ vld1.32 {$W1},[$Ktbl]!
+ vadd.i32 $W0,$W0,@MSG[0]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vld1.32 {$W0},[$Ktbl]!
+ vadd.i32 $W1,$W1,@MSG[1]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vld1.32 {$W1},[$Ktbl]
+ vadd.i32 $W0,$W0,@MSG[2]
+ sub $Ktbl,$Ktbl,#256-16 @ rewind
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W0
+ sha256h2 $EFGH,$abcd,$W0
+
+ vadd.i32 $W1,$W1,@MSG[3]
+ vmov $abcd,$ABCD
+ sha256h $ABCD,$EFGH,$W1
+ sha256h2 $EFGH,$abcd,$W1
+
+ vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
+ vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {$ABCD,$EFGH},[$ctx]
+
+ ret @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+___
+}}}
+$code.=<<___;
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
+___
+
+open SELF,$0;
+while(<SELF>) {
+ next if (/^#!/);
+ last if (!s/^#/@/ and !/^$/);
+ print;
+}
+close SELF;
+
+{ my %opcode = (
+ "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
+ "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
+
+ sub unsha256 {
+ my ($mnemonic,$arg)=@_;
+
+ if ($arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
+ my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
+ |(($2&7)<<17)|(($2&8)<<4)
+ |(($3&7)<<1) |(($3&8)<<2);
+ # since ARMv7 instructions are always encoded little-endian.
+ # correct solution is to use .inst directive, but older
+ # assemblers don't implement it:-(
+ sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
+ $word&0xff,($word>>8)&0xff,
+ ($word>>16)&0xff,($word>>24)&0xff,
+ $mnemonic,$arg;
+ }
+ }
+}
+
+foreach (split($/,$code)) {
+
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
+
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
new file mode 100644
index 000000000000..555a1a8eec90
--- /dev/null
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -0,0 +1,2808 @@
+
+@ ====================================================================
+@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+@ project. The module is, however, dual licensed under OpenSSL and
+@ CRYPTOGAMS licenses depending on where you obtain it. For further
+@ details see http://www.openssl.org/~appro/cryptogams/.
+@
+@ Permission to use under GPL terms is granted.
+@ ====================================================================
+
+@ SHA256 block procedure for ARMv4. May 2007.
+
+@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
+@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
+@ byte [on single-issue Xscale PXA250 core].
+
+@ July 2010.
+@
+@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
+@ Cortex A8 core and ~20 cycles per processed byte.
+
+@ February 2011.
+@
+@ Profiler-assisted and platform-specific optimization resulted in 16%
+@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
+
+@ September 2013.
+@
+@ Add NEON implementation. On Cortex A8 it was measured to process one
+@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
+@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
+@ code (meaning that latter performs sub-optimally, nothing was done
+@ about it).
+
+@ May 2014.
+@
+@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
+
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+#endif
+
+.text
+#if __ARM_ARCH__<7
+.code 32
+#else
+.syntax unified
+# ifdef __thumb2__
+# define adrl adr
+.thumb
+# else
+.code 32
+# endif
+#endif
+
+.type K256,%object
+.align 5
+K256:
+.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+.size K256,.-K256
+.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.LOPENSSL_armcap:
+.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
+.align 5
+
+.global sha256_block_data_order
+.type sha256_block_data_order,%function
+sha256_block_data_order:
+#if __ARM_ARCH__<7
+ sub r3,pc,#8 @ sha256_block_data_order
+#else
+ adr r3,sha256_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ ldr r12,.LOPENSSL_armcap
+ ldr r12,[r3,r12] @ OPENSSL_armcap_P
+ tst r12,#ARMV8_SHA256
+ bne .LARMv8
+ tst r12,#ARMV7_NEON
+ bne .LNEON
+#endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+ stmdb sp!,{r0,r1,r2,r4-r11,lr}
+ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
+ sub r14,r3,#256+32 @ K256
+ sub sp,sp,#16*4 @ alloca(X[16])
+.Loop:
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ magic
+ eor r12,r12,r12
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 0
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 0
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 0==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 0==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 0<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 1
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 1
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 1==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 1==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 1<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 2
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 2
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 2==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 2==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 2<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 3
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 3
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 3==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 3==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 3<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 4
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 4
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 4==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 4==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 4<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 5
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 5==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 5==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 5<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 6
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 6
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 6==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 6==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 6<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 7
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 7==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 7==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 7<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 8
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 8
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 8==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r8,r8,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 8==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 8<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 9
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 9
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 9==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r7,r7,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 9==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 9<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 10
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 10
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 10==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r6,r6,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 10==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 10<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 11
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 11
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 11==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r5,r5,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 11==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 11<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 12
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 12
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 12==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r4,r4,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 12==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 12<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 13
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 13
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 13==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r11,r11,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 13==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 13<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 14
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 14
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ ldrb r12,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r12,lsl#8
+ ldrb r12,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 14==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r10,r10,ror#5
+ orr r2,r2,r12,lsl#24
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+#endif
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 14==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 14<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ @ ldr r2,[r1],#4 @ 15
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+# ifndef __ARMEB__
+ rev r2,r2
+# endif
+#else
+ @ ldrb r2,[r1,#3] @ 15
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ ldrb r3,[r1,#2]
+ ldrb r0,[r1,#1]
+ orr r2,r2,r3,lsl#8
+ ldrb r3,[r1],#4
+ orr r2,r2,r0,lsl#16
+# if 15==15
+ str r1,[sp,#17*4] @ make room for r1
+# endif
+ eor r0,r9,r9,ror#5
+ orr r2,r2,r3,lsl#24
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+#endif
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 15==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 15<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+.Lrounds_16_xx:
+ @ ldr r2,[sp,#1*4] @ 16
+ @ ldr r1,[sp,#14*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#0*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#9*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#0*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 16==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 16<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#2*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#15*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#2*4] @ 17
+ @ ldr r1,[sp,#15*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#1*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#10*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#1*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 17==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 17<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#3*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#0*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#3*4] @ 18
+ @ ldr r1,[sp,#0*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#2*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#11*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#2*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 18==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 18<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#4*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#1*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#4*4] @ 19
+ @ ldr r1,[sp,#1*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#3*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#12*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#3*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 19==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 19<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#5*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#2*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#5*4] @ 20
+ @ ldr r1,[sp,#2*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#4*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#13*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#4*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 20==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 20<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#6*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#3*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#6*4] @ 21
+ @ ldr r1,[sp,#3*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#5*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#14*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#5*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 21==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 21<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#7*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#4*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#7*4] @ 22
+ @ ldr r1,[sp,#4*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#6*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#15*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#6*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 22==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 22<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#8*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#5*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#8*4] @ 23
+ @ ldr r1,[sp,#5*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#7*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#0*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#7*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 23==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 23<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#9*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#6*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#9*4] @ 24
+ @ ldr r1,[sp,#6*4]
+ mov r0,r2,ror#7
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#8*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#1*4]
+
+ add r12,r12,r0
+ eor r0,r8,r8,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r8,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r11,r11,r2 @ h+=X[i]
+ str r2,[sp,#8*4]
+ eor r2,r9,r10
+ add r11,r11,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r8
+ add r11,r11,r12 @ h+=K256[i]
+ eor r2,r2,r10 @ Ch(e,f,g)
+ eor r0,r4,r4,ror#11
+ add r11,r11,r2 @ h+=Ch(e,f,g)
+#if 24==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 24<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r4,r5 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#10*4] @ from future BODY_16_xx
+ eor r12,r4,r5 @ a^b, b^c in next round
+ ldr r1,[sp,#7*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r4,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r7,r7,r11 @ d+=h
+ eor r3,r3,r5 @ Maj(a,b,c)
+ add r11,r11,r0,ror#2 @ h+=Sigma0(a)
+ @ add r11,r11,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#10*4] @ 25
+ @ ldr r1,[sp,#7*4]
+ mov r0,r2,ror#7
+ add r11,r11,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#9*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#2*4]
+
+ add r3,r3,r0
+ eor r0,r7,r7,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r7,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r10,r10,r2 @ h+=X[i]
+ str r2,[sp,#9*4]
+ eor r2,r8,r9
+ add r10,r10,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r7
+ add r10,r10,r3 @ h+=K256[i]
+ eor r2,r2,r9 @ Ch(e,f,g)
+ eor r0,r11,r11,ror#11
+ add r10,r10,r2 @ h+=Ch(e,f,g)
+#if 25==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 25<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r11,r4 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#11*4] @ from future BODY_16_xx
+ eor r3,r11,r4 @ a^b, b^c in next round
+ ldr r1,[sp,#8*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r11,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r6,r6,r10 @ d+=h
+ eor r12,r12,r4 @ Maj(a,b,c)
+ add r10,r10,r0,ror#2 @ h+=Sigma0(a)
+ @ add r10,r10,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#11*4] @ 26
+ @ ldr r1,[sp,#8*4]
+ mov r0,r2,ror#7
+ add r10,r10,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#10*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#3*4]
+
+ add r12,r12,r0
+ eor r0,r6,r6,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r6,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r9,r9,r2 @ h+=X[i]
+ str r2,[sp,#10*4]
+ eor r2,r7,r8
+ add r9,r9,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r6
+ add r9,r9,r12 @ h+=K256[i]
+ eor r2,r2,r8 @ Ch(e,f,g)
+ eor r0,r10,r10,ror#11
+ add r9,r9,r2 @ h+=Ch(e,f,g)
+#if 26==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 26<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r10,r11 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#12*4] @ from future BODY_16_xx
+ eor r12,r10,r11 @ a^b, b^c in next round
+ ldr r1,[sp,#9*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r10,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r5,r5,r9 @ d+=h
+ eor r3,r3,r11 @ Maj(a,b,c)
+ add r9,r9,r0,ror#2 @ h+=Sigma0(a)
+ @ add r9,r9,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#12*4] @ 27
+ @ ldr r1,[sp,#9*4]
+ mov r0,r2,ror#7
+ add r9,r9,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#11*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#4*4]
+
+ add r3,r3,r0
+ eor r0,r5,r5,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r5,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r8,r8,r2 @ h+=X[i]
+ str r2,[sp,#11*4]
+ eor r2,r6,r7
+ add r8,r8,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r5
+ add r8,r8,r3 @ h+=K256[i]
+ eor r2,r2,r7 @ Ch(e,f,g)
+ eor r0,r9,r9,ror#11
+ add r8,r8,r2 @ h+=Ch(e,f,g)
+#if 27==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 27<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r9,r10 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#13*4] @ from future BODY_16_xx
+ eor r3,r9,r10 @ a^b, b^c in next round
+ ldr r1,[sp,#10*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r9,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r4,r4,r8 @ d+=h
+ eor r12,r12,r10 @ Maj(a,b,c)
+ add r8,r8,r0,ror#2 @ h+=Sigma0(a)
+ @ add r8,r8,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#13*4] @ 28
+ @ ldr r1,[sp,#10*4]
+ mov r0,r2,ror#7
+ add r8,r8,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#12*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#5*4]
+
+ add r12,r12,r0
+ eor r0,r4,r4,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r4,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r7,r7,r2 @ h+=X[i]
+ str r2,[sp,#12*4]
+ eor r2,r5,r6
+ add r7,r7,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r4
+ add r7,r7,r12 @ h+=K256[i]
+ eor r2,r2,r6 @ Ch(e,f,g)
+ eor r0,r8,r8,ror#11
+ add r7,r7,r2 @ h+=Ch(e,f,g)
+#if 28==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 28<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r8,r9 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#14*4] @ from future BODY_16_xx
+ eor r12,r8,r9 @ a^b, b^c in next round
+ ldr r1,[sp,#11*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r8,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r11,r11,r7 @ d+=h
+ eor r3,r3,r9 @ Maj(a,b,c)
+ add r7,r7,r0,ror#2 @ h+=Sigma0(a)
+ @ add r7,r7,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#14*4] @ 29
+ @ ldr r1,[sp,#11*4]
+ mov r0,r2,ror#7
+ add r7,r7,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#13*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#6*4]
+
+ add r3,r3,r0
+ eor r0,r11,r11,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r11,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r6,r6,r2 @ h+=X[i]
+ str r2,[sp,#13*4]
+ eor r2,r4,r5
+ add r6,r6,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r11
+ add r6,r6,r3 @ h+=K256[i]
+ eor r2,r2,r5 @ Ch(e,f,g)
+ eor r0,r7,r7,ror#11
+ add r6,r6,r2 @ h+=Ch(e,f,g)
+#if 29==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 29<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r7,r8 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#15*4] @ from future BODY_16_xx
+ eor r3,r7,r8 @ a^b, b^c in next round
+ ldr r1,[sp,#12*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r7,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r10,r10,r6 @ d+=h
+ eor r12,r12,r8 @ Maj(a,b,c)
+ add r6,r6,r0,ror#2 @ h+=Sigma0(a)
+ @ add r6,r6,r12 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#15*4] @ 30
+ @ ldr r1,[sp,#12*4]
+ mov r0,r2,ror#7
+ add r6,r6,r12 @ h+=Maj(a,b,c) from the past
+ mov r12,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r12,r12,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#14*4]
+ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#7*4]
+
+ add r12,r12,r0
+ eor r0,r10,r10,ror#5 @ from BODY_00_15
+ add r2,r2,r12
+ eor r0,r0,r10,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r12,[r14],#4 @ *K256++
+ add r5,r5,r2 @ h+=X[i]
+ str r2,[sp,#14*4]
+ eor r2,r11,r4
+ add r5,r5,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r10
+ add r5,r5,r12 @ h+=K256[i]
+ eor r2,r2,r4 @ Ch(e,f,g)
+ eor r0,r6,r6,ror#11
+ add r5,r5,r2 @ h+=Ch(e,f,g)
+#if 30==31
+ and r12,r12,#0xff
+ cmp r12,#0xf2 @ done?
+#endif
+#if 30<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r12,r6,r7 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#0*4] @ from future BODY_16_xx
+ eor r12,r6,r7 @ a^b, b^c in next round
+ ldr r1,[sp,#13*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r6,ror#20 @ Sigma0(a)
+ and r3,r3,r12 @ (b^c)&=(a^b)
+ add r9,r9,r5 @ d+=h
+ eor r3,r3,r7 @ Maj(a,b,c)
+ add r5,r5,r0,ror#2 @ h+=Sigma0(a)
+ @ add r5,r5,r3 @ h+=Maj(a,b,c)
+ @ ldr r2,[sp,#0*4] @ 31
+ @ ldr r1,[sp,#13*4]
+ mov r0,r2,ror#7
+ add r5,r5,r3 @ h+=Maj(a,b,c) from the past
+ mov r3,r1,ror#17
+ eor r0,r0,r2,ror#18
+ eor r3,r3,r1,ror#19
+ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
+ ldr r2,[sp,#15*4]
+ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
+ ldr r1,[sp,#8*4]
+
+ add r3,r3,r0
+ eor r0,r9,r9,ror#5 @ from BODY_00_15
+ add r2,r2,r3
+ eor r0,r0,r9,ror#19 @ Sigma1(e)
+ add r2,r2,r1 @ X[i]
+ ldr r3,[r14],#4 @ *K256++
+ add r4,r4,r2 @ h+=X[i]
+ str r2,[sp,#15*4]
+ eor r2,r10,r11
+ add r4,r4,r0,ror#6 @ h+=Sigma1(e)
+ and r2,r2,r9
+ add r4,r4,r3 @ h+=K256[i]
+ eor r2,r2,r11 @ Ch(e,f,g)
+ eor r0,r5,r5,ror#11
+ add r4,r4,r2 @ h+=Ch(e,f,g)
+#if 31==31
+ and r3,r3,#0xff
+ cmp r3,#0xf2 @ done?
+#endif
+#if 31<15
+# if __ARM_ARCH__>=7
+ ldr r2,[r1],#4 @ prefetch
+# else
+ ldrb r2,[r1,#3]
+# endif
+ eor r3,r5,r6 @ a^b, b^c in next round
+#else
+ ldr r2,[sp,#1*4] @ from future BODY_16_xx
+ eor r3,r5,r6 @ a^b, b^c in next round
+ ldr r1,[sp,#14*4] @ from future BODY_16_xx
+#endif
+ eor r0,r0,r5,ror#20 @ Sigma0(a)
+ and r12,r12,r3 @ (b^c)&=(a^b)
+ add r8,r8,r4 @ d+=h
+ eor r12,r12,r6 @ Maj(a,b,c)
+ add r4,r4,r0,ror#2 @ h+=Sigma0(a)
+ @ add r4,r4,r12 @ h+=Maj(a,b,c)
+#if __ARM_ARCH__>=7
+ ite eq @ Thumb2 thing, sanity check in ARM
+#endif
+ ldreq r3,[sp,#16*4] @ pull ctx
+ bne .Lrounds_16_xx
+
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r0,[r3,#0]
+ ldr r2,[r3,#4]
+ ldr r12,[r3,#8]
+ add r4,r4,r0
+ ldr r0,[r3,#12]
+ add r5,r5,r2
+ ldr r2,[r3,#16]
+ add r6,r6,r12
+ ldr r12,[r3,#20]
+ add r7,r7,r0
+ ldr r0,[r3,#24]
+ add r8,r8,r2
+ ldr r2,[r3,#28]
+ add r9,r9,r12
+ ldr r1,[sp,#17*4] @ pull inp
+ ldr r12,[sp,#18*4] @ pull inp+len
+ add r10,r10,r0
+ add r11,r11,r2
+ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
+ cmp r1,r12
+ sub r14,r14,#256 @ rewind Ktbl
+ bne .Loop
+
+ add sp,sp,#19*4 @ destroy frame
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r11,pc}
+#else
+ ldmia sp!,{r4-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size sha256_block_data_order,.-sha256_block_data_order
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.global sha256_block_data_order_neon
+.type sha256_block_data_order_neon,%function
+.align 4
+sha256_block_data_order_neon:
+.LNEON:
+ stmdb sp!,{r4-r12,lr}
+
+ sub r11,sp,#16*4+16
+ adrl r14,K256
+ bic r11,r11,#15 @ align for 128-bit stores
+ mov r12,sp
+ mov sp,r11 @ alloca
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+ vld1.8 {q0},[r1]!
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ vld1.32 {q8},[r14,:128]!
+ vld1.32 {q9},[r14,:128]!
+ vld1.32 {q10},[r14,:128]!
+ vld1.32 {q11},[r14,:128]!
+ vrev32.8 q0,q0 @ yes, even on
+ str r0,[sp,#64]
+ vrev32.8 q1,q1 @ big-endian
+ str r1,[sp,#68]
+ mov r1,sp
+ vrev32.8 q2,q2
+ str r2,[sp,#72]
+ vrev32.8 q3,q3
+ str r12,[sp,#76] @ save original sp
+ vadd.i32 q8,q8,q0
+ vadd.i32 q9,q9,q1
+ vst1.32 {q8},[r1,:128]!
+ vadd.i32 q10,q10,q2
+ vst1.32 {q9},[r1,:128]!
+ vadd.i32 q11,q11,q3
+ vst1.32 {q10},[r1,:128]!
+ vst1.32 {q11},[r1,:128]!
+
+ ldmia r0,{r4-r11}
+ sub r1,r1,#64
+ ldr r2,[sp,#0]
+ eor r12,r12,r12
+ eor r3,r5,r6
+ b .L_00_48
+
+.align 4
+.L_00_48:
+ vext.8 q8,q0,q1,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q2,q3,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q0,q0,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d7,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d7,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d7,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q0,q0,q9
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d7,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d7,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d0,d0,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d0,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d0,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d0,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ vshr.u32 d24,d0,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d0,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d1,d1,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q0
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q1,q2,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q3,q0,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q1,q1,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d1,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d1,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d1,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q1,q1,q9
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d1,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d1,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d2,d2,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d2,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d2,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d2,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ vshr.u32 d24,d2,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d2,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d3,d3,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q1
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vext.8 q8,q2,q3,#4
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ vext.8 q9,q0,q1,#4
+ add r4,r4,r12
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vadd.i32 q2,q2,q9
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ veor q9,q9,q10
+ add r10,r10,r2
+ vsli.32 q11,q8,#14
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ vshr.u32 d24,d3,#17
+ add r11,r11,r3
+ and r2,r2,r7
+ veor q9,q9,q11
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ vsli.32 d24,d3,#15
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ vshr.u32 d25,d3,#10
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ vadd.i32 q2,q2,q9
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r6,r6,r10
+ vshr.u32 d24,d3,#19
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ vsli.32 d24,d3,#13
+ add r9,r9,r2
+ eor r2,r7,r8
+ veor d25,d25,d24
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ vadd.i32 d4,d4,d25
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ vshr.u32 d24,d4,#17
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ vsli.32 d24,d4,#15
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ vshr.u32 d25,d4,#10
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ vshr.u32 d24,d4,#19
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ vld1.32 {q8},[r14,:128]!
+ add r8,r8,r2
+ vsli.32 d24,d4,#13
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ veor d25,d25,d24
+ add r9,r9,r3
+ and r2,r2,r5
+ vadd.i32 d5,d5,d25
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ vadd.i32 q8,q8,q2
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ vst1.32 {q8},[r1,:128]!
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vext.8 q8,q3,q0,#4
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ vext.8 q9,q1,q2,#4
+ add r8,r8,r12
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ vshr.u32 q10,q8,#7
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vadd.i32 q3,q3,q9
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ vshr.u32 q9,q8,#3
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vsli.32 q10,q8,#25
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ vshr.u32 q11,q8,#18
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ veor q9,q9,q10
+ add r6,r6,r2
+ vsli.32 q11,q8,#14
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ vshr.u32 d24,d5,#17
+ add r7,r7,r3
+ and r2,r2,r11
+ veor q9,q9,q11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ vsli.32 d24,d5,#15
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ vshr.u32 d25,d5,#10
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ vadd.i32 q3,q3,q9
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ veor d25,d25,d24
+ and r12,r12,r3
+ add r10,r10,r6
+ vshr.u32 d24,d5,#19
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ vsli.32 d24,d5,#13
+ add r5,r5,r2
+ eor r2,r11,r4
+ veor d25,d25,d24
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ vadd.i32 d6,d6,d25
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ vshr.u32 d24,d6,#17
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ vsli.32 d24,d6,#15
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ vshr.u32 d25,d6,#10
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ veor d25,d25,d24
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ vshr.u32 d24,d6,#19
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ vld1.32 {q8},[r14,:128]!
+ add r4,r4,r2
+ vsli.32 d24,d6,#13
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ veor d25,d25,d24
+ add r5,r5,r3
+ and r2,r2,r9
+ vadd.i32 d7,d7,d25
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ vadd.i32 q8,q8,q3
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[r14]
+ and r12,r12,r3
+ add r8,r8,r4
+ vst1.32 {q8},[r1,:128]!
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ teq r2,#0 @ check for K256 terminator
+ ldr r2,[sp,#0]
+ sub r1,r1,#64
+ bne .L_00_48
+
+ ldr r1,[sp,#68]
+ ldr r0,[sp,#72]
+ sub r14,r14,#256 @ rewind r14
+ teq r1,r0
+ it eq
+ subeq r1,r1,#64 @ avoid SEGV
+ vld1.8 {q0},[r1]! @ load next input block
+ vld1.8 {q1},[r1]!
+ vld1.8 {q2},[r1]!
+ vld1.8 {q3},[r1]!
+ it ne
+ strne r1,[sp,#68]
+ mov r1,sp
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q0,q0
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q0
+ ldr r2,[sp,#4]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#8]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#12]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#16]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q1,q1
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q1
+ ldr r2,[sp,#20]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#24]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#28]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#32]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ add r11,r11,r2
+ eor r2,r9,r10
+ eor r0,r8,r8,ror#5
+ add r4,r4,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r8
+ eor r12,r0,r8,ror#19
+ eor r0,r4,r4,ror#11
+ eor r2,r2,r10
+ vrev32.8 q2,q2
+ add r11,r11,r12,ror#6
+ eor r12,r4,r5
+ eor r0,r0,r4,ror#20
+ add r11,r11,r2
+ vadd.i32 q8,q8,q2
+ ldr r2,[sp,#36]
+ and r3,r3,r12
+ add r7,r7,r11
+ add r11,r11,r0,ror#2
+ eor r3,r3,r5
+ add r10,r10,r2
+ eor r2,r8,r9
+ eor r0,r7,r7,ror#5
+ add r11,r11,r3
+ and r2,r2,r7
+ eor r3,r0,r7,ror#19
+ eor r0,r11,r11,ror#11
+ eor r2,r2,r9
+ add r10,r10,r3,ror#6
+ eor r3,r11,r4
+ eor r0,r0,r11,ror#20
+ add r10,r10,r2
+ ldr r2,[sp,#40]
+ and r12,r12,r3
+ add r6,r6,r10
+ add r10,r10,r0,ror#2
+ eor r12,r12,r4
+ add r9,r9,r2
+ eor r2,r7,r8
+ eor r0,r6,r6,ror#5
+ add r10,r10,r12
+ and r2,r2,r6
+ eor r12,r0,r6,ror#19
+ eor r0,r10,r10,ror#11
+ eor r2,r2,r8
+ add r9,r9,r12,ror#6
+ eor r12,r10,r11
+ eor r0,r0,r10,ror#20
+ add r9,r9,r2
+ ldr r2,[sp,#44]
+ and r3,r3,r12
+ add r5,r5,r9
+ add r9,r9,r0,ror#2
+ eor r3,r3,r11
+ add r8,r8,r2
+ eor r2,r6,r7
+ eor r0,r5,r5,ror#5
+ add r9,r9,r3
+ and r2,r2,r5
+ eor r3,r0,r5,ror#19
+ eor r0,r9,r9,ror#11
+ eor r2,r2,r7
+ add r8,r8,r3,ror#6
+ eor r3,r9,r10
+ eor r0,r0,r9,ror#20
+ add r8,r8,r2
+ ldr r2,[sp,#48]
+ and r12,r12,r3
+ add r4,r4,r8
+ add r8,r8,r0,ror#2
+ eor r12,r12,r10
+ vst1.32 {q8},[r1,:128]!
+ add r7,r7,r2
+ eor r2,r5,r6
+ eor r0,r4,r4,ror#5
+ add r8,r8,r12
+ vld1.32 {q8},[r14,:128]!
+ and r2,r2,r4
+ eor r12,r0,r4,ror#19
+ eor r0,r8,r8,ror#11
+ eor r2,r2,r6
+ vrev32.8 q3,q3
+ add r7,r7,r12,ror#6
+ eor r12,r8,r9
+ eor r0,r0,r8,ror#20
+ add r7,r7,r2
+ vadd.i32 q8,q8,q3
+ ldr r2,[sp,#52]
+ and r3,r3,r12
+ add r11,r11,r7
+ add r7,r7,r0,ror#2
+ eor r3,r3,r9
+ add r6,r6,r2
+ eor r2,r4,r5
+ eor r0,r11,r11,ror#5
+ add r7,r7,r3
+ and r2,r2,r11
+ eor r3,r0,r11,ror#19
+ eor r0,r7,r7,ror#11
+ eor r2,r2,r5
+ add r6,r6,r3,ror#6
+ eor r3,r7,r8
+ eor r0,r0,r7,ror#20
+ add r6,r6,r2
+ ldr r2,[sp,#56]
+ and r12,r12,r3
+ add r10,r10,r6
+ add r6,r6,r0,ror#2
+ eor r12,r12,r8
+ add r5,r5,r2
+ eor r2,r11,r4
+ eor r0,r10,r10,ror#5
+ add r6,r6,r12
+ and r2,r2,r10
+ eor r12,r0,r10,ror#19
+ eor r0,r6,r6,ror#11
+ eor r2,r2,r4
+ add r5,r5,r12,ror#6
+ eor r12,r6,r7
+ eor r0,r0,r6,ror#20
+ add r5,r5,r2
+ ldr r2,[sp,#60]
+ and r3,r3,r12
+ add r9,r9,r5
+ add r5,r5,r0,ror#2
+ eor r3,r3,r7
+ add r4,r4,r2
+ eor r2,r10,r11
+ eor r0,r9,r9,ror#5
+ add r5,r5,r3
+ and r2,r2,r9
+ eor r3,r0,r9,ror#19
+ eor r0,r5,r5,ror#11
+ eor r2,r2,r11
+ add r4,r4,r3,ror#6
+ eor r3,r5,r6
+ eor r0,r0,r5,ror#20
+ add r4,r4,r2
+ ldr r2,[sp,#64]
+ and r12,r12,r3
+ add r8,r8,r4
+ add r4,r4,r0,ror#2
+ eor r12,r12,r6
+ vst1.32 {q8},[r1,:128]!
+ ldr r0,[r2,#0]
+ add r4,r4,r12 @ h+=Maj(a,b,c) from the past
+ ldr r12,[r2,#4]
+ ldr r3,[r2,#8]
+ ldr r1,[r2,#12]
+ add r4,r4,r0 @ accumulate
+ ldr r0,[r2,#16]
+ add r5,r5,r12
+ ldr r12,[r2,#20]
+ add r6,r6,r3
+ ldr r3,[r2,#24]
+ add r7,r7,r1
+ ldr r1,[r2,#28]
+ add r8,r8,r0
+ str r4,[r2],#4
+ add r9,r9,r12
+ str r5,[r2],#4
+ add r10,r10,r3
+ str r6,[r2],#4
+ add r11,r11,r1
+ str r7,[r2],#4
+ stmia r2,{r8-r11}
+
+ ittte ne
+ movne r1,sp
+ ldrne r2,[sp,#0]
+ eorne r12,r12,r12
+ ldreq sp,[sp,#76] @ restore original sp
+ itt ne
+ eorne r3,r5,r6
+ bne .L_00_48
+
+ ldmia sp!,{r4-r12,pc}
+.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+
+# ifdef __thumb2__
+# define INST(a,b,c,d) .byte c,d|0xc,a,b
+# else
+# define INST(a,b,c,d) .byte a,b,c,d
+# endif
+
+.type sha256_block_data_order_armv8,%function
+.align 5
+sha256_block_data_order_armv8:
+.LARMv8:
+ vld1.32 {q0,q1},[r0]
+# ifdef __thumb2__
+ adr r3,.LARMv8
+ sub r3,r3,#.LARMv8-K256
+# else
+ adrl r3,K256
+# endif
+ add r2,r1,r2,lsl#6 @ len to point at the end of inp
+
+.Loop_v8:
+ vld1.8 {q8-q9},[r1]!
+ vld1.8 {q10-q11},[r1]!
+ vld1.32 {q12},[r3]!
+ vrev32.8 q8,q8
+ vrev32.8 q9,q9
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vmov q14,q0 @ offload
+ vmov q15,q1
+ teq r1,r2
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q10
+ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q11
+ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
+ vld1.32 {q13},[r3]!
+ vadd.i32 q12,q12,q8
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vld1.32 {q12},[r3]!
+ vadd.i32 q13,q13,q9
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vld1.32 {q13},[r3]
+ vadd.i32 q12,q12,q10
+ sub r3,r3,#256-16 @ rewind
+ vmov q2,q0
+ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
+ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
+
+ vadd.i32 q13,q13,q11
+ vmov q2,q0
+ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
+ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
+
+ vadd.i32 q0,q0,q14
+ vadd.i32 q1,q1,q15
+ it ne
+ bne .Loop_v8
+
+ vst1.32 {q0,q1},[r0]
+
+ bx lr @ bx lr
+.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
+#endif
+.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align 2
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+.comm OPENSSL_armcap_P,4,4
+#endif
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
new file mode 100644
index 000000000000..a84e869ef900
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.c
@@ -0,0 +1,128 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using optimized ARM assembler and NEON instructions.
+ *
+ * Copyright 2015 Google Inc.
+ *
+ * This file is based on sha256_ssse3_glue.c:
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/crypto.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ /* make sure casting to sha256_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
+
+ return sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order);
+}
+EXPORT_SYMBOL(crypto_sha256_arm_update);
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha256_block_data_order);
+ return sha256_base_finish(desc, out);
+}
+
+int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order);
+ return sha256_final(desc, out);
+}
+EXPORT_SYMBOL(crypto_sha256_arm_finup);
+
+static struct shash_alg algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_base_init,
+ .update = crypto_sha256_arm_update,
+ .final = sha256_final,
+ .finup = crypto_sha256_arm_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_base_init,
+ .update = crypto_sha256_arm_update,
+ .final = sha256_final,
+ .finup = crypto_sha256_arm_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-asm",
+ .cra_priority = 150,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init sha256_mod_init(void)
+{
+ int res = crypto_register_shashes(algs, ARRAY_SIZE(algs));
+
+ if (res < 0)
+ return res;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon()) {
+ res = crypto_register_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+
+ if (res < 0)
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+ }
+
+ return res;
+}
+
+static void __exit sha256_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && cpu_has_neon())
+ crypto_unregister_shashes(sha256_neon_algs,
+ ARRAY_SIZE(sha256_neon_algs));
+}
+
+module_init(sha256_mod_init);
+module_exit(sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm (ARM), including NEON");
+
+MODULE_ALIAS_CRYPTO("sha256");
diff --git a/arch/arm/crypto/sha256_glue.h b/arch/arm/crypto/sha256_glue.h
new file mode 100644
index 000000000000..7cf0bf786ada
--- /dev/null
+++ b/arch/arm/crypto/sha256_glue.h
@@ -0,0 +1,14 @@
+#ifndef _CRYPTO_SHA256_GLUE_H
+#define _CRYPTO_SHA256_GLUE_H
+
+#include <linux/crypto.h>
+
+extern struct shash_alg sha256_neon_algs[2];
+
+int crypto_sha256_arm_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len);
+
+int crypto_sha256_arm_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash);
+
+#endif /* _CRYPTO_SHA256_GLUE_H */
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
new file mode 100644
index 000000000000..39ccd658817e
--- /dev/null
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -0,0 +1,101 @@
+/*
+ * Glue code for the SHA256 Secure Hash Algorithm assembly implementation
+ * using NEON instructions.
+ *
+ * Copyright 2015 Google Inc.
+ *
+ * This file is based on sha512_neon_glue.c:
+ * Copyright 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
+#include <asm/byteorder.h>
+#include <asm/simd.h>
+#include <asm/neon.h>
+
+#include "sha256_glue.h"
+
+asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
+ unsigned int num_blks);
+
+static int sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ if (!may_use_simd() ||
+ (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+ return crypto_sha256_arm_update(desc, data, len);
+
+ kernel_neon_begin();
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return 0;
+}
+
+static int sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ if (!may_use_simd())
+ return crypto_sha256_arm_finup(desc, data, len, out);
+
+ kernel_neon_begin();
+ if (len)
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha256_block_data_order_neon);
+ kernel_neon_end();
+
+ return sha256_base_finish(desc, out);
+}
+
+static int sha256_final(struct shash_desc *desc, u8 *out)
+{
+ return sha256_finup(desc, NULL, 0, out);
+}
+
+struct shash_alg sha256_neon_algs[] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = sha256_base_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name = "sha256-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = sha224_base_init,
+ .update = sha256_update,
+ .final = sha256_final,
+ .finup = sha256_finup,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name = "sha224-neon",
+ .cra_priority = 250,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index b1b5b893eb20..05d9e16c0dfd 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -284,7 +284,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ecb-aes-" MODE,
.cra_driver_name = "__driver-ecb-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -302,7 +303,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__cbc-aes-" MODE,
.cra_driver_name = "__driver-cbc-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -320,7 +322,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__ctr-aes-" MODE,
.cra_driver_name = "__driver-ctr-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_alignmask = 7,
@@ -338,7 +341,8 @@ static struct crypto_alg aes_algs[] = { {
.cra_name = "__xts-aes-" MODE,
.cra_driver_name = "__driver-xts-aes-" MODE,
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
.cra_alignmask = 7,
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index 09d57d98609c..033aae6d732a 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -66,8 +66,8 @@
.word 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
/*
- * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
- * u8 *head, long bytes)
+ * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+ * int blocks)
*/
ENTRY(sha1_ce_transform)
/* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
ld1r {k3.4s}, [x6]
/* load state */
- ldr dga, [x2]
- ldr dgb, [x2, #16]
+ ldr dga, [x0]
+ ldr dgb, [x0, #16]
- /* load partial state (if supplied) */
- cbz x3, 0f
- ld1 {v8.4s-v11.4s}, [x3]
- b 1f
+ /* load sha1_ce_state::finalize */
+ ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
/* load input */
0: ld1 {v8.4s-v11.4s}, [x1], #64
- sub w0, w0, #1
+ sub w2, w2, #1
-1:
CPU_LE( rev32 v8.16b, v8.16b )
CPU_LE( rev32 v9.16b, v9.16b )
CPU_LE( rev32 v10.16b, v10.16b )
CPU_LE( rev32 v11.16b, v11.16b )
-2: add t0.4s, v8.4s, k0.4s
+1: add t0.4s, v8.4s, k0.4s
mov dg0v.16b, dgav.16b
add_update c, ev, k0, 8, 9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE( rev32 v11.16b, v11.16b )
add dgbv.2s, dgbv.2s, dg1v.2s
add dgav.4s, dgav.4s, dg0v.4s
- cbnz w0, 0b
+ cbnz w2, 0b
/*
* Final block: add padding and total bit count.
- * Skip if we have no total byte count in x4. In that case, the input
- * size was not a round multiple of the block size, and the padding is
- * handled by the C code.
+ * Skip if the input size was not a round multiple of the block size,
+ * the padding is handled by the C code in that case.
*/
cbz x4, 3f
+ ldr x4, [x0, #:lo12:sha1_ce_offsetof_count]
movi v9.2d, #0
mov x8, #0x80000000
movi v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE( rev32 v11.16b, v11.16b )
mov x4, #0
mov v11.d[0], xzr
mov v11.d[1], x7
- b 2b
+ b 1b
/* store new state */
-3: str dga, [x2]
- str dgb, [x2, #16]
+3: str dga, [x0]
+ str dgb, [x0, #16]
ret
ENDPROC(sha1_ce_transform)
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 6fe83f37a750..114e7cc5de8c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,144 +12,81 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
+#define ASM_EXPORT(sym, val) \
+ asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head, long bytes);
+struct sha1_ce_state {
+ struct sha1_state sst;
+ u32 finalize;
+};
-static int sha1_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+ int blocks);
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
- return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
- sctx->count += len;
-
- if ((partial + len) >= SHA1_BLOCK_SIZE) {
- int blocks;
-
- if (partial) {
- int p = SHA1_BLOCK_SIZE - partial;
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- memcpy(sctx->buffer + partial, data, p);
- data += p;
- len -= p;
- }
-
- blocks = len / SHA1_BLOCK_SIZE;
- len %= SHA1_BLOCK_SIZE;
-
- kernel_neon_begin_partial(16);
- sha1_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buffer : NULL, 0);
- kernel_neon_end();
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(16);
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_ce_transform);
+ kernel_neon_end();
- data += blocks * SHA1_BLOCK_SIZE;
- partial = 0;
- }
- if (len)
- memcpy(sctx->buffer + partial, data, len);
return 0;
}
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+ struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+ bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be64 bits = cpu_to_be64(sctx->count << 3);
- __be32 *dst = (__be32 *)out;
- int i;
-
- u32 padlen = SHA1_BLOCK_SIZE
- - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
- sha1_update(desc, padding, padlen);
- sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
- for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha1_state){};
- return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int blocks;
- int i;
-
- if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
- sha1_update(desc, data, len);
- return sha1_final(desc, out);
- }
+ ASM_EXPORT(sha1_ce_offsetof_count,
+ offsetof(struct sha1_ce_state, sst.count));
+ ASM_EXPORT(sha1_ce_offsetof_finalize,
+ offsetof(struct sha1_ce_state, finalize));
/*
- * Use a fast path if the input is a multiple of 64 bytes. In
- * this case, there is no need to copy data around, and we can
- * perform the entire digest calculation in a single invocation
- * of sha1_ce_transform()
+ * Allow the asm code to perform the finalization if there is no
+ * partial data and the input is a round multiple of the block size.
*/
- blocks = len / SHA1_BLOCK_SIZE;
+ sctx->finalize = finalize;
kernel_neon_begin_partial(16);
- sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_ce_transform);
+ if (!finalize)
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
kernel_neon_end();
-
- for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha1_state){};
- return 0;
+ return sha1_base_finish(desc, out);
}
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- struct sha1_state *dst = out;
-
- *dst = *sctx;
- return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- struct sha1_state const *src = in;
-
- *sctx = *src;
- return 0;
+ kernel_neon_begin_partial(16);
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+ kernel_neon_end();
+ return sha1_base_finish(desc, out);
}
static struct shash_alg alg = {
- .init = sha1_init,
- .update = sha1_update,
- .final = sha1_final,
- .finup = sha1_finup,
- .export = sha1_export,
- .import = sha1_import,
- .descsize = sizeof(struct sha1_state),
+ .init = sha1_base_init,
+ .update = sha1_ce_update,
+ .final = sha1_ce_final,
+ .finup = sha1_ce_finup,
+ .descsize = sizeof(struct sha1_ce_state),
.digestsize = SHA1_DIGEST_SIZE,
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-ce",
diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 7f29fc031ea8..5df9d9d470ad 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -73,8 +73,8 @@
.word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
/*
- * void sha2_ce_transform(int blocks, u8 const *src, u32 *state,
- * u8 *head, long bytes)
+ * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+ * int blocks)
*/
ENTRY(sha2_ce_transform)
/* load round constants */
@@ -85,24 +85,21 @@ ENTRY(sha2_ce_transform)
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
- ldp dga, dgb, [x2]
+ ldp dga, dgb, [x0]
- /* load partial input (if supplied) */
- cbz x3, 0f
- ld1 {v16.4s-v19.4s}, [x3]
- b 1f
+ /* load sha256_ce_state::finalize */
+ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
/* load input */
0: ld1 {v16.4s-v19.4s}, [x1], #64
- sub w0, w0, #1
+ sub w2, w2, #1
-1:
CPU_LE( rev32 v16.16b, v16.16b )
CPU_LE( rev32 v17.16b, v17.16b )
CPU_LE( rev32 v18.16b, v18.16b )
CPU_LE( rev32 v19.16b, v19.16b )
-2: add t0.4s, v16.4s, v0.4s
+1: add t0.4s, v16.4s, v0.4s
mov dg0v.16b, dgav.16b
mov dg1v.16b, dgbv.16b
@@ -131,15 +128,15 @@ CPU_LE( rev32 v19.16b, v19.16b )
add dgbv.4s, dgbv.4s, dg1v.4s
/* handled all input blocks? */
- cbnz w0, 0b
+ cbnz w2, 0b
/*
* Final block: add padding and total bit count.
- * Skip if we have no total byte count in x4. In that case, the input
- * size was not a round multiple of the block size, and the padding is
- * handled by the C code.
+ * Skip if the input size was not a round multiple of the block size,
+ * the padding is handled by the C code in that case.
*/
cbz x4, 3f
+ ldr x4, [x0, #:lo12:sha256_ce_offsetof_count]
movi v17.2d, #0
mov x8, #0x80000000
movi v18.2d, #0
@@ -148,9 +145,9 @@ CPU_LE( rev32 v19.16b, v19.16b )
mov x4, #0
mov v19.d[0], xzr
mov v19.d[1], x7
- b 2b
+ b 1b
/* store new state */
-3: stp dga, dgb, [x2]
+3: stp dga, dgb, [x0]
ret
ENDPROC(sha2_ce_transform)
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index ae67e88c28b9..1340e44c048b 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,206 +12,82 @@
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
+#define ASM_EXPORT(sym, val) \
+ asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
-asmlinkage int sha2_ce_transform(int blocks, u8 const *src, u32 *state,
- u8 *head, long bytes);
-
-static int sha224_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha256_state){
- .state = {
- SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
- SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
- }
- };
- return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha256_state){
- .state = {
- SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
- SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
- }
- };
- return 0;
-}
-
-static int sha2_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
-
- sctx->count += len;
-
- if ((partial + len) >= SHA256_BLOCK_SIZE) {
- int blocks;
-
- if (partial) {
- int p = SHA256_BLOCK_SIZE - partial;
-
- memcpy(sctx->buf + partial, data, p);
- data += p;
- len -= p;
- }
+struct sha256_ce_state {
+ struct sha256_state sst;
+ u32 finalize;
+};
- blocks = len / SHA256_BLOCK_SIZE;
- len %= SHA256_BLOCK_SIZE;
+asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
+ int blocks);
- kernel_neon_begin_partial(28);
- sha2_ce_transform(blocks, data, sctx->state,
- partial ? sctx->buf : NULL, 0);
- kernel_neon_end();
-
- data += blocks * SHA256_BLOCK_SIZE;
- partial = 0;
- }
- if (len)
- memcpy(sctx->buf + partial, data, len);
- return 0;
-}
-
-static void sha2_final(struct shash_desc *desc)
+static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
- static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be64 bits = cpu_to_be64(sctx->count << 3);
- u32 padlen = SHA256_BLOCK_SIZE
- - ((sctx->count + sizeof(bits)) % SHA256_BLOCK_SIZE);
-
- sha2_update(desc, padding, padlen);
- sha2_update(desc, (const u8 *)&bits, sizeof(bits));
-}
-
-static int sha224_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_final(desc);
-
- for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
-static int sha256_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_final(desc);
-
- for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
+ sctx->finalize = 0;
+ kernel_neon_begin_partial(28);
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
- *sctx = (struct sha256_state){};
return 0;
}
-static void sha2_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- int blocks;
+ struct sha256_ce_state *sctx = shash_desc_ctx(desc);
+ bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
- if (sctx->count || !len || (len % SHA256_BLOCK_SIZE)) {
- sha2_update(desc, data, len);
- sha2_final(desc);
- return;
- }
+ ASM_EXPORT(sha256_ce_offsetof_count,
+ offsetof(struct sha256_ce_state, sst.count));
+ ASM_EXPORT(sha256_ce_offsetof_finalize,
+ offsetof(struct sha256_ce_state, finalize));
/*
- * Use a fast path if the input is a multiple of 64 bytes. In
- * this case, there is no need to copy data around, and we can
- * perform the entire digest calculation in a single invocation
- * of sha2_ce_transform()
+ * Allow the asm code to perform the finalization if there is no
+ * partial data and the input is a round multiple of the block size.
*/
- blocks = len / SHA256_BLOCK_SIZE;
+ sctx->finalize = finalize;
kernel_neon_begin_partial(28);
- sha2_ce_transform(blocks, data, sctx->state, NULL, len);
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha2_ce_transform);
+ if (!finalize)
+ sha256_base_do_finalize(desc,
+ (sha256_block_fn *)sha2_ce_transform);
kernel_neon_end();
+ return sha256_base_finish(desc, out);
}
-static int sha224_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
+static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_finup(desc, data, len);
-
- for (i = 0; i < SHA224_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
-
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
- unsigned int len, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- int i;
-
- sha2_finup(desc, data, len);
-
- for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(__be32); i++)
- put_unaligned_be32(sctx->state[i], dst++);
-
- *sctx = (struct sha256_state){};
- return 0;
-}
-
-static int sha2_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- struct sha256_state *dst = out;
-
- *dst = *sctx;
- return 0;
-}
-
-static int sha2_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- struct sha256_state const *src = in;
-
- *sctx = *src;
- return 0;
+ kernel_neon_begin_partial(28);
+ sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
+ kernel_neon_end();
+ return sha256_base_finish(desc, out);
}
static struct shash_alg algs[] = { {
- .init = sha224_init,
- .update = sha2_update,
- .final = sha224_final,
- .finup = sha224_finup,
- .export = sha2_export,
- .import = sha2_import,
- .descsize = sizeof(struct sha256_state),
+ .init = sha224_base_init,
+ .update = sha256_ce_update,
+ .final = sha256_ce_final,
+ .finup = sha256_ce_finup,
+ .descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA224_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ce",
@@ -221,15 +97,12 @@ static struct shash_alg algs[] = { {
.cra_module = THIS_MODULE,
}
}, {
- .init = sha256_init,
- .update = sha2_update,
- .final = sha256_final,
- .finup = sha256_finup,
- .export = sha2_export,
- .import = sha2_import,
- .descsize = sizeof(struct sha256_state),
+ .init = sha256_base_init,
+ .update = sha256_ce_update,
+ .final = sha256_ce_final,
+ .finup = sha256_ce_finup,
+ .descsize = sizeof(struct sha256_ce_state),
.digestsize = SHA256_DIGEST_SIZE,
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ce",
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
index a74f76d85a2f..f7aa9d5d3b87 100644
--- a/arch/mips/cavium-octeon/crypto/Makefile
+++ b/arch/mips/cavium-octeon/crypto/Makefile
@@ -4,4 +4,7 @@
obj-y += octeon-crypto.o
-obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
+obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
+obj-$(CONFIG_CRYPTO_SHA1_OCTEON) += octeon-sha1.o
+obj-$(CONFIG_CRYPTO_SHA256_OCTEON) += octeon-sha256.o
+obj-$(CONFIG_CRYPTO_SHA512_OCTEON) += octeon-sha512.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
index 7c82ff463b65..f66bd1adc7ff 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -17,7 +17,7 @@
* crypto operations in calls to octeon_crypto_enable/disable in order to make
* sure the state of COP2 isn't corrupted if userspace is also performing
* hardware crypto operations. Allocate the state parameter on the stack.
- * Preemption must be disabled to prevent context switches.
+ * Returns with preemption disabled.
*
* @state: Pointer to state structure to store current COP2 state in.
*
@@ -28,6 +28,7 @@ unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
int status;
unsigned long flags;
+ preempt_disable();
local_irq_save(flags);
status = read_c0_status();
write_c0_status(status | ST0_CU2);
@@ -62,5 +63,6 @@ void octeon_crypto_disable(struct octeon_cop2_state *state,
else
write_c0_status(read_c0_status() & ~ST0_CU2);
local_irq_restore(flags);
+ preempt_enable();
}
EXPORT_SYMBOL_GPL(octeon_crypto_disable);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
index e2a4aece9c24..355072535110 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -5,7 +5,8 @@
*
* Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
*
- * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ * MD5/SHA1/SHA256/SHA512 instruction definitions added by
+ * Aaro Koskinen <aaro.koskinen@iki.fi>.
*
*/
#ifndef __LINUX_OCTEON_CRYPTO_H
@@ -21,11 +22,11 @@ extern void octeon_crypto_disable(struct octeon_cop2_state *state,
unsigned long flags);
/*
- * Macros needed to implement MD5:
+ * Macros needed to implement MD5/SHA1/SHA256:
*/
/*
- * The index can be 0-1.
+ * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
*/
#define write_octeon_64bit_hash_dword(value, index) \
do { \
@@ -36,7 +37,7 @@ do { \
} while (0)
/*
- * The index can be 0-1.
+ * The index can be 0-1 (MD5) or 0-2 (SHA1), 0-3 (SHA256).
*/
#define read_octeon_64bit_hash_dword(index) \
({ \
@@ -72,4 +73,78 @@ do { \
: [rt] "d" (value)); \
} while (0)
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha1_start(value) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x4057" \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_sha256_start(value) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x404f" \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * Macros needed to implement SHA512:
+ */
+
+/*
+ * The index can be 0-7.
+ */
+#define write_octeon_64bit_hash_sha512(value, index) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x0250+" STR(index) \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * The index can be 0-7.
+ */
+#define read_octeon_64bit_hash_sha512(index) \
+({ \
+ u64 __value; \
+ \
+ __asm__ __volatile__ ( \
+ "dmfc2 %[rt],0x0250+" STR(index) \
+ : [rt] "=d" (__value) \
+ : ); \
+ \
+ __value; \
+})
+
+/*
+ * The index can be 0-14.
+ */
+#define write_octeon_64bit_block_sha512(value, index) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x0240+" STR(index) \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * The value is the final block word (64-bit).
+ */
+#define octeon_sha512_start(value) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x424f" \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
#endif /* __LINUX_OCTEON_CRYPTO_H */
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
index b909881ba6c1..12dccdb38286 100644
--- a/arch/mips/cavium-octeon/crypto/octeon-md5.c
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -97,8 +97,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
avail);
- local_bh_disable();
- preempt_disable();
flags = octeon_crypto_enable(&state);
octeon_md5_store_hash(mctx);
@@ -114,8 +112,6 @@ static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
- preempt_enable();
- local_bh_enable();
memcpy(mctx->block, data, len);
@@ -133,8 +129,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
*p++ = 0x80;
- local_bh_disable();
- preempt_disable();
flags = octeon_crypto_enable(&state);
octeon_md5_store_hash(mctx);
@@ -152,8 +146,6 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out)
octeon_md5_read_hash(mctx);
octeon_crypto_disable(&state, flags);
- preempt_enable();
- local_bh_enable();
memcpy(out, mctx->hash, sizeof(mctx->hash));
memset(mctx, 0, sizeof(*mctx));
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
new file mode 100644
index 000000000000..2b74b5b67cae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c
@@ -0,0 +1,241 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA1 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha1_generic.c, which is:
+ *
+ * Copyright (c) Alan Smithee.
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha1_store_hash(struct sha1_state *sctx)
+{
+ u64 *hash = (u64 *)sctx->state;
+ union {
+ u32 word[2];
+ u64 dword;
+ } hash_tail = { { sctx->state[4], } };
+
+ write_octeon_64bit_hash_dword(hash[0], 0);
+ write_octeon_64bit_hash_dword(hash[1], 1);
+ write_octeon_64bit_hash_dword(hash_tail.dword, 2);
+ memzero_explicit(&hash_tail.word[0], sizeof(hash_tail.word[0]));
+}
+
+static void octeon_sha1_read_hash(struct sha1_state *sctx)
+{
+ u64 *hash = (u64 *)sctx->state;
+ union {
+ u32 word[2];
+ u64 dword;
+ } hash_tail;
+
+ hash[0] = read_octeon_64bit_hash_dword(0);
+ hash[1] = read_octeon_64bit_hash_dword(1);
+ hash_tail.dword = read_octeon_64bit_hash_dword(2);
+ sctx->state[4] = hash_tail.word[0];
+ memzero_explicit(&hash_tail.dword, sizeof(hash_tail.dword));
+}
+
+static void octeon_sha1_transform(const void *_block)
+{
+ const u64 *block = _block;
+
+ write_octeon_64bit_block_dword(block[0], 0);
+ write_octeon_64bit_block_dword(block[1], 1);
+ write_octeon_64bit_block_dword(block[2], 2);
+ write_octeon_64bit_block_dword(block[3], 3);
+ write_octeon_64bit_block_dword(block[4], 4);
+ write_octeon_64bit_block_dword(block[5], 5);
+ write_octeon_64bit_block_dword(block[6], 6);
+ octeon_sha1_start(block[7]);
+}
+
+static int octeon_sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA1_H0;
+ sctx->state[1] = SHA1_H1;
+ sctx->state[2] = SHA1_H2;
+ sctx->state[3] = SHA1_H3;
+ sctx->state[4] = SHA1_H4;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static void __octeon_sha1_update(struct sha1_state *sctx, const u8 *data,
+ unsigned int len)
+{
+ unsigned int partial;
+ unsigned int done;
+ const u8 *src;
+
+ partial = sctx->count % SHA1_BLOCK_SIZE;
+ sctx->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) >= SHA1_BLOCK_SIZE) {
+ if (partial) {
+ done = -partial;
+ memcpy(sctx->buffer + partial, data,
+ done + SHA1_BLOCK_SIZE);
+ src = sctx->buffer;
+ }
+
+ do {
+ octeon_sha1_transform(src);
+ done += SHA1_BLOCK_SIZE;
+ src = data + done;
+ } while (done + SHA1_BLOCK_SIZE <= len);
+
+ partial = 0;
+ }
+ memcpy(sctx->buffer + partial, src, len - done);
+}
+
+static int octeon_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ struct octeon_cop2_state state;
+ unsigned long flags;
+
+ /*
+ * Small updates never reach the crypto engine, so the generic sha1 is
+ * faster because of the heavyweight octeon_crypto_enable() /
+ * octeon_crypto_disable().
+ */
+ if ((sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+ return crypto_sha1_update(desc, data, len);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha1_store_hash(sctx);
+
+ __octeon_sha1_update(sctx, data, len);
+
+ octeon_sha1_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ return 0;
+}
+
+static int octeon_sha1_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ static const u8 padding[64] = { 0x80, };
+ struct octeon_cop2_state state;
+ __be32 *dst = (__be32 *)out;
+ unsigned int pad_len;
+ unsigned long flags;
+ unsigned int index;
+ __be64 bits;
+ int i;
+
+ /* Save number of bits. */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64. */
+ index = sctx->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha1_store_hash(sctx);
+
+ __octeon_sha1_update(sctx, padding, pad_len);
+
+ /* Append length (before padding). */
+ __octeon_sha1_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+ octeon_sha1_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ /* Store state in digest */
+ for (i = 0; i < 5; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int octeon_sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int octeon_sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg octeon_sha1_alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = octeon_sha1_init,
+ .update = octeon_sha1_update,
+ .final = octeon_sha1_final,
+ .export = octeon_sha1_export,
+ .import = octeon_sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name= "octeon-sha1",
+ .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init octeon_sha1_mod_init(void)
+{
+ if (!octeon_has_crypto())
+ return -ENOTSUPP;
+ return crypto_register_shash(&octeon_sha1_alg);
+}
+
+static void __exit octeon_sha1_mod_fini(void)
+{
+ crypto_unregister_shash(&octeon_sha1_alg);
+}
+
+module_init(octeon_sha1_mod_init);
+module_exit(octeon_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
new file mode 100644
index 000000000000..97e96fead08a
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c
@@ -0,0 +1,280 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-224 and SHA-256 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha256_generic.c, which is:
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * SHA224 Support Copyright 2007 Intel Corporation <jonathan.lynch@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha256_store_hash(struct sha256_state *sctx)
+{
+ u64 *hash = (u64 *)sctx->state;
+
+ write_octeon_64bit_hash_dword(hash[0], 0);
+ write_octeon_64bit_hash_dword(hash[1], 1);
+ write_octeon_64bit_hash_dword(hash[2], 2);
+ write_octeon_64bit_hash_dword(hash[3], 3);
+}
+
+static void octeon_sha256_read_hash(struct sha256_state *sctx)
+{
+ u64 *hash = (u64 *)sctx->state;
+
+ hash[0] = read_octeon_64bit_hash_dword(0);
+ hash[1] = read_octeon_64bit_hash_dword(1);
+ hash[2] = read_octeon_64bit_hash_dword(2);
+ hash[3] = read_octeon_64bit_hash_dword(3);
+}
+
+static void octeon_sha256_transform(const void *_block)
+{
+ const u64 *block = _block;
+
+ write_octeon_64bit_block_dword(block[0], 0);
+ write_octeon_64bit_block_dword(block[1], 1);
+ write_octeon_64bit_block_dword(block[2], 2);
+ write_octeon_64bit_block_dword(block[3], 3);
+ write_octeon_64bit_block_dword(block[4], 4);
+ write_octeon_64bit_block_dword(block[5], 5);
+ write_octeon_64bit_block_dword(block[6], 6);
+ octeon_sha256_start(block[7]);
+}
+
+static int octeon_sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int octeon_sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static void __octeon_sha256_update(struct sha256_state *sctx, const u8 *data,
+ unsigned int len)
+{
+ unsigned int partial;
+ unsigned int done;
+ const u8 *src;
+
+ partial = sctx->count % SHA256_BLOCK_SIZE;
+ sctx->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) >= SHA256_BLOCK_SIZE) {
+ if (partial) {
+ done = -partial;
+ memcpy(sctx->buf + partial, data,
+ done + SHA256_BLOCK_SIZE);
+ src = sctx->buf;
+ }
+
+ do {
+ octeon_sha256_transform(src);
+ done += SHA256_BLOCK_SIZE;
+ src = data + done;
+ } while (done + SHA256_BLOCK_SIZE <= len);
+
+ partial = 0;
+ }
+ memcpy(sctx->buf + partial, src, len - done);
+}
+
+static int octeon_sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct octeon_cop2_state state;
+ unsigned long flags;
+
+ /*
+ * Small updates never reach the crypto engine, so the generic sha256 is
+ * faster because of the heavyweight octeon_crypto_enable() /
+ * octeon_crypto_disable().
+ */
+ if ((sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+ return crypto_sha256_update(desc, data, len);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha256_store_hash(sctx);
+
+ __octeon_sha256_update(sctx, data, len);
+
+ octeon_sha256_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ return 0;
+}
+
+static int octeon_sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ static const u8 padding[64] = { 0x80, };
+ struct octeon_cop2_state state;
+ __be32 *dst = (__be32 *)out;
+ unsigned int pad_len;
+ unsigned long flags;
+ unsigned int index;
+ __be64 bits;
+ int i;
+
+ /* Save number of bits. */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64. */
+ index = sctx->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha256_store_hash(sctx);
+
+ __octeon_sha256_update(sctx, padding, pad_len);
+
+ /* Append length (before padding). */
+ __octeon_sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+ octeon_sha256_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
+
+static int octeon_sha224_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[SHA256_DIGEST_SIZE];
+
+ octeon_sha256_final(desc, D);
+
+ memcpy(hash, D, SHA224_DIGEST_SIZE);
+ memzero_explicit(D, SHA256_DIGEST_SIZE);
+
+ return 0;
+}
+
+static int octeon_sha256_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int octeon_sha256_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg octeon_sha256_algs[2] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = octeon_sha256_init,
+ .update = octeon_sha256_update,
+ .final = octeon_sha256_final,
+ .export = octeon_sha256_export,
+ .import = octeon_sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name= "octeon-sha256",
+ .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = octeon_sha224_init,
+ .update = octeon_sha256_update,
+ .final = octeon_sha224_final,
+ .descsize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name= "octeon-sha224",
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init octeon_sha256_mod_init(void)
+{
+ if (!octeon_has_crypto())
+ return -ENOTSUPP;
+ return crypto_register_shashes(octeon_sha256_algs,
+ ARRAY_SIZE(octeon_sha256_algs));
+}
+
+static void __exit octeon_sha256_mod_fini(void)
+{
+ crypto_unregister_shashes(octeon_sha256_algs,
+ ARRAY_SIZE(octeon_sha256_algs));
+}
+
+module_init(octeon_sha256_mod_init);
+module_exit(octeon_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
new file mode 100644
index 000000000000..d5fb3c6f22ae
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c
@@ -0,0 +1,277 @@
+/*
+ * Cryptographic API.
+ *
+ * SHA-512 and SHA-384 Secure Hash Algorithm.
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/sha512_generic.c, which is:
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2003 Kyle McMartin <kyle@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ */
+
+#include <linux/mm.h>
+#include <crypto/sha.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <asm/byteorder.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_sha512_store_hash(struct sha512_state *sctx)
+{
+ write_octeon_64bit_hash_sha512(sctx->state[0], 0);
+ write_octeon_64bit_hash_sha512(sctx->state[1], 1);
+ write_octeon_64bit_hash_sha512(sctx->state[2], 2);
+ write_octeon_64bit_hash_sha512(sctx->state[3], 3);
+ write_octeon_64bit_hash_sha512(sctx->state[4], 4);
+ write_octeon_64bit_hash_sha512(sctx->state[5], 5);
+ write_octeon_64bit_hash_sha512(sctx->state[6], 6);
+ write_octeon_64bit_hash_sha512(sctx->state[7], 7);
+}
+
+static void octeon_sha512_read_hash(struct sha512_state *sctx)
+{
+ sctx->state[0] = read_octeon_64bit_hash_sha512(0);
+ sctx->state[1] = read_octeon_64bit_hash_sha512(1);
+ sctx->state[2] = read_octeon_64bit_hash_sha512(2);
+ sctx->state[3] = read_octeon_64bit_hash_sha512(3);
+ sctx->state[4] = read_octeon_64bit_hash_sha512(4);
+ sctx->state[5] = read_octeon_64bit_hash_sha512(5);
+ sctx->state[6] = read_octeon_64bit_hash_sha512(6);
+ sctx->state[7] = read_octeon_64bit_hash_sha512(7);
+}
+
+static void octeon_sha512_transform(const void *_block)
+{
+ const u64 *block = _block;
+
+ write_octeon_64bit_block_sha512(block[0], 0);
+ write_octeon_64bit_block_sha512(block[1], 1);
+ write_octeon_64bit_block_sha512(block[2], 2);
+ write_octeon_64bit_block_sha512(block[3], 3);
+ write_octeon_64bit_block_sha512(block[4], 4);
+ write_octeon_64bit_block_sha512(block[5], 5);
+ write_octeon_64bit_block_sha512(block[6], 6);
+ write_octeon_64bit_block_sha512(block[7], 7);
+ write_octeon_64bit_block_sha512(block[8], 8);
+ write_octeon_64bit_block_sha512(block[9], 9);
+ write_octeon_64bit_block_sha512(block[10], 10);
+ write_octeon_64bit_block_sha512(block[11], 11);
+ write_octeon_64bit_block_sha512(block[12], 12);
+ write_octeon_64bit_block_sha512(block[13], 13);
+ write_octeon_64bit_block_sha512(block[14], 14);
+ octeon_sha512_start(block[15]);
+}
+
+static int octeon_sha512_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA512_H0;
+ sctx->state[1] = SHA512_H1;
+ sctx->state[2] = SHA512_H2;
+ sctx->state[3] = SHA512_H3;
+ sctx->state[4] = SHA512_H4;
+ sctx->state[5] = SHA512_H5;
+ sctx->state[6] = SHA512_H6;
+ sctx->state[7] = SHA512_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static int octeon_sha384_init(struct shash_desc *desc)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA384_H0;
+ sctx->state[1] = SHA384_H1;
+ sctx->state[2] = SHA384_H2;
+ sctx->state[3] = SHA384_H3;
+ sctx->state[4] = SHA384_H4;
+ sctx->state[5] = SHA384_H5;
+ sctx->state[6] = SHA384_H6;
+ sctx->state[7] = SHA384_H7;
+ sctx->count[0] = sctx->count[1] = 0;
+
+ return 0;
+}
+
+static void __octeon_sha512_update(struct sha512_state *sctx, const u8 *data,
+ unsigned int len)
+{
+ unsigned int part_len;
+ unsigned int index;
+ unsigned int i;
+
+ /* Compute number of bytes mod 128. */
+ index = sctx->count[0] % SHA512_BLOCK_SIZE;
+
+ /* Update number of bytes. */
+ if ((sctx->count[0] += len) < len)
+ sctx->count[1]++;
+
+ part_len = SHA512_BLOCK_SIZE - index;
+
+ /* Transform as many times as possible. */
+ if (len >= part_len) {
+ memcpy(&sctx->buf[index], data, part_len);
+ octeon_sha512_transform(sctx->buf);
+
+ for (i = part_len; i + SHA512_BLOCK_SIZE <= len;
+ i += SHA512_BLOCK_SIZE)
+ octeon_sha512_transform(&data[i]);
+
+ index = 0;
+ } else {
+ i = 0;
+ }
+
+ /* Buffer remaining input. */
+ memcpy(&sctx->buf[index], &data[i], len - i);
+}
+
+static int octeon_sha512_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct octeon_cop2_state state;
+ unsigned long flags;
+
+ /*
+ * Small updates never reach the crypto engine, so the generic sha512 is
+ * faster because of the heavyweight octeon_crypto_enable() /
+ * octeon_crypto_disable().
+ */
+ if ((sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+ return crypto_sha512_update(desc, data, len);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha512_store_hash(sctx);
+
+ __octeon_sha512_update(sctx, data, len);
+
+ octeon_sha512_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ return 0;
+}
+
+static int octeon_sha512_final(struct shash_desc *desc, u8 *hash)
+{
+ struct sha512_state *sctx = shash_desc_ctx(desc);
+ static u8 padding[128] = { 0x80, };
+ struct octeon_cop2_state state;
+ __be64 *dst = (__be64 *)hash;
+ unsigned int pad_len;
+ unsigned long flags;
+ unsigned int index;
+ __be64 bits[2];
+ int i;
+
+ /* Save number of bits. */
+ bits[1] = cpu_to_be64(sctx->count[0] << 3);
+ bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
+
+ /* Pad out to 112 mod 128. */
+ index = sctx->count[0] & 0x7f;
+ pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
+
+ flags = octeon_crypto_enable(&state);
+ octeon_sha512_store_hash(sctx);
+
+ __octeon_sha512_update(sctx, padding, pad_len);
+
+ /* Append length (before padding). */
+ __octeon_sha512_update(sctx, (const u8 *)bits, sizeof(bits));
+
+ octeon_sha512_read_hash(sctx);
+ octeon_crypto_disable(&state, flags);
+
+ /* Store state in digest. */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be64(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(struct sha512_state));
+
+ return 0;
+}
+
+static int octeon_sha384_final(struct shash_desc *desc, u8 *hash)
+{
+ u8 D[64];
+
+ octeon_sha512_final(desc, D);
+
+ memcpy(hash, D, 48);
+ memzero_explicit(D, 64);
+
+ return 0;
+}
+
+static struct shash_alg octeon_sha512_algs[2] = { {
+ .digestsize = SHA512_DIGEST_SIZE,
+ .init = octeon_sha512_init,
+ .update = octeon_sha512_update,
+ .final = octeon_sha512_final,
+ .descsize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha512",
+ .cra_driver_name= "octeon-sha512",
+ .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA512_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA384_DIGEST_SIZE,
+ .init = octeon_sha384_init,
+ .update = octeon_sha512_update,
+ .final = octeon_sha384_final,
+ .descsize = sizeof(struct sha512_state),
+ .base = {
+ .cra_name = "sha384",
+ .cra_driver_name= "octeon-sha384",
+ .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA384_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init octeon_sha512_mod_init(void)
+{
+ if (!octeon_has_crypto())
+ return -ENOTSUPP;
+ return crypto_register_shashes(octeon_sha512_algs,
+ ARRAY_SIZE(octeon_sha512_algs));
+}
+
+static void __exit octeon_sha512_mod_fini(void)
+{
+ crypto_unregister_shashes(octeon_sha512_algs,
+ ARRAY_SIZE(octeon_sha512_algs));
+}
+
+module_init(octeon_sha512_mod_init);
+module_exit(octeon_sha512_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-512 and SHA-384 Secure Hash Algorithms (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 4794067cb5a7..5035f09c5427 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -1259,20 +1259,6 @@
#define M2M_DSTID_REG(x) ((x) * 0x40 + 0x18)
/*************************************************************************
- * _REG relative to RSET_RNG
- *************************************************************************/
-
-#define RNG_CTRL 0x00
-#define RNG_EN (1 << 0)
-
-#define RNG_STAT 0x04
-#define RNG_AVAIL_MASK (0xff000000)
-
-#define RNG_DATA 0x08
-#define RNG_THRES 0x0c
-#define RNG_MASK 0x10
-
-/*************************************************************************
* _REG relative to RSET_SPI
*************************************************************************/
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2926fb9c570a..9c221b69c181 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -4,6 +4,14 @@
# Arch-specific CryptoAPI modules.
#
+obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
+obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
+obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) += sha1-ppc-spe.o
+obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) += sha256-ppc-spe.o
+aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+md5-ppc-y := md5-asm.o md5-glue.o
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
+sha1-ppc-spe-y := sha1-spe-asm.o sha1-spe-glue.o
+sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 000000000000..5dc6bce90a77
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,351 @@
+/*
+ * Fast AES implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#define EAD(in, bpos) \
+ rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
+
+#define DAD(in, bpos) \
+ rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
+
+#define LWH(out, off) \
+ evlwwsplat out,off(rT0); /* load word high */
+
+#define LWL(out, off) \
+ lwz out,off(rT0); /* load word low */
+
+#define LBZ(out, tab, off) \
+ lbz out,off(tab); /* load byte */
+
+#define LAH(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word high */ \
+ LWH(out, off)
+
+#define LAL(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word low */ \
+ LWL(out, off)
+
+#define LAE(out, in, bpos) \
+ EAD(in, bpos) /* calc addr + load enc byte */ \
+ LBZ(out, rT0, 8)
+
+#define LBE(out) \
+ LBZ(out, rT0, 8) /* load enc byte */
+
+#define LAD(out, in, bpos) \
+ DAD(in, bpos) /* calc addr + load dec byte */ \
+ LBZ(out, rT1, 0)
+
+#define LBD(out) \
+ LBZ(out, rT1, 0)
+
+/*
+ * ppc_encrypt_block: The central encryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_encrypt_block)
+ LAH(rW4, rD1, 2, 4)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_encrypt_block_loop:
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,32(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32
+ bdnz ppc_encrypt_block_loop
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW5
+ LBE(rW2)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBE(rW6)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBE(rW1)
+ LAE(rW0, rD3, 0)
+ LAE(rW1, rD0, 0)
+ LAE(rW4, rD2, 1)
+ LAE(rW5, rD3, 1)
+ LAE(rW3, rD2, 0)
+ LAE(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAE(rW4, rD1, 2)
+ LAE(rW5, rD2, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAE(rW6, rD3, 2)
+ LAE(rW7, rD0, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAE(rW4, rD0, 3)
+ LAE(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP)
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAE(rW6, rD2, 3)
+ LAE(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
+
+/*
+ * ppc_decrypt_block: The central decryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the ouput registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ */
+_GLOBAL(ppc_decrypt_block)
+ LAH(rW0, rD1, 0, 12)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_decrypt_block_loop:
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,32(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32
+ bdnz ppc_decrypt_block_loop
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ DAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LBD(rW0)
+ evxor rW3,rW3,rW1
+ DAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBD(rW6)
+ evxor rD3,rD3,rW5
+ DAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBD(rW3)
+ LAD(rW2, rD3, 0)
+ LAD(rW1, rD2, 0)
+ LAD(rW4, rD2, 1)
+ LAD(rW5, rD3, 1)
+ LAD(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAD(rW4, rD3, 2)
+ LAD(rW5, rD0, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAD(rW6, rD1, 2)
+ LAD(rW7, rD2, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAD(rW4, rD0, 3)
+ LAD(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP)
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAD(rW6, rD2, 3)
+ LAD(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 000000000000..bd5e63f72ad4
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,512 @@
+/*
+ * Glue code for AES implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/aes.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <crypto/algapi.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). e500 cores can issue two
+ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
+ * bit unit (SU2). One of these can be a memory access that is executed via
+ * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
+ * 16 byte block block or 25 cycles per byte. Thus 768 bytes of input data
+ * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
+ * included. Even with the low end model clocked at 667 MHz this equals to a
+ * critical time window of less than 30us. The value has been choosen to
+ * process a 512 byte disk block in one or a large 1400 bytes IPsec network
+ * packet in two runs.
+ *
+ */
+#define MAX_BYTES 768
+
+struct ppc_aes_ctx {
+ u32 key_enc[AES_MAX_KEYLENGTH_U32];
+ u32 key_dec[AES_MAX_KEYLENGTH_U32];
+ u32 rounds;
+};
+
+struct ppc_xts_ctx {
+ u32 key_enc[AES_MAX_KEYLENGTH_U32];
+ u32 key_dec[AES_MAX_KEYLENGTH_U32];
+ u32 key_twk[AES_MAX_KEYLENGTH_U32];
+ u32 rounds;
+};
+
+extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
+extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
+extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes);
+extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes);
+extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv, u32 *key_twk);
+extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes, u8 *iv, u32 *key_twk);
+
+extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+
+extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
+ unsigned int key_len);
+
+static void spe_begin(void)
+{
+ /* disable preemption and save users SPE registers if required */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+ /* reenable preemption */
+ preempt_enable();
+}
+
+static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (key_len != AES_KEYSIZE_128 &&
+ key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->rounds = 4;
+ ppc_expand_key_128(ctx->key_enc, in_key);
+ break;
+ case AES_KEYSIZE_192:
+ ctx->rounds = 5;
+ ppc_expand_key_192(ctx->key_enc, in_key);
+ break;
+ case AES_KEYSIZE_256:
+ ctx->rounds = 6;
+ ppc_expand_key_256(ctx->key_enc, in_key);
+ break;
+ }
+
+ ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+ return 0;
+}
+
+static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ key_len >>= 1;
+
+ if (key_len != AES_KEYSIZE_128 &&
+ key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->rounds = 4;
+ ppc_expand_key_128(ctx->key_enc, in_key);
+ ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
+ break;
+ case AES_KEYSIZE_192:
+ ctx->rounds = 5;
+ ppc_expand_key_192(ctx->key_enc, in_key);
+ ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
+ break;
+ case AES_KEYSIZE_256:
+ ctx->rounds = 6;
+ ppc_expand_key_256(ctx->key_enc, in_key);
+ ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
+ break;
+ }
+
+ ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+ return 0;
+}
+
+static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ spe_begin();
+ ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
+ spe_end();
+}
+
+static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ spe_begin();
+ ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
+ spe_end();
+}
+
+static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes);
+ spe_end();
+
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes);
+ spe_end();
+
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+ spe_end();
+
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes, walk.iv);
+ spe_end();
+
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int pbytes, ubytes;
+ int err;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+
+ while ((pbytes = walk.nbytes)) {
+ pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
+ pbytes = pbytes == nbytes ?
+ nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
+ ubytes = walk.nbytes - pbytes;
+
+ spe_begin();
+ ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+ spe_end();
+
+ nbytes -= pbytes;
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+ u32 *twk;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+ twk = ctx->key_twk;
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+ spe_end();
+
+ twk = NULL;
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct blkcipher_walk walk;
+ unsigned int ubytes;
+ int err;
+ u32 *twk;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ blkcipher_walk_init(&walk, dst, src, nbytes);
+ err = blkcipher_walk_virt(desc, &walk);
+ twk = ctx->key_twk;
+
+ while ((nbytes = walk.nbytes)) {
+ ubytes = nbytes > MAX_BYTES ?
+ nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
+ nbytes -= ubytes;
+
+ spe_begin();
+ ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
+ spe_end();
+
+ twk = NULL;
+ err = blkcipher_walk_done(desc, &walk, ubytes);
+ }
+
+ return err;
+}
+
+/*
+ * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
+ * because the e500 platform can handle unaligned reads/writes very efficently.
+ * This improves IPsec thoughput by another few percent. Additionally we assume
+ * that AES context is always aligned to at least 8 bytes because it is created
+ * with kmalloc() in the crypto infrastructure
+ *
+ */
+static struct crypto_alg aes_algs[] = { {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = ppc_aes_setkey,
+ .cia_encrypt = ppc_aes_encrypt,
+ .cia_decrypt = ppc_aes_decrypt
+ }
+ }
+}, {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey,
+ .encrypt = ppc_ecb_encrypt,
+ .decrypt = ppc_ecb_decrypt,
+ }
+ }
+}, {
+ .cra_name = "cbc(aes)",
+ .cra_driver_name = "cbc-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey,
+ .encrypt = ppc_cbc_encrypt,
+ .decrypt = ppc_cbc_decrypt,
+ }
+ }
+}, {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey,
+ .encrypt = ppc_ctr_crypt,
+ .decrypt = ppc_ctr_crypt,
+ }
+ }
+}, {
+ .cra_name = "xts(aes)",
+ .cra_driver_name = "xts-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ppc_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = AES_MIN_KEY_SIZE * 2,
+ .max_keysize = AES_MAX_KEY_SIZE * 2,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_xts_setkey,
+ .encrypt = ppc_xts_encrypt,
+ .decrypt = ppc_xts_decrypt,
+ }
+ }
+} };
+
+static int __init ppc_aes_mod_init(void)
+{
+ return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+static void __exit ppc_aes_mod_fini(void)
+{
+ crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+}
+
+module_init(ppc_aes_mod_init);
+module_exit(ppc_aes_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 000000000000..be8090f3d700
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,283 @@
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_KEY(d, s, off) \
+ lwz d,off(s);
+#else
+#define LOAD_KEY(d, s, off) \
+ li r0,off; \
+ lwbrx d,s,r0;
+#endif
+
+#define INITIALIZE_KEY \
+ stwu r1,-32(r1); /* create stack frame */ \
+ stw r14,8(r1); /* save registers */ \
+ stw r15,12(r1); \
+ stw r16,16(r1);
+
+#define FINALIZE_KEY \
+ lwz r14,8(r1); /* restore registers */ \
+ lwz r15,12(r1); \
+ lwz r16,16(r1); \
+ xor r5,r5,r5; /* clear sensitive data */ \
+ xor r6,r6,r6; \
+ xor r7,r7,r7; \
+ xor r8,r8,r8; \
+ xor r9,r9,r9; \
+ xor r10,r10,r10; \
+ xor r11,r11,r11; \
+ xor r12,r12,r12; \
+ addi r1,r1,32; /* cleanup stack */
+
+#define LS_BOX(r, t1, t2) \
+ lis t2,PPC_AES_4K_ENCTAB@h; \
+ ori t2,t2,PPC_AES_4K_ENCTAB@l; \
+ rlwimi t2,r,4,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,0,24,31; \
+ rlwimi t2,r,28,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,8,16,23; \
+ rlwimi t2,r,20,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,16,8,15; \
+ rlwimi t2,r,12,20,27; \
+ lbz t1,8(t2); \
+ rlwimi r,t1,24,0,7;
+
+#define GF8_MUL(out, in, t1, t2) \
+ lis t1,0x8080; /* multiplication in GF8 */ \
+ ori t1,t1,0x8080; \
+ and t1,t1,in; \
+ srwi t1,t1,7; \
+ mulli t1,t1,0x1b; \
+ lis t2,0x7f7f; \
+ ori t2,t2,0x7f7f; \
+ and t2,t2,in; \
+ slwi t2,t2,1; \
+ xor out,t1,t2;
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_128)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ stw r5,0(r3) /* key[0..3] = input data */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ li r16,10 /* 10 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_128_loop:
+ addi r3,r3,16
+ mr r14,r8 /* apply LS_BOX to 4th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor next 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ stw r5,0(r3) /* store next 4 keys */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
+ subi r16,r16,1
+ cmpwi r16,0
+ bt eq,ppc_expand_128_end
+ b ppc_expand_128_loop
+ppc_expand_128_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_192)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ li r16,8 /* 8 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_192_loop:
+ addi r3,r3,24
+ mr r14,r10 /* apply LS_BOX to 6th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor next 6 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ xor r9,r9,r8
+ xor r10,r10,r9
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_192_end
+ stw r9,16(r3)
+ stw r10,20(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
+ b ppc_expand_192_loop
+ppc_expand_192_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_256)
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0)
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ LOAD_KEY(r11,r4,24)
+ LOAD_KEY(r12,r4,28)
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ li r16,7 /* 7 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_256_loop:
+ addi r3,r3,32
+ mr r14,r12 /* apply LS_BOX to 8th temp */
+ rotlwi r14,r14,8
+ LS_BOX(r14, r15, r4)
+ xor r14,r14,r0
+ xor r5,r5,r14 /* xor 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ mr r14,r8
+ LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
+ xor r9,r9,r14 /* xor 4 keys */
+ xor r10,r10,r9
+ xor r11,r11,r10
+ xor r12,r12,r11
+ stw r5,0(r3)
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_256_end
+ stw r9,16(r3)
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ GF8_MUL(r0, r0, r4, r14)
+ b ppc_expand_256_loop
+ppc_expand_256_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ *
+ */
+_GLOBAL(ppc_generate_decrypt_key)
+ addi r6,r5,24
+ slwi r6,r6,2
+ lwzx r7,r4,r6 /* first/last 4 words are same */
+ stw r7,0(r3)
+ lwz r7,0(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,4(r3)
+ lwz r7,4(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,8(r3)
+ lwz r7,8(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4
+ lwzx r7,r4,r6
+ stw r7,12(r3)
+ lwz r7,12(r4)
+ stwx r7,r3,r6
+ addi r3,r3,16
+ add r4,r4,r6
+ subi r4,r4,28
+ addi r5,r5,20
+ srwi r5,r5,2
+ppc_generate_decrypt_block:
+ li r6,4
+ mtctr r6
+ppc_generate_decrypt_word:
+ lwz r6,0(r4)
+ GF8_MUL(r7, r6, r0, r7)
+ GF8_MUL(r8, r7, r0, r8)
+ GF8_MUL(r9, r8, r0, r9)
+ xor r10,r9,r6
+ xor r11,r7,r8
+ xor r11,r11,r9
+ xor r12,r7,r10
+ rotrwi r12,r12,24
+ xor r11,r11,r12
+ xor r12,r8,r10
+ rotrwi r12,r12,16
+ xor r11,r11,r12
+ rotrwi r12,r10,8
+ xor r11,r11,r12
+ stw r11,0(r3)
+ addi r3,r3,4
+ addi r4,r4,4
+ bdnz ppc_generate_decrypt_word
+ subi r4,r4,32
+ subi r5,r5,1
+ cmpwi r5,0
+ bt gt,ppc_generate_decrypt_block
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 000000000000..ad48032ca8e0
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,630 @@
+/*
+ * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
+
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rSP); /* load with offset */
+#define SAVE_DATA(reg, off) \
+ stw reg,off(rDP); /* save with offset */
+#define NEXT_BLOCK \
+ addi rSP,rSP,16; /* increment pointers per bloc */ \
+ addi rDP,rDP,16;
+#define LOAD_IV(reg, off) \
+ lwz reg,off(rIP); /* IV loading with offset */
+#define SAVE_IV(reg, off) \
+ stw reg,off(rIP); /* IV saving with offset */
+#define START_IV /* nothing to reset */
+#define CBC_DEC 16 /* CBC decrement per block */
+#define CTR_DEC 1 /* CTR decrement one byte */
+
+#else /* Macros for little endian */
+
+#define LOAD_DATA(reg, off) \
+ lwbrx reg,0,rSP; /* load reversed */ \
+ addi rSP,rSP,4; /* and increment pointer */
+#define SAVE_DATA(reg, off) \
+ stwbrx reg,0,rDP; /* save reversed */ \
+ addi rDP,rDP,4; /* and increment pointer */
+#define NEXT_BLOCK /* nothing todo */
+#define LOAD_IV(reg, off) \
+ lwbrx reg,0,rIP; /* load reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define SAVE_IV(reg, off) \
+ stwbrx reg,0,rIP; /* load reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define START_IV \
+ subi rIP,rIP,16; /* must reset pointer */
+#define CBC_DEC 32 /* 2 blocks because of incs */
+#define CTR_DEC 17 /* 1 block because of incs */
+
+#endif
+
+#define SAVE_0_REGS
+#define LOAD_0_REGS
+
+#define SAVE_4_REGS \
+ stw rI0,96(r1); /* save 32 bit registers */ \
+ stw rI1,100(r1); \
+ stw rI2,104(r1); \
+ stw rI3,108(r1);
+
+#define LOAD_4_REGS \
+ lwz rI0,96(r1); /* restore 32 bit registers */ \
+ lwz rI1,100(r1); \
+ lwz rI2,104(r1); \
+ lwz rI3,108(r1);
+
+#define SAVE_8_REGS \
+ SAVE_4_REGS \
+ stw rG0,112(r1); /* save 32 bit registers */ \
+ stw rG1,116(r1); \
+ stw rG2,120(r1); \
+ stw rG3,124(r1);
+
+#define LOAD_8_REGS \
+ LOAD_4_REGS \
+ lwz rG0,112(r1); /* restore 32 bit registers */ \
+ lwz rG1,116(r1); \
+ lwz rG2,120(r1); \
+ lwz rG3,124(r1);
+
+#define INITIALIZE_CRYPT(tab,nr32bitregs) \
+ mflr r0; \
+ stwu r1,-160(r1); /* create stack frame */ \
+ lis rT0,tab@h; /* en-/decryption table pointer */ \
+ stw r0,8(r1); /* save link register */ \
+ ori rT0,rT0,tab@l; \
+ evstdw r14,16(r1); \
+ mr rKS,rKP; \
+ evstdw r15,24(r1); /* We must save non volatile */ \
+ evstdw r16,32(r1); /* registers. Take the chance */ \
+ evstdw r17,40(r1); /* and save the SPE part too */ \
+ evstdw r18,48(r1); \
+ evstdw r19,56(r1); \
+ evstdw r20,64(r1); \
+ evstdw r21,72(r1); \
+ evstdw r22,80(r1); \
+ evstdw r23,88(r1); \
+ SAVE_##nr32bitregs##_REGS
+
+#define FINALIZE_CRYPT(nr32bitregs) \
+ lwz r0,8(r1); \
+ evldw r14,16(r1); /* restore SPE registers */ \
+ evldw r15,24(r1); \
+ evldw r16,32(r1); \
+ evldw r17,40(r1); \
+ evldw r18,48(r1); \
+ evldw r19,56(r1); \
+ evldw r20,64(r1); \
+ evldw r21,72(r1); \
+ evldw r22,80(r1); \
+ evldw r23,88(r1); \
+ LOAD_##nr32bitregs##_REGS \
+ mtlr r0; /* restore link register */ \
+ xor r0,r0,r0; \
+ stw r0,16(r1); /* delete sensitive data */ \
+ stw r0,24(r1); /* that we might have pushed */ \
+ stw r0,32(r1); /* from other context that runs */ \
+ stw r0,40(r1); /* the same code */ \
+ stw r0,48(r1); \
+ stw r0,56(r1); \
+ stw r0,64(r1); \
+ stw r0,72(r1); \
+ stw r0,80(r1); \
+ stw r0,88(r1); \
+ addi r1,r1,160; /* cleanup stack frame */
+
+#define ENDIAN_SWAP(t0, t1, s0, s1) \
+ rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
+ rotrwi t1,s1,8; \
+ rlwimi t0,s0,8,8,15; \
+ rlwimi t1,s1,8,8,15; \
+ rlwimi t0,s0,8,24,31; \
+ rlwimi t1,s1,8,24,31;
+
+#define GF128_MUL(d0, d1, d2, d3, t0) \
+ li t0,0x87; /* multiplication in GF128 */ \
+ cmpwi d3,-1; \
+ iselgt t0,0,t0; \
+ rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
+ rotlwi d3,d3,1; \
+ rlwimi d2,d1,0,0,0; \
+ rotlwi d2,d2,1; \
+ rlwimi d1,d0,0,0,0; \
+ slwi d0,d0,1; /* shift left 128 bit */ \
+ rotlwi d1,d1,1; \
+ xor d0,d0,t0;
+
+#define START_KEY(d0, d1, d2, d3) \
+ lwz rW0,0(rKP); \
+ mtctr rRR; \
+ lwz rW1,4(rKP); \
+ lwz rW2,8(rKP); \
+ lwz rW3,12(rKP); \
+ xor rD0,d0,rW0; \
+ xor rD1,d1,rW1; \
+ xor rD2,d2,rW2; \
+ xor rD3,d3,rW3;
+
+/*
+ * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds)
+ *
+ * called from glue layer to encrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_aes)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ LOAD_DATA(rD0, 0)
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds)
+ *
+ * called from glue layer to decrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_aes)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
+ LOAD_DATA(rD0, 0)
+ addi rT1,rT0,4096
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to encrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_ecb)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ppc_encrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_ecb_loop
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to decrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_ecb)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
+ addi rT1,rT0,4096
+ppc_decrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_ecb_loop
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
+ * 32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt multiple blocks via CBC
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_cbc)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ LOAD_IV(rI3, 12)
+ppc_encrypt_cbc_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ SAVE_DATA(rI0, 0)
+ xor rI1,rD1,rW1
+ SAVE_DATA(rI1, 4)
+ xor rI2,rD2,rW2
+ SAVE_DATA(rI2, 8)
+ xor rI3,rD3,rW3
+ SAVE_DATA(rI3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_cbc_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to decrypt multiple blocks via CBC
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_cbc)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
+ li rT1,15
+ LOAD_IV(rI0, 0)
+ andc rLN,rLN,rT1
+ LOAD_IV(rI1, 4)
+ subi rLN,rLN,16
+ LOAD_IV(rI2, 8)
+ add rSP,rSP,rLN /* reverse processing */
+ LOAD_IV(rI3, 12)
+ add rDP,rDP,rLN
+ LOAD_DATA(rD0, 0)
+ addi rT1,rT0,4096
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_IV
+ SAVE_IV(rD0, 0)
+ SAVE_IV(rD1, 4)
+ SAVE_IV(rD2, 8)
+ cmpwi rLN,16
+ SAVE_IV(rD3, 12)
+ bt lt,ppc_decrypt_cbc_end
+ppc_decrypt_cbc_loop:
+ mr rKP,rKS
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ subi rLN,rLN,16
+ subi rSP,rSP,CBC_DEC
+ xor rW0,rD0,rW0
+ LOAD_DATA(rD0, 0)
+ xor rW1,rD1,rW1
+ LOAD_DATA(rD1, 4)
+ xor rW2,rD2,rW2
+ LOAD_DATA(rD2, 8)
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD3, 12)
+ xor rW0,rW0,rD0
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rD1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rD2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rD3
+ SAVE_DATA(rW3, 12)
+ cmpwi rLN,15
+ subi rDP,rDP,CBC_DEC
+ bt gt,ppc_decrypt_cbc_loop
+ppc_decrypt_cbc_end:
+ mr rKP,rKS
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rW0,rW0,rD0
+ xor rW1,rW1,rD1
+ xor rW2,rW2,rD2
+ xor rW3,rW3,rD3
+ xor rW0,rW0,rI0 /* decrypt with initial IV */
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rI1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rI2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rI3
+ SAVE_DATA(rW3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt/decrypt multiple blocks
+ * via CTR. Number of bytes does not need to be a multiple of
+ * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_crypt_ctr)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rLN,16
+ LOAD_IV(rI3, 12)
+ START_IV
+ bt lt,ppc_crypt_ctr_partial
+ppc_crypt_ctr_loop:
+ mr rKP,rKS
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0
+ xor rW1,rD1,rW1
+ xor rW2,rD2,rW2
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD0, 0)
+ subi rLN,rLN,16
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rW0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ addic rI3,rI3,1 /* increase counter */
+ addze rI2,rI2
+ addze rI1,rI1
+ addze rI0,rI0
+ NEXT_BLOCK
+ cmpwi rLN,15
+ bt gt,ppc_crypt_ctr_loop
+ppc_crypt_ctr_partial:
+ cmpwi rLN,0
+ bt eq,ppc_crypt_ctr_end
+ mr rKP,rKS
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0
+ SAVE_IV(rW0, 0)
+ xor rW1,rD1,rW1
+ SAVE_IV(rW1, 4)
+ xor rW2,rD2,rW2
+ SAVE_IV(rW2, 8)
+ xor rW3,rD3,rW3
+ SAVE_IV(rW3, 12)
+ mtctr rLN
+ subi rIP,rIP,CTR_DEC
+ subi rSP,rSP,1
+ subi rDP,rDP,1
+ppc_crypt_ctr_xorbyte:
+ lbzu rW4,1(rIP) /* bytewise xor for partial block */
+ lbzu rW5,1(rSP)
+ xor rW4,rW4,rW5
+ stbu rW4,1(rDP)
+ bdnz ppc_crypt_ctr_xorbyte
+ subf rIP,rLN,rIP
+ addi rIP,rIP,1
+ addic rI3,rI3,1
+ addze rI2,rI2
+ addze rI1,rI1
+ addze rI0,rI0
+ppc_crypt_ctr_end:
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to encrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_xts)
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
+ LOAD_IV(rI0, 0)
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_encrypt_xts_notweak
+ mr rKP,rKT
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ppc_encrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_encrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
+
+/*
+ * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 blocks, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to decrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_xts)
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
+ LOAD_IV(rI0, 0)
+ addi rT1,rT0,4096
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_decrypt_xts_notweak
+ subi rT0,rT0,4096
+ mr rKP,rKT
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ addi rT0,rT0,4096
+ppc_decrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1)
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_decrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0)
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1)
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0)
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 000000000000..30d217b399c3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,42 @@
+/*
+ * Common registers for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#define rKS r0 /* copy of en-/decryption key pointer */
+#define rDP r3 /* destination pointer */
+#define rSP r4 /* source pointer */
+#define rKP r5 /* pointer to en-/decryption key pointer */
+#define rRR r6 /* en-/decryption rounds */
+#define rLN r7 /* length of data to be processed */
+#define rIP r8 /* potiner to IV (CBC/CTR/XTS modes) */
+#define rKT r9 /* pointer to tweak key (XTS mode) */
+#define rT0 r11 /* pointers to en-/decrpytion tables */
+#define rT1 r10
+#define rD0 r9 /* data */
+#define rD1 r14
+#define rD2 r12
+#define rD3 r15
+#define rW0 r16 /* working registers */
+#define rW1 r17
+#define rW2 r18
+#define rW3 r19
+#define rW4 r20
+#define rW5 r21
+#define rW6 r22
+#define rW7 r23
+#define rI0 r24 /* IV */
+#define rI1 r25
+#define rI2 r26
+#define rI3 r27
+#define rG0 r28 /* endian reversed tweak (XTS mode) */
+#define rG1 r29
+#define rG2 r30
+#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 000000000000..701e60240dc3
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,331 @@
+/*
+ * 4K AES tables for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+/*
+ * These big endian AES encryption/decryption tables have been taken from
+ * crypto/aes_generic.c and are designed to be simply accessed by a combination
+ * of rlwimi/lwz instructions with a minimum of table registers (usually only
+ * one required). Thus they are aligned to 4K. The locality of rotated values
+ * is derived from the reduced offsets that are available in the SPE load
+ * instructions. E.g. evldw, evlwwsplat, ...
+ *
+ * For the safety-conscious it has to be noted that they might be vulnerable
+ * to cache timing attacks because of their size. Nevertheless in contrast to
+ * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
+ * This is a quite good tradeoff for low power devices (e.g. routers) without
+ * dedicated encryption hardware where we usually have no multiuser
+ * environment.
+ *
+ */
+
+#define R(a, b, c, d) \
+ 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a
+
+.data
+.align 12
+.globl PPC_AES_4K_ENCTAB
+PPC_AES_4K_ENCTAB:
+/* encryption table, same as crypto_ft_tab in crypto/aes-generic.c */
+ .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
+ .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
+ .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
+ .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
+ .long R(60, 30, 30, 50), R(02, 01, 01, 03)
+ .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
+ .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
+ .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
+ .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
+ .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
+ .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
+ .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
+ .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
+ .long R(5f, a2, a2, fd), R(45, af, af, ea)
+ .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
+ .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
+ .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
+ .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
+ .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
+ .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
+ .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
+ .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
+ .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
+ .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
+ .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
+ .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
+ .long R(30, 18, 18, 28), R(37, 96, 96, a1)
+ .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
+ .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
+ .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
+ .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
+ .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
+ .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
+ .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
+ .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
+ .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
+ .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
+ .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
+ .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
+ .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
+ .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
+ .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
+ .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
+ .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
+ .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
+ .long R(67, be, be, d9), R(72, 39, 39, 4b)
+ .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
+ .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
+ .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
+ .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
+ .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
+ .long R(66, 33, 33, 55), R(11, 85, 85, 94)
+ .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
+ .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
+ .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
+ .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
+ .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
+ .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
+ .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
+ .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
+ .long R(63, bc, bc, df), R(77, b6, b6, c1)
+ .long R(af, da, da, 75), R(42, 21, 21, 63)
+ .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
+ .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
+ .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
+ .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
+ .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
+ .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
+ .long R(93, c4, c4, 57), R(55, a7, a7, f2)
+ .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
+ .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
+ .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
+ .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
+ .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
+ .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
+ .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
+ .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
+ .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
+ .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
+ .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
+ .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
+ .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
+ .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
+ .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
+ .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
+ .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
+ .long R(39, 91, 91, a8), R(31, 95, 95, a4)
+ .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
+ .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
+ .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
+ .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
+ .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
+ .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
+ .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
+ .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
+ .long R(47, ae, ae, e9), R(10, 08, 08, 18)
+ .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
+ .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
+ .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
+ .long R(73, b4, b4, c7), R(97, c6, c6, 51)
+ .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
+ .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
+ .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
+ .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
+ .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
+ .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
+ .long R(90, 48, 48, d8), R(06, 03, 03, 05)
+ .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
+ .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
+ .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
+ .long R(17, 86, 86, 91), R(99, c1, c1, 58)
+ .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
+ .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
+ .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
+ .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
+ .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
+ .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
+ .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
+ .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
+ .long R(50, 28, 28, 78), R(a5, df, df, 7a)
+ .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
+ .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
+ .long R(65, bf, bf, da), R(d7, e6, e6, 31)
+ .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
+ .long R(82, 41, 41, c3), R(29, 99, 99, b0)
+ .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
+ .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
+ .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
+.globl PPC_AES_4K_DECTAB
+PPC_AES_4K_DECTAB:
+/* decryption table, same as crypto_it_tab in crypto/aes-generic.c */
+ .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
+ .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
+ .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
+ .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
+ .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
+ .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
+ .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
+ .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
+ .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
+ .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
+ .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
+ .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
+ .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
+ .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
+ .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
+ .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
+ .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
+ .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
+ .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
+ .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
+ .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
+ .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
+ .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
+ .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
+ .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
+ .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
+ .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
+ .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
+ .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
+ .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
+ .long R(30, 28, 87, f2), R(23, bf, a5, b2)
+ .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
+ .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
+ .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
+ .long R(65, da, f4, cd), R(06, 05, be, d5)
+ .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
+ .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
+ .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
+ .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
+ .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
+ .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
+ .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
+ .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
+ .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
+ .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
+ .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
+ .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
+ .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
+ .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
+ .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
+ .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
+ .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
+ .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
+ .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
+ .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
+ .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
+ .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
+ .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
+ .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
+ .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
+ .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
+ .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
+ .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
+ .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
+ .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
+ .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
+ .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
+ .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
+ .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
+ .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
+ .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
+ .long R(13, 97, 22, 40), R(84, c6, 11, 20)
+ .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
+ .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
+ .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
+ .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
+ .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
+ .long R(11, 94, 48, fa), R(47, e9, 64, 22)
+ .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
+ .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
+ .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
+ .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
+ .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
+ .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
+ .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
+ .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
+ .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
+ .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
+ .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
+ .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
+ .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
+ .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
+ .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
+ .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
+ .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
+ .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
+ .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
+ .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
+ .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
+ .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
+ .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
+ .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
+ .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
+ .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
+ .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
+ .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
+ .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
+ .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
+ .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
+ .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
+ .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
+ .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
+ .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
+ .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
+ .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
+ .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
+ .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
+ .long R(18, 14, ce, 79), R(73, c7, 37, bf)
+ .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
+ .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
+ .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
+ .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
+ .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
+ .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
+ .long R(39, a8, 01, 71), R(08, 0c, b3, de)
+ .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
+ .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
+ .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
+.globl PPC_AES_4K_DECTAB2
+PPC_AES_4K_DECTAB2:
+/* decryption table, same as crypto_il_tab in crypto/aes-generic.c */
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
new file mode 100644
index 000000000000..10cdf5bceebb
--- /dev/null
+++ b/arch/powerpc/crypto/md5-asm.S
@@ -0,0 +1,243 @@
+/*
+ * Fast MD5 implementation for PPC
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP r3
+#define rWP r4
+
+#define rH0 r0
+#define rH1 r6
+#define rH2 r7
+#define rH3 r5
+
+#define rW00 r8
+#define rW01 r9
+#define rW02 r10
+#define rW03 r11
+#define rW04 r12
+#define rW05 r14
+#define rW06 r15
+#define rW07 r16
+#define rW08 r17
+#define rW09 r18
+#define rW10 r19
+#define rW11 r20
+#define rW12 r21
+#define rW13 r22
+#define rW14 r23
+#define rW15 r24
+
+#define rT0 r25
+#define rT1 r26
+
+#define INITIALIZE \
+ PPC_STLU r1,-INT_FRAME_SIZE(r1); \
+ SAVE_8GPRS(14, r1); /* push registers onto stack */ \
+ SAVE_4GPRS(22, r1); \
+ SAVE_GPR(26, r1)
+
+#define FINALIZE \
+ REST_8GPRS(14, r1); /* pop registers from stack */ \
+ REST_4GPRS(22, r1); \
+ REST_GPR(26, r1); \
+ addi r1,r1,INT_FRAME_SIZE;
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+ lwbrx reg,0,rWP; /* load data */
+#define INC_PTR \
+ addi rWP,rWP,4; /* increment per word */
+#define NEXT_BLOCK /* nothing to do */
+#else
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rWP); /* load data */
+#define INC_PTR /* nothing to do */
+#define NEXT_BLOCK \
+ addi rWP,rWP,64; /* increment per block */
+#endif
+
+#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
+ LOAD_DATA(w0, off) /* W */ \
+ and rT0,b,c; /* 1: f = b and c */ \
+ INC_PTR /* ptr++ */ \
+ andc rT1,d,b; /* 1: f' = ~b and d */ \
+ LOAD_DATA(w1, off+4) /* W */ \
+ or rT0,rT0,rT1; /* 1: f = f or f' */ \
+ addi w0,w0,k0l; /* 1: wk = w + k */ \
+ add a,a,rT0; /* 1: a = a + f */ \
+ addis w0,w0,k0h; /* 1: wk = w + k' */ \
+ addis w1,w1,k1h; /* 2: wk = w + k */ \
+ add a,a,w0; /* 1: a = a + wk */ \
+ addi w1,w1,k1l; /* 2: wk = w + k' */ \
+ rotrwi a,a,p; /* 1: a = a rotl x */ \
+ add d,d,w1; /* 2: a = a + wk */ \
+ add a,a,b; /* 1: a = a + b */ \
+ and rT0,a,b; /* 2: f = b and c */ \
+ andc rT1,c,a; /* 2: f' = ~b and d */ \
+ or rT0,rT0,rT1; /* 2: f = f or f' */ \
+ add d,d,rT0; /* 2: a = a + f */ \
+ INC_PTR /* ptr++ */ \
+ rotrwi d,d,q; /* 2: a = a rotl x */ \
+ add d,d,a; /* 2: a = a + b */
+
+#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+ andc rT0,c,d; /* 1: f = c and ~d */ \
+ and rT1,b,d; /* 1: f' = b and d */ \
+ addi w0,w0,k0l; /* 1: wk = w + k */ \
+ or rT0,rT0,rT1; /* 1: f = f or f' */ \
+ addis w0,w0,k0h; /* 1: wk = w + k' */ \
+ add a,a,rT0; /* 1: a = a + f */ \
+ addi w1,w1,k1l; /* 2: wk = w + k */ \
+ add a,a,w0; /* 1: a = a + wk */ \
+ addis w1,w1,k1h; /* 2: wk = w + k' */ \
+ andc rT0,b,c; /* 2: f = c and ~d */ \
+ rotrwi a,a,p; /* 1: a = a rotl x */ \
+ add a,a,b; /* 1: a = a + b */ \
+ add d,d,w1; /* 2: a = a + wk */ \
+ and rT1,a,c; /* 2: f' = b and d */ \
+ or rT0,rT0,rT1; /* 2: f = f or f' */ \
+ add d,d,rT0; /* 2: a = a + f */ \
+ rotrwi d,d,q; /* 2: a = a rotl x */ \
+ add d,d,a; /* 2: a = a +b */
+
+#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+ xor rT0,b,c; /* 1: f' = b xor c */ \
+ addi w0,w0,k0l; /* 1: wk = w + k */ \
+ xor rT1,rT0,d; /* 1: f = f xor f' */ \
+ addis w0,w0,k0h; /* 1: wk = w + k' */ \
+ add a,a,rT1; /* 1: a = a + f */ \
+ addi w1,w1,k1l; /* 2: wk = w + k */ \
+ add a,a,w0; /* 1: a = a + wk */ \
+ addis w1,w1,k1h; /* 2: wk = w + k' */ \
+ rotrwi a,a,p; /* 1: a = a rotl x */ \
+ add d,d,w1; /* 2: a = a + wk */ \
+ add a,a,b; /* 1: a = a + b */ \
+ xor rT1,rT0,a; /* 2: f = b xor f' */ \
+ add d,d,rT1; /* 2: a = a + f */ \
+ rotrwi d,d,q; /* 2: a = a rotl x */ \
+ add d,d,a; /* 2: a = a + b */
+
+#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
+ addi w0,w0,k0l; /* 1: w = w + k */ \
+ orc rT0,b,d; /* 1: f = b or ~d */ \
+ addis w0,w0,k0h; /* 1: w = w + k' */ \
+ xor rT0,rT0,c; /* 1: f = f xor c */ \
+ add a,a,w0; /* 1: a = a + wk */ \
+ addi w1,w1,k1l; /* 2: w = w + k */ \
+ add a,a,rT0; /* 1: a = a + f */ \
+ addis w1,w1,k1h; /* 2: w = w + k' */ \
+ rotrwi a,a,p; /* 1: a = a rotl x */ \
+ add a,a,b; /* 1: a = a + b */ \
+ orc rT0,a,c; /* 2: f = b or ~d */ \
+ add d,d,w1; /* 2: a = a + wk */ \
+ xor rT0,rT0,b; /* 2: f = f xor c */ \
+ add d,d,rT0; /* 2: a = a + f */ \
+ rotrwi d,d,q; /* 2: a = a rotl x */ \
+ add d,d,a; /* 2: a = a + b */
+
+_GLOBAL(ppc_md5_transform)
+ INITIALIZE
+
+ mtctr r5
+ lwz rH0,0(rHP)
+ lwz rH1,4(rHP)
+ lwz rH2,8(rHP)
+ lwz rH3,12(rHP)
+
+ppc_md5_main:
+ R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
+ 0xd76b, -23432, 0xe8c8, -18602)
+ R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
+ 0x2420, 0x70db, 0xc1be, -12562)
+ R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
+ 0xf57c, 0x0faf, 0x4788, -14806)
+ R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
+ 0xa830, 0x4613, 0xfd47, -27391)
+ R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
+ 0x6981, -26408, 0x8b45, -2129)
+ R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
+ 0xffff, 0x5bb1, 0x895d, -10306)
+ R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
+ 0x6b90, 0x1122, 0xfd98, 0x7193)
+ R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
+ 0xa679, 0x438e, 0x49b4, 0x0821)
+
+ R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
+ 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
+ R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
+ 0x9d02, -32109, 0x124c, 0x2332)
+ R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
+ 0x8ea7, 0x4a33, 0x0245, -18270)
+ R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
+ 0x8eee, -8608, 0xf258, -5095)
+ R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
+ 0x969d, -10697, 0x1cbe, -15288)
+ R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
+ 0x3317, 0x3e99, 0xdbd9, 0x7c15)
+ R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
+ 0xac4b, 0x7772, 0xd8cf, 0x331d)
+ R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
+ 0x6a28, 0x6dd8, 0x219a, 0x3b68)
+
+ R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
+ 0x29cb, 0x28e5, 0x4218, -7788)
+ R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
+ 0x473f, 0x06d1, 0x3aae, 0x3036)
+ R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
+ 0xaea1, -15134, 0x640b, -11295)
+ R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
+ 0x8f4c, 0x4887, 0xbc7c, -22499)
+ R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
+ 0x7eb8, -27199, 0x00ea, 0x6050)
+ R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
+ 0xe01a, 0x22fe, 0x4447, 0x69c5)
+ R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
+ 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
+ R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
+ 0x4701, -27017, 0xc7bd, -19859)
+
+ R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
+ 0x0988, -1462, 0x4c70, -19401)
+ R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
+ 0xadaf, -5221, 0xfc99, 0x66f7)
+ R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
+ 0x7e80, -16418, 0xba1e, -25587)
+ R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
+ 0x4130, 0x380d, 0xe0c5, 0x738d)
+ lwz rW00,0(rHP)
+ R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
+ 0xe837, -30770, 0xde8a, 0x69e8)
+ lwz rW14,4(rHP)
+ R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
+ 0x9e79, 0x260f, 0x256d, -27941)
+ lwz rW12,8(rHP)
+ R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
+ 0xab75, -20775, 0x4f9e, -28397)
+ lwz rW10,12(rHP)
+ R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
+ 0x662b, 0x7c56, 0x11b2, 0x0358)
+
+ add rH0,rH0,rW00
+ stw rH0,0(rHP)
+ add rH1,rH1,rW14
+ stw rH1,4(rHP)
+ add rH2,rH2,rW12
+ stw rH2,8(rHP)
+ add rH3,rH3,rW10
+ stw rH3,12(rHP)
+ NEXT_BLOCK
+
+ bdnz ppc_md5_main
+
+ FINALIZE
+ blr
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
new file mode 100644
index 000000000000..452fb4dc575f
--- /dev/null
+++ b/arch/powerpc/crypto/md5-glue.c
@@ -0,0 +1,165 @@
+/*
+ * Glue code for MD5 implementation for PPC assembler
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/md5.h>
+#include <asm/byteorder.h>
+
+extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
+
+static inline void ppc_md5_clear_context(struct md5_state *sctx)
+{
+ int count = sizeof(struct md5_state) >> 2;
+ u32 *ptr = (u32 *)sctx;
+
+ /* make sure we can clear the fast way */
+ BUILD_BUG_ON(sizeof(struct md5_state) % 4);
+ do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_md5_init(struct shash_desc *desc)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+
+ sctx->hash[0] = 0x67452301;
+ sctx->hash[1] = 0xefcdab89;
+ sctx->hash[2] = 0x98badcfe;
+ sctx->hash[3] = 0x10325476;
+ sctx->byte_count = 0;
+
+ return 0;
+}
+
+static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->byte_count & 0x3f;
+ unsigned int avail = 64 - offset;
+ const u8 *src = data;
+
+ sctx->byte_count += len;
+
+ if (avail > len) {
+ memcpy((char *)sctx->block + offset, src, len);
+ return 0;
+ }
+
+ if (offset) {
+ memcpy((char *)sctx->block + offset, src, avail);
+ ppc_md5_transform(sctx->hash, (const u8 *)sctx->block, 1);
+ len -= avail;
+ src += avail;
+ }
+
+ if (len > 63) {
+ ppc_md5_transform(sctx->hash, src, len >> 6);
+ src += len & ~0x3f;
+ len &= 0x3f;
+ }
+
+ memcpy((char *)sctx->block, src, len);
+ return 0;
+}
+
+static int ppc_md5_final(struct shash_desc *desc, u8 *out)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->byte_count & 0x3f;
+ const u8 *src = (const u8 *)sctx->block;
+ u8 *p = (u8 *)src + offset;
+ int padlen = 55 - offset;
+ __le64 *pbits = (__le64 *)((char *)sctx->block + 56);
+ __le32 *dst = (__le32 *)out;
+
+ *p++ = 0x80;
+
+ if (padlen < 0) {
+ memset(p, 0x00, padlen + sizeof (u64));
+ ppc_md5_transform(sctx->hash, src, 1);
+ p = (char *)sctx->block;
+ padlen = 56;
+ }
+
+ memset(p, 0, padlen);
+ *pbits = cpu_to_le64(sctx->byte_count << 3);
+ ppc_md5_transform(sctx->hash, src, 1);
+
+ dst[0] = cpu_to_le32(sctx->hash[0]);
+ dst[1] = cpu_to_le32(sctx->hash[1]);
+ dst[2] = cpu_to_le32(sctx->hash[2]);
+ dst[3] = cpu_to_le32(sctx->hash[3]);
+
+ ppc_md5_clear_context(sctx);
+ return 0;
+}
+
+static int ppc_md5_export(struct shash_desc *desc, void *out)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int ppc_md5_import(struct shash_desc *desc, const void *in)
+{
+ struct md5_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = MD5_DIGEST_SIZE,
+ .init = ppc_md5_init,
+ .update = ppc_md5_update,
+ .final = ppc_md5_final,
+ .export = ppc_md5_export,
+ .import = ppc_md5_import,
+ .descsize = sizeof(struct md5_state),
+ .statesize = sizeof(struct md5_state),
+ .base = {
+ .cra_name = "md5",
+ .cra_driver_name= "md5-ppc",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init ppc_md5_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit ppc_md5_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_md5_mod_init);
+module_exit(ppc_md5_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
+
+MODULE_ALIAS_CRYPTO("md5");
+MODULE_ALIAS_CRYPTO("md5-ppc");
diff --git a/arch/powerpc/crypto/sha1-spe-asm.S b/arch/powerpc/crypto/sha1-spe-asm.S
new file mode 100644
index 000000000000..fcb6cf002889
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-asm.S
@@ -0,0 +1,299 @@
+/*
+ * Fast SHA-1 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP r3 /* pointer to hash value */
+#define rWP r4 /* pointer to input */
+#define rKP r5 /* pointer to constants */
+
+#define rW0 r14 /* 64 bit round words */
+#define rW1 r15
+#define rW2 r16
+#define rW3 r17
+#define rW4 r18
+#define rW5 r19
+#define rW6 r20
+#define rW7 r21
+
+#define rH0 r6 /* 32 bit hash values */
+#define rH1 r7
+#define rH2 r8
+#define rH3 r9
+#define rH4 r10
+
+#define rT0 r22 /* 64 bit temporary */
+#define rT1 r0 /* 32 bit temporaries */
+#define rT2 r11
+#define rT3 r12
+
+#define rK r23 /* 64 bit constant in volatile register */
+
+#define LOAD_K01
+
+#define LOAD_K11 \
+ evlwwsplat rK,0(rKP);
+
+#define LOAD_K21 \
+ evlwwsplat rK,4(rKP);
+
+#define LOAD_K31 \
+ evlwwsplat rK,8(rKP);
+
+#define LOAD_K41 \
+ evlwwsplat rK,12(rKP);
+
+#define INITIALIZE \
+ stwu r1,-128(r1); /* create stack frame */ \
+ evstdw r14,8(r1); /* We must save non volatile */ \
+ evstdw r15,16(r1); /* registers. Take the chance */ \
+ evstdw r16,24(r1); /* and save the SPE part too */ \
+ evstdw r17,32(r1); \
+ evstdw r18,40(r1); \
+ evstdw r19,48(r1); \
+ evstdw r20,56(r1); \
+ evstdw r21,64(r1); \
+ evstdw r22,72(r1); \
+ evstdw r23,80(r1);
+
+
+#define FINALIZE \
+ evldw r14,8(r1); /* restore SPE registers */ \
+ evldw r15,16(r1); \
+ evldw r16,24(r1); \
+ evldw r17,32(r1); \
+ evldw r18,40(r1); \
+ evldw r19,48(r1); \
+ evldw r20,56(r1); \
+ evldw r21,64(r1); \
+ evldw r22,72(r1); \
+ evldw r23,80(r1); \
+ xor r0,r0,r0; \
+ stw r0,8(r1); /* Delete sensitive data */ \
+ stw r0,16(r1); /* that we might have pushed */ \
+ stw r0,24(r1); /* from other context that runs */ \
+ stw r0,32(r1); /* the same code. Assume that */ \
+ stw r0,40(r1); /* the lower part of the GPRs */ \
+ stw r0,48(r1); /* were already overwritten on */ \
+ stw r0,56(r1); /* the way down to here */ \
+ stw r0,64(r1); \
+ stw r0,72(r1); \
+ stw r0,80(r1); \
+ addi r1,r1,128; /* cleanup stack frame */
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rWP); /* load data */
+#define NEXT_BLOCK \
+ addi rWP,rWP,64; /* increment per block */
+#else
+#define LOAD_DATA(reg, off) \
+ lwbrx reg,0,rWP; /* load data */ \
+ addi rWP,rWP,4; /* increment per word */
+#define NEXT_BLOCK /* nothing to do */
+#endif
+
+#define R_00_15(a, b, c, d, e, w0, w1, k, off) \
+ LOAD_DATA(w0, off) /* 1: W */ \
+ and rT2,b,c; /* 1: F' = B and C */ \
+ LOAD_K##k##1 \
+ andc rT1,d,b; /* 1: F" = ~B and D */ \
+ rotrwi rT0,a,27; /* 1: A' = A rotl 5 */ \
+ or rT2,rT2,rT1; /* 1: F = F' or F" */ \
+ add e,e,rT0; /* 1: E = E + A' */ \
+ rotrwi b,b,2; /* 1: B = B rotl 30 */ \
+ add e,e,w0; /* 1: E = E + W */ \
+ LOAD_DATA(w1, off+4) /* 2: W */ \
+ add e,e,rT2; /* 1: E = E + F */ \
+ and rT1,a,b; /* 2: F' = B and C */ \
+ add e,e,rK; /* 1: E = E + K */ \
+ andc rT2,c,a; /* 2: F" = ~B and D */ \
+ add d,d,rK; /* 2: E = E + K */ \
+ or rT2,rT2,rT1; /* 2: F = F' or F" */ \
+ rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
+ add d,d,w1; /* 2: E = E + W */ \
+ rotrwi a,a,2; /* 2: B = B rotl 30 */ \
+ add d,d,rT0; /* 2: E = E + A' */ \
+ evmergelo w1,w1,w0; /* mix W[0]/W[1] */ \
+ add d,d,rT2 /* 2: E = E + F */
+
+#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+ and rT2,b,c; /* 1: F' = B and C */ \
+ evmergelohi rT0,w7,w6; /* W[-3] */ \
+ andc rT1,d,b; /* 1: F" = ~B and D */ \
+ evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
+ or rT1,rT1,rT2; /* 1: F = F' or F" */ \
+ evxor w0,w0,w4; /* W = W xor W[-8] */ \
+ add e,e,rT1; /* 1: E = E + F */ \
+ evxor w0,w0,w1; /* W = W xor W[-14] */ \
+ rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
+ evrlwi w0,w0,1; /* W = W rotl 1 */ \
+ add e,e,rT2; /* 1: E = E + A' */ \
+ evaddw rT0,w0,rK; /* WK = W + K */ \
+ rotrwi b,b,2; /* 1: B = B rotl 30 */ \
+ LOAD_K##k##1 \
+ evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
+ add e,e,rT0; /* 1: E = E + WK */ \
+ add d,d,rT1; /* 2: E = E + WK */ \
+ and rT2,a,b; /* 2: F' = B and C */ \
+ andc rT1,c,a; /* 2: F" = ~B and D */ \
+ rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
+ or rT1,rT1,rT2; /* 2: F = F' or F" */ \
+ add d,d,rT0; /* 2: E = E + A' */ \
+ rotrwi a,a,2; /* 2: B = B rotl 30 */ \
+ add d,d,rT1 /* 2: E = E + F */
+
+#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+ evmergelohi rT0,w7,w6; /* W[-3] */ \
+ xor rT2,b,c; /* 1: F' = B xor C */ \
+ evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
+ xor rT2,rT2,d; /* 1: F = F' xor D */ \
+ evxor w0,w0,w4; /* W = W xor W[-8] */ \
+ add e,e,rT2; /* 1: E = E + F */ \
+ evxor w0,w0,w1; /* W = W xor W[-14] */ \
+ rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
+ evrlwi w0,w0,1; /* W = W rotl 1 */ \
+ add e,e,rT2; /* 1: E = E + A' */ \
+ evaddw rT0,w0,rK; /* WK = W + K */ \
+ rotrwi b,b,2; /* 1: B = B rotl 30 */ \
+ LOAD_K##k##1 \
+ evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
+ add e,e,rT0; /* 1: E = E + WK */ \
+ xor rT2,a,b; /* 2: F' = B xor C */ \
+ add d,d,rT1; /* 2: E = E + WK */ \
+ xor rT2,rT2,c; /* 2: F = F' xor D */ \
+ rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
+ add d,d,rT2; /* 2: E = E + F */ \
+ rotrwi a,a,2; /* 2: B = B rotl 30 */ \
+ add d,d,rT0 /* 2: E = E + A' */
+
+#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+ and rT2,b,c; /* 1: F' = B and C */ \
+ evmergelohi rT0,w7,w6; /* W[-3] */ \
+ or rT1,b,c; /* 1: F" = B or C */ \
+ evxor w0,w0,rT0; /* W = W[-16] xor W[-3] */ \
+ and rT1,d,rT1; /* 1: F" = F" and D */ \
+ evxor w0,w0,w4; /* W = W xor W[-8] */ \
+ or rT2,rT2,rT1; /* 1: F = F' or F" */ \
+ evxor w0,w0,w1; /* W = W xor W[-14] */ \
+ add e,e,rT2; /* 1: E = E + F */ \
+ evrlwi w0,w0,1; /* W = W rotl 1 */ \
+ rotrwi rT2,a,27; /* 1: A' = A rotl 5 */ \
+ evaddw rT0,w0,rK; /* WK = W + K */ \
+ add e,e,rT2; /* 1: E = E + A' */ \
+ LOAD_K##k##1 \
+ evmergehi rT1,rT1,rT0; /* WK1/WK2 */ \
+ rotrwi b,b,2; /* 1: B = B rotl 30 */ \
+ add e,e,rT0; /* 1: E = E + WK */ \
+ and rT2,a,b; /* 2: F' = B and C */ \
+ or rT0,a,b; /* 2: F" = B or C */ \
+ add d,d,rT1; /* 2: E = E + WK */ \
+ and rT0,c,rT0; /* 2: F" = F" and D */ \
+ rotrwi a,a,2; /* 2: B = B rotl 30 */ \
+ or rT2,rT2,rT0; /* 2: F = F' or F" */ \
+ rotrwi rT0,e,27; /* 2: A' = A rotl 5 */ \
+ add d,d,rT2; /* 2: E = E + F */ \
+ add d,d,rT0 /* 2: E = E + A' */
+
+#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
+ R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
+
+_GLOBAL(ppc_spe_sha1_transform)
+ INITIALIZE
+
+ lwz rH0,0(rHP)
+ lwz rH1,4(rHP)
+ mtctr r5
+ lwz rH2,8(rHP)
+ lis rKP,PPC_SPE_SHA1_K@h
+ lwz rH3,12(rHP)
+ ori rKP,rKP,PPC_SPE_SHA1_K@l
+ lwz rH4,16(rHP)
+
+ppc_spe_sha1_main:
+ R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
+ R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
+ R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
+ R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
+ R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
+ R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
+ R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
+ R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)
+
+ R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
+ R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)
+
+ R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
+ R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
+ R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
+ R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
+ R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
+ R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
+ R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
+ R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
+ R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
+ R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)
+
+ R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
+ R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
+ R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
+ R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
+ R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
+ R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
+ R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
+ R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
+ R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
+ R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)
+
+ R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
+ R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
+ R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
+ R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
+ R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
+ R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
+ R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
+ lwz rT3,0(rHP)
+ R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
+ lwz rW1,4(rHP)
+ R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
+ lwz rW2,8(rHP)
+ R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
+ lwz rW3,12(rHP)
+ NEXT_BLOCK
+ lwz rW4,16(rHP)
+
+ add rH0,rH0,rT3
+ stw rH0,0(rHP)
+ add rH1,rH1,rW1
+ stw rH1,4(rHP)
+ add rH2,rH2,rW2
+ stw rH2,8(rHP)
+ add rH3,rH3,rW3
+ stw rH3,12(rHP)
+ add rH4,rH4,rW4
+ stw rH4,16(rHP)
+
+ bdnz ppc_spe_sha1_main
+
+ FINALIZE
+ blr
+
+.data
+.align 4
+PPC_SPE_SHA1_K:
+ .long 0x5A827999,0x6ED9EBA1,0x8F1BBCDC,0xCA62C1D6
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
new file mode 100644
index 000000000000..3e1d22212521
--- /dev/null
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -0,0 +1,210 @@
+/*
+ * Glue code for SHA-1 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 2KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 2048
+
+extern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+ /* We just start SPE operations and will save SPE registers later. */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+ /* reenable preemption */
+ preempt_enable();
+}
+
+static inline void ppc_sha1_clear_context(struct sha1_state *sctx)
+{
+ int count = sizeof(struct sha1_state) >> 2;
+ u32 *ptr = (u32 *)sctx;
+
+ /* make sure we can clear the fast way */
+ BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
+ do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_spe_sha1_init(struct shash_desc *desc)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA1_H0;
+ sctx->state[1] = SHA1_H1;
+ sctx->state[2] = SHA1_H2;
+ sctx->state[3] = SHA1_H3;
+ sctx->state[4] = SHA1_H4;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->count & 0x3f;
+ const unsigned int avail = 64 - offset;
+ unsigned int bytes;
+ const u8 *src = data;
+
+ if (avail > len) {
+ sctx->count += len;
+ memcpy((char *)sctx->buffer + offset, src, len);
+ return 0;
+ }
+
+ sctx->count += len;
+
+ if (offset) {
+ memcpy((char *)sctx->buffer + offset, src, avail);
+
+ spe_begin();
+ ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
+ spe_end();
+
+ len -= avail;
+ src += avail;
+ }
+
+ while (len > 63) {
+ bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
+ bytes = bytes & ~0x3f;
+
+ spe_begin();
+ ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
+ spe_end();
+
+ src += bytes;
+ len -= bytes;
+ };
+
+ memcpy((char *)sctx->buffer, src, len);
+ return 0;
+}
+
+static int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->count & 0x3f;
+ char *p = (char *)sctx->buffer + offset;
+ int padlen;
+ __be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
+ __be32 *dst = (__be32 *)out;
+
+ padlen = 55 - offset;
+ *p++ = 0x80;
+
+ spe_begin();
+
+ if (padlen < 0) {
+ memset(p, 0x00, padlen + sizeof (u64));
+ ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+ p = (char *)sctx->buffer;
+ padlen = 56;
+ }
+
+ memset(p, 0, padlen);
+ *pbits = cpu_to_be64(sctx->count << 3);
+ ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
+
+ spe_end();
+
+ dst[0] = cpu_to_be32(sctx->state[0]);
+ dst[1] = cpu_to_be32(sctx->state[1]);
+ dst[2] = cpu_to_be32(sctx->state[2]);
+ dst[3] = cpu_to_be32(sctx->state[3]);
+ dst[4] = cpu_to_be32(sctx->state[4]);
+
+ ppc_sha1_clear_context(sctx);
+ return 0;
+}
+
+static int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
+{
+ struct sha1_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = SHA1_DIGEST_SIZE,
+ .init = ppc_spe_sha1_init,
+ .update = ppc_spe_sha1_update,
+ .final = ppc_spe_sha1_final,
+ .export = ppc_spe_sha1_export,
+ .import = ppc_spe_sha1_import,
+ .descsize = sizeof(struct sha1_state),
+ .statesize = sizeof(struct sha1_state),
+ .base = {
+ .cra_name = "sha1",
+ .cra_driver_name= "sha1-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA1_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init ppc_spe_sha1_mod_init(void)
+{
+ return crypto_register_shash(&alg);
+}
+
+static void __exit ppc_spe_sha1_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(ppc_spe_sha1_mod_init);
+module_exit(ppc_spe_sha1_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha1");
+MODULE_ALIAS_CRYPTO("sha1-ppc-spe");
diff --git a/arch/powerpc/crypto/sha256-spe-asm.S b/arch/powerpc/crypto/sha256-spe-asm.S
new file mode 100644
index 000000000000..2d10e4c08f03
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-asm.S
@@ -0,0 +1,323 @@
+/*
+ * Fast SHA-256 implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+#define rHP r3 /* pointer to hash values in memory */
+#define rKP r24 /* pointer to round constants */
+#define rWP r4 /* pointer to input data */
+
+#define rH0 r5 /* 8 32 bit hash values in 8 registers */
+#define rH1 r6
+#define rH2 r7
+#define rH3 r8
+#define rH4 r9
+#define rH5 r10
+#define rH6 r11
+#define rH7 r12
+
+#define rW0 r14 /* 64 bit registers. 16 words in 8 registers */
+#define rW1 r15
+#define rW2 r16
+#define rW3 r17
+#define rW4 r18
+#define rW5 r19
+#define rW6 r20
+#define rW7 r21
+
+#define rT0 r22 /* 64 bit temporaries */
+#define rT1 r23
+#define rT2 r0 /* 32 bit temporaries */
+#define rT3 r25
+
+#define CMP_KN_LOOP
+#define CMP_KC_LOOP \
+ cmpwi rT1,0;
+
+#define INITIALIZE \
+ stwu r1,-128(r1); /* create stack frame */ \
+ evstdw r14,8(r1); /* We must save non volatile */ \
+ evstdw r15,16(r1); /* registers. Take the chance */ \
+ evstdw r16,24(r1); /* and save the SPE part too */ \
+ evstdw r17,32(r1); \
+ evstdw r18,40(r1); \
+ evstdw r19,48(r1); \
+ evstdw r20,56(r1); \
+ evstdw r21,64(r1); \
+ evstdw r22,72(r1); \
+ evstdw r23,80(r1); \
+ stw r24,88(r1); /* save normal registers */ \
+ stw r25,92(r1);
+
+
+#define FINALIZE \
+ evldw r14,8(r1); /* restore SPE registers */ \
+ evldw r15,16(r1); \
+ evldw r16,24(r1); \
+ evldw r17,32(r1); \
+ evldw r18,40(r1); \
+ evldw r19,48(r1); \
+ evldw r20,56(r1); \
+ evldw r21,64(r1); \
+ evldw r22,72(r1); \
+ evldw r23,80(r1); \
+ lwz r24,88(r1); /* restore normal registers */ \
+ lwz r25,92(r1); \
+ xor r0,r0,r0; \
+ stw r0,8(r1); /* Delete sensitive data */ \
+ stw r0,16(r1); /* that we might have pushed */ \
+ stw r0,24(r1); /* from other context that runs */ \
+ stw r0,32(r1); /* the same code. Assume that */ \
+ stw r0,40(r1); /* the lower part of the GPRs */ \
+ stw r0,48(r1); /* was already overwritten on */ \
+ stw r0,56(r1); /* the way down to here */ \
+ stw r0,64(r1); \
+ stw r0,72(r1); \
+ stw r0,80(r1); \
+ addi r1,r1,128; /* cleanup stack frame */
+
+#ifdef __BIG_ENDIAN__
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rWP); /* load data */
+#define NEXT_BLOCK \
+ addi rWP,rWP,64; /* increment per block */
+#else
+#define LOAD_DATA(reg, off) \
+ lwbrx reg,0,rWP; /* load data */ \
+ addi rWP,rWP,4; /* increment per word */
+#define NEXT_BLOCK /* nothing to do */
+#endif
+
+#define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
+ LOAD_DATA(w, off) /* 1: W */ \
+ rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
+ rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \
+ rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \
+ xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \
+ and rT3,e,f; /* 1: ch = e and f */ \
+ xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \
+ andc rT1,g,e; /* 1: ch' = ~e and g */ \
+ lwz rT2,off(rKP); /* 1: K */ \
+ xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \
+ add h,h,rT0; /* 1: temp1 = h + S1 */ \
+ add rT3,rT3,w; /* 1: temp1' = ch + w */ \
+ rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
+ add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \
+ rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \
+ add h,h,rT2; /* 1: temp1 = temp1 + K */ \
+ rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
+ xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \
+ add d,d,h; /* 1: d = d + temp1 */ \
+ xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \
+ evmergelo w,w,w; /* shift W */ \
+ or rT2,a,b; /* 1: maj = a or b */ \
+ and rT1,a,b; /* 1: maj' = a and b */ \
+ and rT2,rT2,c; /* 1: maj = maj and c */ \
+ LOAD_DATA(w, off+4) /* 2: W */ \
+ or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \
+ rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
+ add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
+ rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
+ add h,h,rT3; /* 1: h = temp1 + temp2 */ \
+ rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
+ xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
+ and rT3,d,e; /* 2: ch = e and f */ \
+ xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
+ andc rT1,f,d; /* 2: ch' = ~e and g */ \
+ lwz rT2,off+4(rKP); /* 2: K */ \
+ xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
+ add g,g,rT0; /* 2: temp1 = h + S1 */ \
+ add rT3,rT3,w; /* 2: temp1' = ch + w */ \
+ rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
+ add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \
+ rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
+ add g,g,rT2; /* 2: temp1 = temp1 + K */ \
+ rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
+ xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
+ or rT2,h,a; /* 2: maj = a or b */ \
+ xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
+ and rT1,h,a; /* 2: maj' = a and b */ \
+ and rT2,rT2,b; /* 2: maj = maj and c */ \
+ add c,c,g; /* 2: d = d + temp1 */ \
+ or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
+ add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
+ add g,g,rT3 /* 2: h = temp1 + temp2 */
+
+#define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
+ rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
+ evmergelohi rT0,w0,w1; /* w[-15] */ \
+ rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \
+ evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \
+ xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
+ evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \
+ rotrwi rT3,e,25; /* 1: S1' = e rotr 25 */ \
+ evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \
+ xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
+ evrlwi rT0,rT0,21; /* s0' = w[-15] rotr 18 */ \
+ add h,h,rT2; /* 1: temp1 = h + S1 */ \
+ evxor rT0,rT0,rT1; /* s0 = s0 xor s0' */ \
+ and rT2,e,f; /* 1: ch = e and f */ \
+ evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \
+ andc rT3,g,e; /* 1: ch' = ~e and g */ \
+ evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \
+ xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \
+ evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \
+ add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
+ evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
+ rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
+ evrlwi rT1,w7,13; /* s1' = w[-2] rotr 19 */ \
+ rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \
+ evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
+ xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
+ evldw rT1,off(rKP); /* k */ \
+ rotrwi rT3,a,22; /* 1: S0' = a rotr 22 */ \
+ evaddw w0,w0,rT0; /* w = w + s1 */ \
+ xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
+ evmergelohi rT0,w4,w5; /* w[-7] */ \
+ and rT3,a,b; /* 1: maj = a and b */ \
+ evaddw w0,w0,rT0; /* w = w + w[-7] */ \
+ CMP_K##k##_LOOP \
+ add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
+ evaddw rT1,rT1,w0; /* wk = w + k */ \
+ xor rT3,a,b; /* 1: maj = a xor b */ \
+ evmergehi rT0,rT1,rT1; /* wk1/wk2 */ \
+ and rT3,rT3,c; /* 1: maj = maj and c */ \
+ add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
+ add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj */ \
+ add g,g,rT1; /* 2: temp1 = temp1 + wk */ \
+ add d,d,h; /* 1: d = d + temp1 */ \
+ rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
+ add h,h,rT2; /* 1: h = temp1 + temp2 */ \
+ rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
+ rotrwi rT2,d,25; /* 2: S" = e rotr 25 */ \
+ xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
+ and rT3,d,e; /* 2: ch = e and f */ \
+ xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
+ andc rT1,f,d; /* 2: ch' = ~e and g */ \
+ add g,g,rT0; /* 2: temp1 = h + S1 */ \
+ xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
+ rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
+ add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
+ rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
+ rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
+ xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
+ or rT2,h,a; /* 2: maj = a or b */ \
+ and rT1,h,a; /* 2: maj' = a and b */ \
+ and rT2,rT2,b; /* 2: maj = maj and c */ \
+ xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
+ or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
+ add c,c,g; /* 2: d = d + temp1 */ \
+ add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
+ add g,g,rT3 /* 2: h = temp1 + temp2 */
+
+_GLOBAL(ppc_spe_sha256_transform)
+ INITIALIZE
+
+ mtctr r5
+ lwz rH0,0(rHP)
+ lwz rH1,4(rHP)
+ lwz rH2,8(rHP)
+ lwz rH3,12(rHP)
+ lwz rH4,16(rHP)
+ lwz rH5,20(rHP)
+ lwz rH6,24(rHP)
+ lwz rH7,28(rHP)
+
+ppc_spe_sha256_main:
+ lis rKP,PPC_SPE_SHA256_K@ha
+ addi rKP,rKP,PPC_SPE_SHA256_K@l
+
+ R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
+ R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
+ R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
+ R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
+ R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
+ R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
+ R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
+ R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
+ppc_spe_sha256_16_rounds:
+ addi rKP,rKP,64
+ R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+ rW0, rW1, rW4, rW5, rW7, N, 0)
+ R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+ rW1, rW2, rW5, rW6, rW0, N, 8)
+ R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+ rW2, rW3, rW6, rW7, rW1, N, 16)
+ R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+ rW3, rW4, rW7, rW0, rW2, N, 24)
+ R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
+ rW4, rW5, rW0, rW1, rW3, N, 32)
+ R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
+ rW5, rW6, rW1, rW2, rW4, N, 40)
+ R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
+ rW6, rW7, rW2, rW3, rW5, N, 48)
+ R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
+ rW7, rW0, rW3, rW4, rW6, C, 56)
+ bt gt,ppc_spe_sha256_16_rounds
+
+ lwz rW0,0(rHP)
+ NEXT_BLOCK
+ lwz rW1,4(rHP)
+ lwz rW2,8(rHP)
+ lwz rW3,12(rHP)
+ lwz rW4,16(rHP)
+ lwz rW5,20(rHP)
+ lwz rW6,24(rHP)
+ lwz rW7,28(rHP)
+
+ add rH0,rH0,rW0
+ stw rH0,0(rHP)
+ add rH1,rH1,rW1
+ stw rH1,4(rHP)
+ add rH2,rH2,rW2
+ stw rH2,8(rHP)
+ add rH3,rH3,rW3
+ stw rH3,12(rHP)
+ add rH4,rH4,rW4
+ stw rH4,16(rHP)
+ add rH5,rH5,rW5
+ stw rH5,20(rHP)
+ add rH6,rH6,rW6
+ stw rH6,24(rHP)
+ add rH7,rH7,rW7
+ stw rH7,28(rHP)
+
+ bdnz ppc_spe_sha256_main
+
+ FINALIZE
+ blr
+
+.data
+.align 5
+PPC_SPE_SHA256_K:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
new file mode 100644
index 000000000000..f4a616fe1a82
--- /dev/null
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -0,0 +1,275 @@
+/*
+ * Glue code for SHA-256 implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/cryptohash.h>
+#include <linux/types.h>
+#include <crypto/sha.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <linux/hardirq.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). SHA256 takes ~2,000
+ * operations per 64 bytes. e500 cores can issue two arithmetic instructions
+ * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
+ * Thus 1KB of input data will need an estimated maximum of 18,000 cycles.
+ * Headroom for cache misses included. Even with the low end model clocked
+ * at 667 MHz this equals to a critical time window of less than 27us.
+ *
+ */
+#define MAX_BYTES 1024
+
+extern void ppc_spe_sha256_transform(u32 *state, const u8 *src, u32 blocks);
+
+static void spe_begin(void)
+{
+ /* We just start SPE operations and will save SPE registers later. */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+static void spe_end(void)
+{
+ /* reenable preemption */
+ preempt_enable();
+}
+
+static inline void ppc_sha256_clear_context(struct sha256_state *sctx)
+{
+ int count = sizeof(struct sha256_state) >> 2;
+ u32 *ptr = (u32 *)sctx;
+
+ /* make sure we can clear the fast way */
+ BUILD_BUG_ON(sizeof(struct sha256_state) % 4);
+ do { *ptr++ = 0; } while (--count);
+}
+
+static int ppc_spe_sha256_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int ppc_spe_sha224_init(struct shash_desc *desc)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ sctx->state[0] = SHA224_H0;
+ sctx->state[1] = SHA224_H1;
+ sctx->state[2] = SHA224_H2;
+ sctx->state[3] = SHA224_H3;
+ sctx->state[4] = SHA224_H4;
+ sctx->state[5] = SHA224_H5;
+ sctx->state[6] = SHA224_H6;
+ sctx->state[7] = SHA224_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+static int ppc_spe_sha256_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->count & 0x3f;
+ const unsigned int avail = 64 - offset;
+ unsigned int bytes;
+ const u8 *src = data;
+
+ if (avail > len) {
+ sctx->count += len;
+ memcpy((char *)sctx->buf + offset, src, len);
+ return 0;
+ }
+
+ sctx->count += len;
+
+ if (offset) {
+ memcpy((char *)sctx->buf + offset, src, avail);
+
+ spe_begin();
+ ppc_spe_sha256_transform(sctx->state, (const u8 *)sctx->buf, 1);
+ spe_end();
+
+ len -= avail;
+ src += avail;
+ }
+
+ while (len > 63) {
+ /* cut input data into smaller blocks */
+ bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
+ bytes = bytes & ~0x3f;
+
+ spe_begin();
+ ppc_spe_sha256_transform(sctx->state, src, bytes >> 6);
+ spe_end();
+
+ src += bytes;
+ len -= bytes;
+ };
+
+ memcpy((char *)sctx->buf, src, len);
+ return 0;
+}
+
+static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+ const unsigned int offset = sctx->count & 0x3f;
+ char *p = (char *)sctx->buf + offset;
+ int padlen;
+ __be64 *pbits = (__be64 *)(((char *)&sctx->buf) + 56);
+ __be32 *dst = (__be32 *)out;
+
+ padlen = 55 - offset;
+ *p++ = 0x80;
+
+ spe_begin();
+
+ if (padlen < 0) {
+ memset(p, 0x00, padlen + sizeof (u64));
+ ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+ p = (char *)sctx->buf;
+ padlen = 56;
+ }
+
+ memset(p, 0, padlen);
+ *pbits = cpu_to_be64(sctx->count << 3);
+ ppc_spe_sha256_transform(sctx->state, sctx->buf, 1);
+
+ spe_end();
+
+ dst[0] = cpu_to_be32(sctx->state[0]);
+ dst[1] = cpu_to_be32(sctx->state[1]);
+ dst[2] = cpu_to_be32(sctx->state[2]);
+ dst[3] = cpu_to_be32(sctx->state[3]);
+ dst[4] = cpu_to_be32(sctx->state[4]);
+ dst[5] = cpu_to_be32(sctx->state[5]);
+ dst[6] = cpu_to_be32(sctx->state[6]);
+ dst[7] = cpu_to_be32(sctx->state[7]);
+
+ ppc_sha256_clear_context(sctx);
+ return 0;
+}
+
+static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out)
+{
+ u32 D[SHA256_DIGEST_SIZE >> 2];
+ __be32 *dst = (__be32 *)out;
+
+ ppc_spe_sha256_final(desc, (u8 *)D);
+
+ /* avoid bytewise memcpy */
+ dst[0] = D[0];
+ dst[1] = D[1];
+ dst[2] = D[2];
+ dst[3] = D[3];
+ dst[4] = D[4];
+ dst[5] = D[5];
+ dst[6] = D[6];
+
+ /* clear sensitive data */
+ memzero_explicit(D, SHA256_DIGEST_SIZE);
+ return 0;
+}
+
+static int ppc_spe_sha256_export(struct shash_desc *desc, void *out)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(out, sctx, sizeof(*sctx));
+ return 0;
+}
+
+static int ppc_spe_sha256_import(struct shash_desc *desc, const void *in)
+{
+ struct sha256_state *sctx = shash_desc_ctx(desc);
+
+ memcpy(sctx, in, sizeof(*sctx));
+ return 0;
+}
+
+static struct shash_alg algs[2] = { {
+ .digestsize = SHA256_DIGEST_SIZE,
+ .init = ppc_spe_sha256_init,
+ .update = ppc_spe_sha256_update,
+ .final = ppc_spe_sha256_final,
+ .export = ppc_spe_sha256_export,
+ .import = ppc_spe_sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha256",
+ .cra_driver_name= "sha256-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA256_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+}, {
+ .digestsize = SHA224_DIGEST_SIZE,
+ .init = ppc_spe_sha224_init,
+ .update = ppc_spe_sha256_update,
+ .final = ppc_spe_sha224_final,
+ .export = ppc_spe_sha256_export,
+ .import = ppc_spe_sha256_import,
+ .descsize = sizeof(struct sha256_state),
+ .statesize = sizeof(struct sha256_state),
+ .base = {
+ .cra_name = "sha224",
+ .cra_driver_name= "sha224-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = SHA224_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init ppc_spe_sha256_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit ppc_spe_sha256_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(ppc_spe_sha256_mod_init);
+module_exit(ppc_spe_sha256_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("SHA-224 and SHA-256 Secure Hash Algorithm, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("sha224");
+MODULE_ALIAS_CRYPTO("sha224-ppc-spe");
+MODULE_ALIAS_CRYPTO("sha256");
+MODULE_ALIAS_CRYPTO("sha256-ppc-spe");
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 54f60ab41c63..112cefacf2af 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -797,7 +797,9 @@ static int rfc4106_init(struct crypto_tfm *tfm)
PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN);
struct crypto_aead *cryptd_child;
struct aesni_rfc4106_gcm_ctx *child_ctx;
- cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0);
+ cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
@@ -890,15 +892,12 @@ out_free_ablkcipher:
return ret;
}
-static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
- unsigned int key_len)
+static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
+ unsigned int key_len)
{
int ret = 0;
- struct crypto_tfm *tfm = crypto_aead_tfm(parent);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- struct aesni_rfc4106_gcm_ctx *child_ctx =
- aesni_rfc4106_gcm_ctx_get(cryptd_child);
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(aead);
u8 *new_key_align, *new_key_mem = NULL;
if (key_len < 4) {
@@ -943,20 +942,31 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
goto exit;
}
ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
- memcpy(child_ctx, ctx, sizeof(*ctx));
exit:
kfree(new_key_mem);
return ret;
}
-/* This is the Integrity Check Value (aka the authentication tag length and can
- * be 8, 12 or 16 bytes long. */
-static int rfc4106_set_authsize(struct crypto_aead *parent,
- unsigned int authsize)
+static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
+ unsigned int key_len)
{
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
+ struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
+ struct aesni_rfc4106_gcm_ctx *c_ctx = aesni_rfc4106_gcm_ctx_get(child);
+ struct cryptd_aead *cryptd_tfm = ctx->cryptd_tfm;
+ int ret;
+ ret = crypto_aead_setkey(child, key, key_len);
+ if (!ret) {
+ memcpy(ctx, c_ctx, sizeof(*ctx));
+ ctx->cryptd_tfm = cryptd_tfm;
+ }
+ return ret;
+}
+
+static int common_rfc4106_set_authsize(struct crypto_aead *aead,
+ unsigned int authsize)
+{
switch (authsize) {
case 8:
case 12:
@@ -965,51 +975,23 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
default:
return -EINVAL;
}
- crypto_aead_crt(parent)->authsize = authsize;
- crypto_aead_crt(cryptd_child)->authsize = authsize;
+ crypto_aead_crt(aead)->authsize = authsize;
return 0;
}
-static int rfc4106_encrypt(struct aead_request *req)
-{
- int ret;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-
- if (!irq_fpu_usable()) {
- struct aead_request *cryptd_req =
- (struct aead_request *) aead_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_aead_encrypt(cryptd_req);
- } else {
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- kernel_fpu_begin();
- ret = cryptd_child->base.crt_aead.encrypt(req);
- kernel_fpu_end();
- return ret;
- }
-}
-
-static int rfc4106_decrypt(struct aead_request *req)
+/* This is the Integrity Check Value (aka the authentication tag length and can
+ * be 8, 12 or 16 bytes long. */
+static int rfc4106_set_authsize(struct crypto_aead *parent,
+ unsigned int authsize)
{
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent);
+ struct crypto_aead *child = cryptd_aead_child(ctx->cryptd_tfm);
int ret;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
- if (!irq_fpu_usable()) {
- struct aead_request *cryptd_req =
- (struct aead_request *) aead_request_ctx(req);
- memcpy(cryptd_req, req, sizeof(*req));
- aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
- return crypto_aead_decrypt(cryptd_req);
- } else {
- struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm);
- kernel_fpu_begin();
- ret = cryptd_child->base.crt_aead.decrypt(req);
- kernel_fpu_end();
- return ret;
- }
+ ret = crypto_aead_setauthsize(child, authsize);
+ if (!ret)
+ crypto_aead_crt(parent)->authsize = authsize;
+ return ret;
}
static int __driver_rfc4106_encrypt(struct aead_request *req)
@@ -1185,6 +1167,78 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
}
return retval;
}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ int ret;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+ if (!irq_fpu_usable()) {
+ struct aead_request *cryptd_req =
+ (struct aead_request *) aead_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ ret = crypto_aead_encrypt(cryptd_req);
+ } else {
+ kernel_fpu_begin();
+ ret = __driver_rfc4106_encrypt(req);
+ kernel_fpu_end();
+ }
+ return ret;
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ int ret;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+
+ if (!irq_fpu_usable()) {
+ struct aead_request *cryptd_req =
+ (struct aead_request *) aead_request_ctx(req);
+
+ memcpy(cryptd_req, req, sizeof(*req));
+ aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
+ ret = crypto_aead_decrypt(cryptd_req);
+ } else {
+ kernel_fpu_begin();
+ ret = __driver_rfc4106_decrypt(req);
+ kernel_fpu_end();
+ }
+ return ret;
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+ int ret;
+
+ if (unlikely(!irq_fpu_usable())) {
+ WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+ ret = -EINVAL;
+ } else {
+ kernel_fpu_begin();
+ ret = __driver_rfc4106_encrypt(req);
+ kernel_fpu_end();
+ }
+ return ret;
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+ int ret;
+
+ if (unlikely(!irq_fpu_usable())) {
+ WARN_ONCE(1, "__gcm-aes-aesni alg used in invalid context");
+ ret = -EINVAL;
+ } else {
+ kernel_fpu_begin();
+ ret = __driver_rfc4106_decrypt(req);
+ kernel_fpu_end();
+ }
+ return ret;
+}
#endif
static struct crypto_alg aesni_algs[] = { {
@@ -1210,7 +1264,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__aes-aesni",
.cra_driver_name = "__driver-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@@ -1229,7 +1283,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__ecb-aes-aesni",
.cra_driver_name = "__driver-ecb-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@@ -1249,7 +1304,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__cbc-aes-aesni",
.cra_driver_name = "__driver-cbc-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@@ -1313,7 +1369,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__ctr-aes-aesni",
.cra_driver_name = "__driver-ctr-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx) +
AESNI_ALIGN - 1,
@@ -1357,7 +1414,7 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__gcm-aes-aesni",
.cra_driver_name = "__driver-gcm-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_AEAD,
+ .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) +
AESNI_ALIGN,
@@ -1366,8 +1423,12 @@ static struct crypto_alg aesni_algs[] = { {
.cra_module = THIS_MODULE,
.cra_u = {
.aead = {
- .encrypt = __driver_rfc4106_encrypt,
- .decrypt = __driver_rfc4106_decrypt,
+ .setkey = common_rfc4106_set_key,
+ .setauthsize = common_rfc4106_set_authsize,
+ .encrypt = helper_rfc4106_encrypt,
+ .decrypt = helper_rfc4106_decrypt,
+ .ivsize = 8,
+ .maxauthsize = 16,
},
},
}, {
@@ -1423,7 +1484,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__lrw-aes-aesni",
.cra_driver_name = "__driver-lrw-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_lrw_ctx),
.cra_alignmask = 0,
@@ -1444,7 +1506,8 @@ static struct crypto_alg aesni_algs[] = { {
.cra_name = "__xts-aes-aesni",
.cra_driver_name = "__driver-xts-aes-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct aesni_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 9a07fafe3831..baf0ac21ace5 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -343,7 +343,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni-avx2",
.cra_driver_name = "__driver-ecb-camellia-aesni-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -362,7 +363,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__cbc-camellia-aesni-avx2",
.cra_driver_name = "__driver-cbc-camellia-aesni-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -381,7 +383,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ctr-camellia-aesni-avx2",
.cra_driver_name = "__driver-ctr-camellia-aesni-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -401,7 +404,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__lrw-camellia-aesni-avx2",
.cra_driver_name = "__driver-lrw-camellia-aesni-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
@@ -424,7 +428,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__xts-camellia-aesni-avx2",
.cra_driver_name = "__driver-xts-camellia-aesni-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index ed38d959add6..78818a1e73e3 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -335,7 +335,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ecb-camellia-aesni",
.cra_driver_name = "__driver-ecb-camellia-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -354,7 +355,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__cbc-camellia-aesni",
.cra_driver_name = "__driver-cbc-camellia-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -373,7 +375,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__ctr-camellia-aesni",
.cra_driver_name = "__driver-ctr-camellia-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct camellia_ctx),
.cra_alignmask = 0,
@@ -393,7 +396,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__lrw-camellia-aesni",
.cra_driver_name = "__driver-lrw-camellia-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_lrw_ctx),
.cra_alignmask = 0,
@@ -416,7 +420,8 @@ static struct crypto_alg cmll_algs[10] = { {
.cra_name = "__xts-camellia-aesni",
.cra_driver_name = "__driver-xts-camellia-aesni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAMELLIA_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct camellia_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 60ada677a928..236c80974457 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -341,7 +341,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__ecb-cast5-avx",
.cra_driver_name = "__driver-ecb-cast5-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
@@ -360,7 +361,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__cbc-cast5-avx",
.cra_driver_name = "__driver-cbc-cast5-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST5_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
@@ -379,7 +381,8 @@ static struct crypto_alg cast5_algs[6] = { {
.cra_name = "__ctr-cast5-avx",
.cra_driver_name = "__driver-ctr-cast5-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast5_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 0160f68a57ff..f448810ca4ac 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -372,7 +372,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__ecb-cast6-avx",
.cra_driver_name = "__driver-ecb-cast6-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@@ -391,7 +392,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__cbc-cast6-avx",
.cra_driver_name = "__driver-cbc-cast6-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@@ -410,7 +412,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__ctr-cast6-avx",
.cra_driver_name = "__driver-ctr-cast6-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct cast6_ctx),
.cra_alignmask = 0,
@@ -430,7 +433,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__lrw-cast6-avx",
.cra_driver_name = "__driver-lrw-cast6-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_lrw_ctx),
.cra_alignmask = 0,
@@ -453,7 +457,8 @@ static struct crypto_alg cast6_algs[10] = { {
.cra_name = "__xts-cast6-avx",
.cra_driver_name = "__driver-xts-cast6-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = CAST6_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct cast6_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 8253d85aa165..2079baf06bdd 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
.cra_name = "__ghash",
.cra_driver_name = "__ghash-pclmulqdqni",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = GHASH_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct ghash_ctx),
.cra_module = THIS_MODULE,
@@ -261,7 +262,9 @@ static int ghash_async_init_tfm(struct crypto_tfm *tfm)
struct cryptd_ahash *cryptd_tfm;
struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
- cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni", 0, 0);
+ cryptd_tfm = cryptd_alloc_ahash("__ghash-pclmulqdqni",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
ctx->cryptd_tfm = cryptd_tfm;
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 432f1d76ceb8..6a85598931b5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -232,7 +232,6 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
le128_to_be128((be128 *)walk->iv, &ctrblk);
}
-EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
struct blkcipher_desc *desc,
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 437e47a4d302..2f63dc89e7a9 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -309,7 +309,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__ecb-serpent-avx2",
.cra_driver_name = "__driver-ecb-serpent-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -329,7 +330,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__cbc-serpent-avx2",
.cra_driver_name = "__driver-cbc-serpent-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -349,7 +351,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__ctr-serpent-avx2",
.cra_driver_name = "__driver-ctr-serpent-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -370,7 +373,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__lrw-serpent-avx2",
.cra_driver_name = "__driver-lrw-serpent-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@@ -394,7 +398,8 @@ static struct crypto_alg srp_algs[10] = { {
.cra_name = "__xts-serpent-avx2",
.cra_driver_name = "__driver-xts-serpent-avx2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7e217398b4eb..c8d478af8456 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -378,7 +378,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-avx",
.cra_driver_name = "__driver-ecb-serpent-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -397,7 +398,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__cbc-serpent-avx",
.cra_driver_name = "__driver-cbc-serpent-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -416,7 +418,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ctr-serpent-avx",
.cra_driver_name = "__driver-ctr-serpent-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -436,7 +439,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__lrw-serpent-avx",
.cra_driver_name = "__driver-lrw-serpent-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@@ -459,7 +463,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__xts-serpent-avx",
.cra_driver_name = "__driver-xts-serpent-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index bf025adaea01..3643dd508f45 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -387,7 +387,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ecb-serpent-sse2",
.cra_driver_name = "__driver-ecb-serpent-sse2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -406,7 +407,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__cbc-serpent-sse2",
.cra_driver_name = "__driver-cbc-serpent-sse2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -425,7 +427,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__ctr-serpent-sse2",
.cra_driver_name = "__driver-ctr-serpent-sse2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct serpent_ctx),
.cra_alignmask = 0,
@@ -445,7 +448,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__lrw-serpent-sse2",
.cra_driver_name = "__driver-lrw-serpent-sse2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_lrw_ctx),
.cra_alignmask = 0,
@@ -468,7 +472,8 @@ static struct crypto_alg serpent_algs[10] = { {
.cra_name = "__xts-serpent-sse2",
.cra_driver_name = "__driver-xts-serpent-sse2",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SERPENT_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct serpent_xts_ctx),
.cra_alignmask = 0,
diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c
index fd9f6b035b16..e510b1c5d690 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb.c
@@ -694,7 +694,8 @@ static struct shash_alg sha1_mb_shash_alg = {
* use ASYNC flag as some buffers in multi-buffer
* algo may not have completed before hashing thread sleep
*/
- .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
.cra_list = LIST_HEAD_INIT(sha1_mb_shash_alg.base.cra_list),
@@ -770,7 +771,9 @@ static int sha1_mb_async_init_tfm(struct crypto_tfm *tfm)
struct sha1_mb_ctx *ctx = crypto_tfm_ctx(tfm);
struct mcryptd_hash_ctx *mctx;
- mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb", 0, 0);
+ mcryptd_tfm = mcryptd_alloc_ahash("__intel_sha1-mb",
+ CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
if (IS_ERR(mcryptd_tfm))
return PTR_ERR(mcryptd_tfm);
mctx = crypto_ahash_ctx(&mcryptd_tfm->base);
@@ -828,7 +831,7 @@ static unsigned long sha1_mb_flusher(struct mcryptd_alg_cstate *cstate)
while (!list_empty(&cstate->work_list)) {
rctx = list_entry(cstate->work_list.next,
struct mcryptd_hash_request_ctx, waiter);
- if time_before(cur_time, rctx->tag.expire)
+ if (time_before(cur_time, rctx->tag.expire))
break;
kernel_fpu_begin();
sha_ctx = (struct sha1_hash_ctx *) sha1_ctx_mgr_flush(cstate->mgr);
diff --git a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
index 4ca7e166a2aa..822acb5b464c 100644
--- a/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
+++ b/arch/x86/crypto/sha-mb/sha1_mb_mgr_init_avx2.c
@@ -56,7 +56,7 @@
void sha1_mb_mgr_init_avx2(struct sha1_mb_mgr *state)
{
unsigned int j;
- state->unused_lanes = 0xF76543210;
+ state->unused_lanes = 0xF76543210ULL;
for (j = 0; j < 8; j++) {
state->lens[j] = 0xFFFFFFFF;
state->ldata[j].job_in_lane = NULL;
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 6c20fe04a738..33d1b9dc14cc 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -28,7 +28,7 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha1_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
@@ -44,132 +44,51 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data,
#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
asmlinkage void sha1_transform_avx2(u32 *digest, const char *data,
- unsigned int rounds);
+ unsigned int rounds);
#endif
-static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
-
-
-static int sha1_ssse3_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
-
-static int __sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count += len;
-
- if (partial) {
- done = SHA1_BLOCK_SIZE - partial;
- memcpy(sctx->buffer + partial, data, done);
- sha1_transform_asm(sctx->state, sctx->buffer, 1);
- }
-
- if (len - done >= SHA1_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
-
- sha1_transform_asm(sctx->state, data + done, rounds);
- done += rounds * SHA1_BLOCK_SIZE;
- }
-
- memcpy(sctx->buffer, data + done, len - done);
-
- return 0;
-}
+static void (*sha1_transform_asm)(u32 *, const char *, unsigned int);
static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
- int res;
- /* Handle the fast case right here */
- if (partial + len < SHA1_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buffer + partial, data, len);
+ if (!irq_fpu_usable() ||
+ (sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
+ return crypto_sha1_update(desc, data, len);
- return 0;
- }
+ /* make sure casting to sha1_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha1_state, state) != 0);
- if (!irq_fpu_usable()) {
- res = crypto_sha1_update(desc, data, len);
- } else {
- kernel_fpu_begin();
- res = __sha1_ssse3_update(desc, data, len, partial);
- kernel_fpu_end();
- }
-
- return res;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA1_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
- if (!irq_fpu_usable()) {
- crypto_sha1_update(desc, padding, padlen);
- crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_fpu_begin();
- /* We need to fill a whole block for __sha1_ssse3_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buffer + index, padding, padlen);
- } else {
- __sha1_ssse3_update(desc, padding, padlen, index);
- }
- __sha1_ssse3_update(desc, (const u8 *)&bits, sizeof(bits), 56);
- kernel_fpu_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
+ kernel_fpu_begin();
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_transform_asm);
+ kernel_fpu_end();
return 0;
}
-static int sha1_ssse3_export(struct shash_desc *desc, void *out)
+static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
+ if (!irq_fpu_usable())
+ return crypto_sha1_finup(desc, data, len, out);
- memcpy(out, sctx, sizeof(*sctx));
+ kernel_fpu_begin();
+ if (len)
+ sha1_base_do_update(desc, data, len,
+ (sha1_block_fn *)sha1_transform_asm);
+ sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm);
+ kernel_fpu_end();
- return 0;
+ return sha1_base_finish(desc, out);
}
-static int sha1_ssse3_import(struct shash_desc *desc, const void *in)
+/* Add padding and return the message digest. */
+static int sha1_ssse3_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
+ return sha1_ssse3_finup(desc, NULL, 0, out);
}
#ifdef CONFIG_AS_AVX2
@@ -186,13 +105,11 @@ static void sha1_apply_transform_avx2(u32 *digest, const char *data,
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_ssse3_init,
+ .init = sha1_base_init,
.update = sha1_ssse3_update,
.final = sha1_ssse3_final,
- .export = sha1_ssse3_export,
- .import = sha1_ssse3_import,
+ .finup = sha1_ssse3_finup,
.descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-ssse3",
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 642f15687a0a..92b3b5d75ba9 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -96,10 +96,10 @@ SHUF_DC00 = %xmm12 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm13
NUM_BLKS = %rdx # 3rd arg
-CTX = %rsi # 2nd arg
-INP = %rdi # 1st arg
+INP = %rsi # 2nd arg
+CTX = %rdi # 1st arg
-SRND = %rdi # clobbers INP
+SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
@@ -342,8 +342,8 @@ a = TMP_
########################################################################
## void sha256_transform_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9e86944c539d..570ec5ec62d7 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -91,12 +91,12 @@ BYTE_FLIP_MASK = %ymm13
X_BYTE_FLIP_MASK = %xmm13 # XMM version of BYTE_FLIP_MASK
NUM_BLKS = %rdx # 3rd arg
-CTX = %rsi # 2nd arg
-INP = %rdi # 1st arg
+INP = %rsi # 2nd arg
+CTX = %rdi # 1st arg
c = %ecx
d = %r8d
e = %edx # clobbers NUM_BLKS
-y3 = %edi # clobbers INP
+y3 = %esi # clobbers INP
TBL = %rbp
@@ -523,8 +523,8 @@ STACK_SIZE = _RSP + _RSP_SIZE
########################################################################
## void sha256_transform_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index f833b74d902b..2cedc44e8121 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -88,10 +88,10 @@ SHUF_DC00 = %xmm11 # shuffle xDxC -> DC00
BYTE_FLIP_MASK = %xmm12
NUM_BLKS = %rdx # 3rd arg
-CTX = %rsi # 2nd arg
-INP = %rdi # 1st arg
+INP = %rsi # 2nd arg
+CTX = %rdi # 1st arg
-SRND = %rdi # clobbers INP
+SRND = %rsi # clobbers INP
c = %ecx
d = %r8d
e = %edx
@@ -348,8 +348,8 @@ a = TMP_
########################################################################
## void sha256_transform_ssse3(void *input_data, UINT32 digest[8], UINT64 num_blks)
-## arg 1 : pointer to input data
-## arg 2 : pointer to digest
+## arg 1 : pointer to digest
+## arg 2 : pointer to input data
## arg 3 : Num blocks
########################################################################
.text
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 8fad72f4dfd2..ccc338881ee8 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -36,195 +36,74 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha256_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>
-asmlinkage void sha256_transform_ssse3(const char *data, u32 *digest,
- u64 rounds);
+asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
+ u64 rounds);
#ifdef CONFIG_AS_AVX
-asmlinkage void sha256_transform_avx(const char *data, u32 *digest,
+asmlinkage void sha256_transform_avx(u32 *digest, const char *data,
u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
-asmlinkage void sha256_transform_rorx(const char *data, u32 *digest,
- u64 rounds);
+asmlinkage void sha256_transform_rorx(u32 *digest, const char *data,
+ u64 rounds);
#endif
-static asmlinkage void (*sha256_transform_asm)(const char *, u32 *, u64);
-
-
-static int sha256_ssse3_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
-
- return 0;
-}
-
-static int __sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count += len;
-
- if (partial) {
- done = SHA256_BLOCK_SIZE - partial;
- memcpy(sctx->buf + partial, data, done);
- sha256_transform_asm(sctx->buf, sctx->state, 1);
- }
-
- if (len - done >= SHA256_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA256_BLOCK_SIZE;
-
- sha256_transform_asm(data + done, sctx->state, (u64) rounds);
-
- done += rounds * SHA256_BLOCK_SIZE;
- }
-
- memcpy(sctx->buf, data + done, len - done);
-
- return 0;
-}
+static void (*sha256_transform_asm)(u32 *, const char *, u64);
static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count % SHA256_BLOCK_SIZE;
- int res;
- /* Handle the fast case right here */
- if (partial + len < SHA256_BLOCK_SIZE) {
- sctx->count += len;
- memcpy(sctx->buf + partial, data, len);
+ if (!irq_fpu_usable() ||
+ (sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
+ return crypto_sha256_update(desc, data, len);
- return 0;
- }
-
- if (!irq_fpu_usable()) {
- res = crypto_sha256_update(desc, data, len);
- } else {
- kernel_fpu_begin();
- res = __sha256_ssse3_update(desc, data, len, partial);
- kernel_fpu_end();
- }
-
- return res;
-}
+ /* make sure casting to sha256_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha256_state, state) != 0);
-
-/* Add padding and return the message digest. */
-static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- static const u8 padding[SHA256_BLOCK_SIZE] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 and append length */
- index = sctx->count % SHA256_BLOCK_SIZE;
- padlen = (index < 56) ? (56 - index) : ((SHA256_BLOCK_SIZE+56)-index);
-
- if (!irq_fpu_usable()) {
- crypto_sha256_update(desc, padding, padlen);
- crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_fpu_begin();
- /* We need to fill a whole block for __sha256_ssse3_update() */
- if (padlen <= 56) {
- sctx->count += padlen;
- memcpy(sctx->buf + index, padding, padlen);
- } else {
- __sha256_ssse3_update(desc, padding, padlen, index);
- }
- __sha256_ssse3_update(desc, (const u8 *)&bits,
- sizeof(bits), 56);
- kernel_fpu_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
+ kernel_fpu_begin();
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_transform_asm);
+ kernel_fpu_end();
return 0;
}
-static int sha256_ssse3_export(struct shash_desc *desc, void *out)
+static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ if (!irq_fpu_usable())
+ return crypto_sha256_finup(desc, data, len, out);
- memcpy(out, sctx, sizeof(*sctx));
+ kernel_fpu_begin();
+ if (len)
+ sha256_base_do_update(desc, data, len,
+ (sha256_block_fn *)sha256_transform_asm);
+ sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm);
+ kernel_fpu_end();
- return 0;
+ return sha256_base_finish(desc, out);
}
-static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha224_ssse3_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA224_H0;
- sctx->state[1] = SHA224_H1;
- sctx->state[2] = SHA224_H2;
- sctx->state[3] = SHA224_H3;
- sctx->state[4] = SHA224_H4;
- sctx->state[5] = SHA224_H5;
- sctx->state[6] = SHA224_H6;
- sctx->state[7] = SHA224_H7;
- sctx->count = 0;
-
- return 0;
-}
-
-static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
+/* Add padding and return the message digest. */
+static int sha256_ssse3_final(struct shash_desc *desc, u8 *out)
{
- u8 D[SHA256_DIGEST_SIZE];
-
- sha256_ssse3_final(desc, D);
-
- memcpy(hash, D, SHA224_DIGEST_SIZE);
- memzero_explicit(D, SHA256_DIGEST_SIZE);
-
- return 0;
+ return sha256_ssse3_finup(desc, NULL, 0, out);
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_ssse3_init,
+ .init = sha256_base_init,
.update = sha256_ssse3_update,
.final = sha256_ssse3_final,
- .export = sha256_ssse3_export,
- .import = sha256_ssse3_import,
+ .finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-ssse3",
@@ -235,13 +114,11 @@ static struct shash_alg algs[] = { {
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_ssse3_init,
+ .init = sha224_base_init,
.update = sha256_ssse3_update,
- .final = sha224_ssse3_final,
- .export = sha256_ssse3_export,
- .import = sha256_ssse3_import,
+ .final = sha256_ssse3_final,
+ .finup = sha256_ssse3_finup,
.descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ssse3",
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 974dde9bc6cd..565274d6a641 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -54,9 +54,9 @@
# Virtual Registers
# ARG1
-msg = %rdi
+digest = %rdi
# ARG2
-digest = %rsi
+msg = %rsi
# ARG3
msglen = %rdx
T1 = %rcx
@@ -271,7 +271,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
-# void sha512_transform_avx(const void* M, void* D, u64 L)
+# void sha512_transform_avx(void* D, const void* M, u64 L)
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 568b96105f5c..a4771dcd1fcf 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -70,9 +70,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
# 1st arg
-INP = %rdi
+CTX = %rdi
# 2nd arg
-CTX = %rsi
+INP = %rsi
# 3rd arg
NUM_BLKS = %rdx
@@ -562,7 +562,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
-# void sha512_transform_rorx(const void* M, void* D, uint64_t L)#
+# void sha512_transform_rorx(void* D, const void* M, uint64_t L)#
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index fb56855d51f5..e610e29cbc81 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -53,9 +53,9 @@
# Virtual Registers
# ARG1
-msg = %rdi
+digest = %rdi
# ARG2
-digest = %rsi
+msg = %rsi
# ARG3
msglen = %rdx
T1 = %rcx
@@ -269,7 +269,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
.endm
########################################################################
-# void sha512_transform_ssse3(const void* M, void* D, u64 L)#
+# void sha512_transform_ssse3(void* D, const void* M, u64 L)#
# Purpose: Updates the SHA512 digest stored at D with the message stored in M.
# The size of the message pointed to by M must be an integer multiple of SHA512
# message blocks.
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index 0b6af26832bf..d9fa4c1e063f 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -34,205 +34,75 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
-#include <asm/byteorder.h>
+#include <crypto/sha512_base.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <linux/string.h>
-asmlinkage void sha512_transform_ssse3(const char *data, u64 *digest,
- u64 rounds);
+asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
+ u64 rounds);
#ifdef CONFIG_AS_AVX
-asmlinkage void sha512_transform_avx(const char *data, u64 *digest,
+asmlinkage void sha512_transform_avx(u64 *digest, const char *data,
u64 rounds);
#endif
#ifdef CONFIG_AS_AVX2
-asmlinkage void sha512_transform_rorx(const char *data, u64 *digest,
- u64 rounds);
+asmlinkage void sha512_transform_rorx(u64 *digest, const char *data,
+ u64 rounds);
#endif
-static asmlinkage void (*sha512_transform_asm)(const char *, u64 *, u64);
-
-
-static int sha512_ssse3_init(struct shash_desc *desc)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA512_H0;
- sctx->state[1] = SHA512_H1;
- sctx->state[2] = SHA512_H2;
- sctx->state[3] = SHA512_H3;
- sctx->state[4] = SHA512_H4;
- sctx->state[5] = SHA512_H5;
- sctx->state[6] = SHA512_H6;
- sctx->state[7] = SHA512_H7;
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
-}
+static void (*sha512_transform_asm)(u64 *, const char *, u64);
-static int __sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len, unsigned int partial)
+static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int done = 0;
-
- sctx->count[0] += len;
- if (sctx->count[0] < len)
- sctx->count[1]++;
- if (partial) {
- done = SHA512_BLOCK_SIZE - partial;
- memcpy(sctx->buf + partial, data, done);
- sha512_transform_asm(sctx->buf, sctx->state, 1);
- }
-
- if (len - done >= SHA512_BLOCK_SIZE) {
- const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
+ if (!irq_fpu_usable() ||
+ (sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
+ return crypto_sha512_update(desc, data, len);
- sha512_transform_asm(data + done, sctx->state, (u64) rounds);
-
- done += rounds * SHA512_BLOCK_SIZE;
- }
+ /* make sure casting to sha512_block_fn() is safe */
+ BUILD_BUG_ON(offsetof(struct sha512_state, state) != 0);
- memcpy(sctx->buf, data + done, len - done);
+ kernel_fpu_begin();
+ sha512_base_do_update(desc, data, len,
+ (sha512_block_fn *)sha512_transform_asm);
+ kernel_fpu_end();
return 0;
}
-static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
- int res;
-
- /* Handle the fast case right here */
- if (partial + len < SHA512_BLOCK_SIZE) {
- sctx->count[0] += len;
- if (sctx->count[0] < len)
- sctx->count[1]++;
- memcpy(sctx->buf + partial, data, len);
-
- return 0;
- }
+ if (!irq_fpu_usable())
+ return crypto_sha512_finup(desc, data, len, out);
- if (!irq_fpu_usable()) {
- res = crypto_sha512_update(desc, data, len);
- } else {
- kernel_fpu_begin();
- res = __sha512_ssse3_update(desc, data, len, partial);
- kernel_fpu_end();
- }
+ kernel_fpu_begin();
+ if (len)
+ sha512_base_do_update(desc, data, len,
+ (sha512_block_fn *)sha512_transform_asm);
+ sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm);
+ kernel_fpu_end();
- return res;
+ return sha512_base_finish(desc, out);
}
-
/* Add padding and return the message digest. */
static int sha512_ssse3_final(struct shash_desc *desc, u8 *out)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- unsigned int i, index, padlen;
- __be64 *dst = (__be64 *)out;
- __be64 bits[2];
- static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
-
- /* save number of bits */
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
- /* Pad out to 112 mod 128 and append length */
- index = sctx->count[0] & 0x7f;
- padlen = (index < 112) ? (112 - index) : ((128+112) - index);
-
- if (!irq_fpu_usable()) {
- crypto_sha512_update(desc, padding, padlen);
- crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
- } else {
- kernel_fpu_begin();
- /* We need to fill a whole block for __sha512_ssse3_update() */
- if (padlen <= 112) {
- sctx->count[0] += padlen;
- if (sctx->count[0] < padlen)
- sctx->count[1]++;
- memcpy(sctx->buf + index, padding, padlen);
- } else {
- __sha512_ssse3_update(desc, padding, padlen, index);
- }
- __sha512_ssse3_update(desc, (const u8 *)&bits,
- sizeof(bits), 112);
- kernel_fpu_end();
- }
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be64(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_ssse3_export(struct shash_desc *desc, void *out)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
-
- return 0;
-}
-
-static int sha384_ssse3_init(struct shash_desc *desc)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- sctx->state[0] = SHA384_H0;
- sctx->state[1] = SHA384_H1;
- sctx->state[2] = SHA384_H2;
- sctx->state[3] = SHA384_H3;
- sctx->state[4] = SHA384_H4;
- sctx->state[5] = SHA384_H5;
- sctx->state[6] = SHA384_H6;
- sctx->state[7] = SHA384_H7;
-
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
-}
-
-static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
-{
- u8 D[SHA512_DIGEST_SIZE];
-
- sha512_ssse3_final(desc, D);
-
- memcpy(hash, D, SHA384_DIGEST_SIZE);
- memzero_explicit(D, SHA512_DIGEST_SIZE);
-
- return 0;
+ return sha512_ssse3_finup(desc, NULL, 0, out);
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
- .init = sha512_ssse3_init,
+ .init = sha512_base_init,
.update = sha512_ssse3_update,
.final = sha512_ssse3_final,
- .export = sha512_ssse3_export,
- .import = sha512_ssse3_import,
+ .finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-ssse3",
@@ -243,13 +113,11 @@ static struct shash_alg algs[] = { {
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
- .init = sha384_ssse3_init,
+ .init = sha384_base_init,
.update = sha512_ssse3_update,
- .final = sha384_ssse3_final,
- .export = sha512_ssse3_export,
- .import = sha512_ssse3_import,
+ .final = sha512_ssse3_final,
+ .finup = sha512_ssse3_finup,
.descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 1ac531ea9bcc..b5e2d5651851 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -340,7 +340,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__ecb-twofish-avx",
.cra_driver_name = "__driver-ecb-twofish-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@@ -359,7 +360,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__cbc-twofish-avx",
.cra_driver_name = "__driver-cbc-twofish-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@@ -378,7 +380,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__ctr-twofish-avx",
.cra_driver_name = "__driver-ctr-twofish-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
@@ -398,7 +401,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__lrw-twofish-avx",
.cra_driver_name = "__driver-lrw-twofish-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
.cra_alignmask = 0,
@@ -421,7 +425,8 @@ static struct crypto_alg twofish_algs[10] = { {
.cra_name = "__xts-twofish-avx",
.cra_driver_name = "__driver-xts-twofish-avx",
.cra_priority = 0,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
+ CRYPTO_ALG_INTERNAL,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.cra_alignmask = 0,
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 50f4da44a304..8aaf298a80e1 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -436,6 +436,14 @@ config CRYPTO_MD5_OCTEON
MD5 message digest algorithm (RFC1321) implemented
using OCTEON crypto instructions, when available.
+config CRYPTO_MD5_PPC
+ tristate "MD5 digest algorithm (PPC)"
+ depends on PPC
+ select CRYPTO_HASH
+ help
+ MD5 message digest algorithm (RFC1321) implemented
+ in PPC assembler.
+
config CRYPTO_MD5_SPARC64
tristate "MD5 digest algorithm (SPARC64)"
depends on SPARC64
@@ -546,34 +554,23 @@ config CRYPTO_SHA512_SSSE3
Extensions version 1 (AVX1), or Advanced Vector Extensions
version 2 (AVX2) instructions, when available.
-config CRYPTO_SHA1_SPARC64
- tristate "SHA1 digest algorithm (SPARC64)"
- depends on SPARC64
- select CRYPTO_SHA1
- select CRYPTO_HASH
- help
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using sparc64 crypto instructions, when available.
-
-config CRYPTO_SHA1_ARM
- tristate "SHA1 digest algorithm (ARM-asm)"
- depends on ARM
+config CRYPTO_SHA1_OCTEON
+ tristate "SHA1 digest algorithm (OCTEON)"
+ depends on CPU_CAVIUM_OCTEON
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using optimized ARM assembler.
+ using OCTEON crypto instructions, when available.
-config CRYPTO_SHA1_ARM_NEON
- tristate "SHA1 digest algorithm (ARM NEON)"
- depends on ARM && KERNEL_MODE_NEON
- select CRYPTO_SHA1_ARM
+config CRYPTO_SHA1_SPARC64
+ tristate "SHA1 digest algorithm (SPARC64)"
+ depends on SPARC64
select CRYPTO_SHA1
select CRYPTO_HASH
help
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented
- using optimized ARM NEON assembly, when NEON instructions are
- available.
+ using sparc64 crypto instructions, when available.
config CRYPTO_SHA1_PPC
tristate "SHA1 digest algorithm (powerpc)"
@@ -582,6 +579,13 @@ config CRYPTO_SHA1_PPC
This is the powerpc hardware accelerated implementation of the
SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
+config CRYPTO_SHA1_PPC_SPE
+ tristate "SHA1 digest algorithm (PPC SPE)"
+ depends on PPC && SPE
+ help
+ SHA-1 secure hash standard (DFIPS 180-4) implemented
+ using powerpc SPE SIMD instruction set.
+
config CRYPTO_SHA1_MB
tristate "SHA1 digest algorithm (x86_64 Multi-Buffer, Experimental)"
depends on X86 && 64BIT
@@ -610,6 +614,24 @@ config CRYPTO_SHA256
This code also includes SHA-224, a 224 bit hash with 112 bits
of security against collision attacks.
+config CRYPTO_SHA256_PPC_SPE
+ tristate "SHA224 and SHA256 digest algorithm (PPC SPE)"
+ depends on PPC && SPE
+ select CRYPTO_SHA256
+ select CRYPTO_HASH
+ help
+ SHA224 and SHA256 secure hash standard (DFIPS 180-2)
+ implemented using powerpc SPE SIMD instruction set.
+
+config CRYPTO_SHA256_OCTEON
+ tristate "SHA224 and SHA256 digest algorithm (OCTEON)"
+ depends on CPU_CAVIUM_OCTEON
+ select CRYPTO_SHA256
+ select CRYPTO_HASH
+ help
+ SHA-256 secure hash standard (DFIPS 180-2) implemented
+ using OCTEON crypto instructions, when available.
+
config CRYPTO_SHA256_SPARC64
tristate "SHA224 and SHA256 digest algorithm (SPARC64)"
depends on SPARC64
@@ -631,29 +653,23 @@ config CRYPTO_SHA512
This code also includes SHA-384, a 384 bit hash with 192 bits
of security against collision attacks.
-config CRYPTO_SHA512_SPARC64
- tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
- depends on SPARC64
+config CRYPTO_SHA512_OCTEON
+ tristate "SHA384 and SHA512 digest algorithms (OCTEON)"
+ depends on CPU_CAVIUM_OCTEON
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
- using sparc64 crypto instructions, when available.
+ using OCTEON crypto instructions, when available.
-config CRYPTO_SHA512_ARM_NEON
- tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
- depends on ARM && KERNEL_MODE_NEON
+config CRYPTO_SHA512_SPARC64
+ tristate "SHA384 and SHA512 digest algorithm (SPARC64)"
+ depends on SPARC64
select CRYPTO_SHA512
select CRYPTO_HASH
help
SHA-512 secure hash standard (DFIPS 180-2) implemented
- using ARM NEON instructions, when available.
-
- This version of SHA implements a 512 bit hash with 256 bits of
- security against collision attacks.
-
- This code also includes SHA-384, a 384 bit hash with 192 bits
- of security against collision attacks.
+ using sparc64 crypto instructions, when available.
config CRYPTO_TGR192
tristate "Tiger digest algorithms"
@@ -817,45 +833,18 @@ config CRYPTO_AES_SPARC64
for some popular block cipher mode is supported too, including
ECB and CBC.
-config CRYPTO_AES_ARM
- tristate "AES cipher algorithms (ARM-asm)"
- depends on ARM
- select CRYPTO_ALGAPI
- select CRYPTO_AES
- help
- Use optimized AES assembler routines for ARM platforms.
-
- AES cipher algorithms (FIPS-197). AES uses the Rijndael
- algorithm.
-
- Rijndael appears to be consistently a very good performer in
- both hardware and software across a wide range of computing
- environments regardless of its use in feedback or non-feedback
- modes. Its key setup time is excellent, and its key agility is
- good. Rijndael's very low memory requirements make it very well
- suited for restricted-space environments, in which it also
- demonstrates excellent performance. Rijndael's operations are
- among the easiest to defend against power and timing attacks.
-
- The AES specifies three key sizes: 128, 192 and 256 bits
-
- See <http://csrc.nist.gov/encryption/aes/> for more information.
-
-config CRYPTO_AES_ARM_BS
- tristate "Bit sliced AES using NEON instructions"
- depends on ARM && KERNEL_MODE_NEON
- select CRYPTO_ALGAPI
- select CRYPTO_AES_ARM
- select CRYPTO_ABLK_HELPER
+config CRYPTO_AES_PPC_SPE
+ tristate "AES cipher algorithms (PPC SPE)"
+ depends on PPC && SPE
help
- Use a faster and more secure NEON based implementation of AES in CBC,
- CTR and XTS modes
-
- Bit sliced AES gives around 45% speedup on Cortex-A15 for CTR mode
- and for XTS mode encryption, CBC and XTS mode decryption speedup is
- around 25%. (CBC encryption speed is not affected by this driver.)
- This implementation does not rely on any lookup tables so it is
- believed to be invulnerable to cache timing attacks.
+ AES cipher algorithms (FIPS-197). Additionally the acceleration
+ for popular block cipher modes ECB, CBC, CTR and XTS is supported.
+ This module should only be used for low power (router) devices
+ without hardware AES acceleration (e.g. caam crypto). It reduces the
+ size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
+ timining attacks. Nevertheless it might be not as secure as other
+ architecture specific assembler implementations that work on 1KB
+ tables or 256 bytes S-boxes.
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
@@ -1199,7 +1188,7 @@ config CRYPTO_SERPENT_SSE2_X86_64
Keys are allowed to be from 0 to 256 bits in length, in steps
of 8 bits.
- This module provides Serpent cipher algorithm that processes eigth
+ This module provides Serpent cipher algorithm that processes eight
blocks parallel using SSE2 instruction set.
See also:
@@ -1523,6 +1512,15 @@ config CRYPTO_USER_API_RNG
This option enables the user-spaces interface for random
number generator algorithms.
+config CRYPTO_USER_API_AEAD
+ tristate "User-space interface for AEAD cipher algorithms"
+ depends on NET
+ select CRYPTO_AEAD
+ select CRYPTO_USER_API
+ help
+ This option enables the user-spaces interface for AEAD
+ cipher algorithms.
+
config CRYPTO_HASH_INFO
bool
diff --git a/crypto/Makefile b/crypto/Makefile
index ba19465f9ad3..97b7d3ac87e7 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o
obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o
obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o
obj-$(CONFIG_CRYPTO_USER_API_RNG) += algif_rng.o
+obj-$(CONFIG_CRYPTO_USER_API_AEAD) += algif_aead.o
#
# generic algorithms and the async_tx api
diff --git a/crypto/ablk_helper.c b/crypto/ablk_helper.c
index ffe7278d4bd8..e1fcf53bb931 100644
--- a/crypto/ablk_helper.c
+++ b/crypto/ablk_helper.c
@@ -124,7 +124,8 @@ int ablk_init_common(struct crypto_tfm *tfm, const char *drv_name)
struct async_helper_ctx *ctx = crypto_tfm_ctx(tfm);
struct cryptd_ablkcipher *cryptd_tfm;
- cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, 0, 0);
+ cryptd_tfm = cryptd_alloc_ablkcipher(drv_name, CRYPTO_ALG_INTERNAL,
+ CRYPTO_ALG_INTERNAL);
if (IS_ERR(cryptd_tfm))
return PTR_ERR(cryptd_tfm);
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 83b04e0884b1..2d0a1c64ce39 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -64,6 +64,8 @@ static int crypto_check_alg(struct crypto_alg *alg)
if (alg->cra_priority < 0)
return -EINVAL;
+ atomic_set(&alg->cra_refcnt, 1);
+
return crypto_set_driver_name(alg);
}
@@ -99,10 +101,9 @@ static struct list_head *crypto_more_spawns(struct crypto_alg *alg,
return &n->list == stack ? top : &n->inst->alg.cra_users;
}
-static void crypto_remove_spawn(struct crypto_spawn *spawn,
- struct list_head *list)
+static void crypto_remove_instance(struct crypto_instance *inst,
+ struct list_head *list)
{
- struct crypto_instance *inst = spawn->inst;
struct crypto_template *tmpl = inst->tmpl;
if (crypto_is_dead(&inst->alg))
@@ -167,7 +168,7 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
if (spawn->alg)
list_move(&spawn->list, &spawn->alg->cra_users);
else
- crypto_remove_spawn(spawn, list);
+ crypto_remove_instance(spawn->inst, list);
}
}
EXPORT_SYMBOL_GPL(crypto_remove_spawns);
@@ -188,7 +189,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
ret = -EEXIST;
- atomic_set(&alg->cra_refcnt, 1);
list_for_each_entry(q, &crypto_alg_list, cra_list) {
if (q == alg)
goto err;
@@ -523,7 +523,10 @@ int crypto_register_instance(struct crypto_template *tmpl,
err = crypto_check_alg(&inst->alg);
if (err)
- goto err;
+ return err;
+
+ if (unlikely(!crypto_mod_get(&inst->alg)))
+ return -EAGAIN;
inst->alg.cra_module = tmpl->module;
inst->alg.cra_flags |= CRYPTO_ALG_INSTANCE;
@@ -545,37 +548,30 @@ unlock:
goto err;
crypto_wait_for_test(larval);
+
+ /* Remove instance if test failed */
+ if (!(inst->alg.cra_flags & CRYPTO_ALG_TESTED))
+ crypto_unregister_instance(inst);
err = 0;
err:
+ crypto_mod_put(&inst->alg);
return err;
}
EXPORT_SYMBOL_GPL(crypto_register_instance);
-int crypto_unregister_instance(struct crypto_alg *alg)
+int crypto_unregister_instance(struct crypto_instance *inst)
{
- int err;
- struct crypto_instance *inst = (void *)alg;
- struct crypto_template *tmpl = inst->tmpl;
- LIST_HEAD(users);
-
- if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
- return -EINVAL;
-
- BUG_ON(atomic_read(&alg->cra_refcnt) != 1);
+ LIST_HEAD(list);
down_write(&crypto_alg_sem);
- hlist_del_init(&inst->list);
- err = crypto_remove_alg(alg, &users);
+ crypto_remove_spawns(&inst->alg, &list, NULL);
+ crypto_remove_instance(inst, &list);
up_write(&crypto_alg_sem);
- if (err)
- return err;
-
- tmpl->free(inst);
- crypto_remove_final(&users);
+ crypto_remove_final(&list);
return 0;
}
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
new file mode 100644
index 000000000000..527d27b023ab
--- /dev/null
+++ b/crypto/algif_aead.c
@@ -0,0 +1,666 @@
+/*
+ * algif_aead: User-space interface for AEAD algorithms
+ *
+ * Copyright (C) 2014, Stephan Mueller <smueller@chronox.de>
+ *
+ * This file provides the user-space API for AEAD ciphers.
+ *
+ * This file is derived from algif_skcipher.c.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/scatterwalk.h>
+#include <crypto/if_alg.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/net.h>
+#include <net/sock.h>
+
+struct aead_sg_list {
+ unsigned int cur;
+ struct scatterlist sg[ALG_MAX_PAGES];
+};
+
+struct aead_ctx {
+ struct aead_sg_list tsgl;
+ /*
+ * RSGL_MAX_ENTRIES is an artificial limit where user space at maximum
+ * can cause the kernel to allocate RSGL_MAX_ENTRIES * ALG_MAX_PAGES
+ * bytes
+ */
+#define RSGL_MAX_ENTRIES ALG_MAX_PAGES
+ struct af_alg_sgl rsgl[RSGL_MAX_ENTRIES];
+
+ void *iv;
+
+ struct af_alg_completion completion;
+
+ unsigned long used;
+
+ unsigned int len;
+ bool more;
+ bool merge;
+ bool enc;
+
+ size_t aead_assoclen;
+ struct aead_request aead_req;
+};
+
+static inline int aead_sndbuf(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+
+ return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) -
+ ctx->used, 0);
+}
+
+static inline bool aead_writable(struct sock *sk)
+{
+ return PAGE_SIZE <= aead_sndbuf(sk);
+}
+
+static inline bool aead_sufficient_data(struct aead_ctx *ctx)
+{
+ unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+
+ return (ctx->used >= (ctx->aead_assoclen + (ctx->enc ? 0 : as)));
+}
+
+static void aead_put_sgl(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ struct aead_sg_list *sgl = &ctx->tsgl;
+ struct scatterlist *sg = sgl->sg;
+ unsigned int i;
+
+ for (i = 0; i < sgl->cur; i++) {
+ if (!sg_page(sg + i))
+ continue;
+
+ put_page(sg_page(sg + i));
+ sg_assign_page(sg + i, NULL);
+ }
+ sgl->cur = 0;
+ ctx->used = 0;
+ ctx->more = 0;
+ ctx->merge = 0;
+}
+
+static void aead_wmem_wakeup(struct sock *sk)
+{
+ struct socket_wq *wq;
+
+ if (!aead_writable(sk))
+ return;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
+ POLLRDNORM |
+ POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
+ rcu_read_unlock();
+}
+
+static int aead_wait_for_data(struct sock *sk, unsigned flags)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ long timeout;
+ DEFINE_WAIT(wait);
+ int err = -ERESTARTSYS;
+
+ if (flags & MSG_DONTWAIT)
+ return -EAGAIN;
+
+ set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+ for (;;) {
+ if (signal_pending(current))
+ break;
+ prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (sk_wait_event(sk, &timeout, !ctx->more)) {
+ err = 0;
+ break;
+ }
+ }
+ finish_wait(sk_sleep(sk), &wait);
+
+ clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+
+ return err;
+}
+
+static void aead_data_wakeup(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ struct socket_wq *wq;
+
+ if (ctx->more)
+ return;
+ if (!ctx->used)
+ return;
+
+ rcu_read_lock();
+ wq = rcu_dereference(sk->sk_wq);
+ if (wq_has_sleeper(wq))
+ wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
+ POLLRDNORM |
+ POLLRDBAND);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ rcu_read_unlock();
+}
+
+static int aead_sendmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t size)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ unsigned ivsize =
+ crypto_aead_ivsize(crypto_aead_reqtfm(&ctx->aead_req));
+ struct aead_sg_list *sgl = &ctx->tsgl;
+ struct af_alg_control con = {};
+ long copied = 0;
+ bool enc = 0;
+ bool init = 0;
+ int err = -EINVAL;
+
+ if (msg->msg_controllen) {
+ err = af_alg_cmsg_send(msg, &con);
+ if (err)
+ return err;
+
+ init = 1;
+ switch (con.op) {
+ case ALG_OP_ENCRYPT:
+ enc = 1;
+ break;
+ case ALG_OP_DECRYPT:
+ enc = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (con.iv && con.iv->ivlen != ivsize)
+ return -EINVAL;
+ }
+
+ lock_sock(sk);
+ if (!ctx->more && ctx->used)
+ goto unlock;
+
+ if (init) {
+ ctx->enc = enc;
+ if (con.iv)
+ memcpy(ctx->iv, con.iv->iv, ivsize);
+
+ ctx->aead_assoclen = con.aead_assoclen;
+ }
+
+ while (size) {
+ unsigned long len = size;
+ struct scatterlist *sg = NULL;
+
+ /* use the existing memory in an allocated page */
+ if (ctx->merge) {
+ sg = sgl->sg + sgl->cur - 1;
+ len = min_t(unsigned long, len,
+ PAGE_SIZE - sg->offset - sg->length);
+ err = memcpy_from_msg(page_address(sg_page(sg)) +
+ sg->offset + sg->length,
+ msg, len);
+ if (err)
+ goto unlock;
+
+ sg->length += len;
+ ctx->merge = (sg->offset + sg->length) &
+ (PAGE_SIZE - 1);
+
+ ctx->used += len;
+ copied += len;
+ size -= len;
+ continue;
+ }
+
+ if (!aead_writable(sk)) {
+ /* user space sent too much data */
+ aead_put_sgl(sk);
+ err = -EMSGSIZE;
+ goto unlock;
+ }
+
+ /* allocate a new page */
+ len = min_t(unsigned long, size, aead_sndbuf(sk));
+ while (len) {
+ int plen = 0;
+
+ if (sgl->cur >= ALG_MAX_PAGES) {
+ aead_put_sgl(sk);
+ err = -E2BIG;
+ goto unlock;
+ }
+
+ sg = sgl->sg + sgl->cur;
+ plen = min_t(int, len, PAGE_SIZE);
+
+ sg_assign_page(sg, alloc_page(GFP_KERNEL));
+ err = -ENOMEM;
+ if (!sg_page(sg))
+ goto unlock;
+
+ err = memcpy_from_msg(page_address(sg_page(sg)),
+ msg, plen);
+ if (err) {
+ __free_page(sg_page(sg));
+ sg_assign_page(sg, NULL);
+ goto unlock;
+ }
+
+ sg->offset = 0;
+ sg->length = plen;
+ len -= plen;
+ ctx->used += plen;
+ copied += plen;
+ sgl->cur++;
+ size -= plen;
+ ctx->merge = plen & (PAGE_SIZE - 1);
+ }
+ }
+
+ err = 0;
+
+ ctx->more = msg->msg_flags & MSG_MORE;
+ if (!ctx->more && !aead_sufficient_data(ctx)) {
+ aead_put_sgl(sk);
+ err = -EMSGSIZE;
+ }
+
+unlock:
+ aead_data_wakeup(sk);
+ release_sock(sk);
+
+ return err ?: copied;
+}
+
+static ssize_t aead_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ struct aead_sg_list *sgl = &ctx->tsgl;
+ int err = -EINVAL;
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ flags |= MSG_MORE;
+
+ if (sgl->cur >= ALG_MAX_PAGES)
+ return -E2BIG;
+
+ lock_sock(sk);
+ if (!ctx->more && ctx->used)
+ goto unlock;
+
+ if (!size)
+ goto done;
+
+ if (!aead_writable(sk)) {
+ /* user space sent too much data */
+ aead_put_sgl(sk);
+ err = -EMSGSIZE;
+ goto unlock;
+ }
+
+ ctx->merge = 0;
+
+ get_page(page);
+ sg_set_page(sgl->sg + sgl->cur, page, size, offset);
+ sgl->cur++;
+ ctx->used += size;
+
+ err = 0;
+
+done:
+ ctx->more = flags & MSG_MORE;
+ if (!ctx->more && !aead_sufficient_data(ctx)) {
+ aead_put_sgl(sk);
+ err = -EMSGSIZE;
+ }
+
+unlock:
+ aead_data_wakeup(sk);
+ release_sock(sk);
+
+ return err ?: size;
+}
+
+static int aead_recvmsg(struct kiocb *unused, struct socket *sock,
+ struct msghdr *msg, size_t ignored, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ unsigned bs = crypto_aead_blocksize(crypto_aead_reqtfm(&ctx->aead_req));
+ unsigned as = crypto_aead_authsize(crypto_aead_reqtfm(&ctx->aead_req));
+ struct aead_sg_list *sgl = &ctx->tsgl;
+ struct scatterlist *sg = NULL;
+ struct scatterlist assoc[ALG_MAX_PAGES];
+ size_t assoclen = 0;
+ unsigned int i = 0;
+ int err = -EINVAL;
+ unsigned long used = 0;
+ size_t outlen = 0;
+ size_t usedpages = 0;
+ unsigned int cnt = 0;
+
+ /* Limit number of IOV blocks to be accessed below */
+ if (msg->msg_iter.nr_segs > RSGL_MAX_ENTRIES)
+ return -ENOMSG;
+
+ lock_sock(sk);
+
+ /*
+ * AEAD memory structure: For encryption, the tag is appended to the
+ * ciphertext which implies that the memory allocated for the ciphertext
+ * must be increased by the tag length. For decryption, the tag
+ * is expected to be concatenated to the ciphertext. The plaintext
+ * therefore has a memory size of the ciphertext minus the tag length.
+ *
+ * The memory structure for cipher operation has the following
+ * structure:
+ * AEAD encryption input: assoc data || plaintext
+ * AEAD encryption output: cipherntext || auth tag
+ * AEAD decryption input: assoc data || ciphertext || auth tag
+ * AEAD decryption output: plaintext
+ */
+
+ if (ctx->more) {
+ err = aead_wait_for_data(sk, flags);
+ if (err)
+ goto unlock;
+ }
+
+ used = ctx->used;
+
+ /*
+ * Make sure sufficient data is present -- note, the same check is
+ * is also present in sendmsg/sendpage. The checks in sendpage/sendmsg
+ * shall provide an information to the data sender that something is
+ * wrong, but they are irrelevant to maintain the kernel integrity.
+ * We need this check here too in case user space decides to not honor
+ * the error message in sendmsg/sendpage and still call recvmsg. This
+ * check here protects the kernel integrity.
+ */
+ if (!aead_sufficient_data(ctx))
+ goto unlock;
+
+ /*
+ * The cipher operation input data is reduced by the associated data
+ * length as this data is processed separately later on.
+ */
+ used -= ctx->aead_assoclen;
+
+ if (ctx->enc) {
+ /* round up output buffer to multiple of block size */
+ outlen = ((used + bs - 1) / bs * bs);
+ /* add the size needed for the auth tag to be created */
+ outlen += as;
+ } else {
+ /* output data size is input without the authentication tag */
+ outlen = used - as;
+ /* round up output buffer to multiple of block size */
+ outlen = ((outlen + bs - 1) / bs * bs);
+ }
+
+ /* convert iovecs of output buffers into scatterlists */
+ while (iov_iter_count(&msg->msg_iter)) {
+ size_t seglen = min_t(size_t, iov_iter_count(&msg->msg_iter),
+ (outlen - usedpages));
+
+ /* make one iovec available as scatterlist */
+ err = af_alg_make_sg(&ctx->rsgl[cnt], &msg->msg_iter,
+ seglen);
+ if (err < 0)
+ goto unlock;
+ usedpages += err;
+ /* chain the new scatterlist with initial list */
+ if (cnt)
+ scatterwalk_crypto_chain(ctx->rsgl[0].sg,
+ ctx->rsgl[cnt].sg, 1,
+ sg_nents(ctx->rsgl[cnt-1].sg));
+ /* we do not need more iovecs as we have sufficient memory */
+ if (outlen <= usedpages)
+ break;
+ iov_iter_advance(&msg->msg_iter, err);
+ cnt++;
+ }
+
+ err = -EINVAL;
+ /* ensure output buffer is sufficiently large */
+ if (usedpages < outlen)
+ goto unlock;
+
+ sg_init_table(assoc, ALG_MAX_PAGES);
+ assoclen = ctx->aead_assoclen;
+ /*
+ * Split scatterlist into two: first part becomes AD, second part
+ * is plaintext / ciphertext. The first part is assigned to assoc
+ * scatterlist. When this loop finishes, sg points to the start of the
+ * plaintext / ciphertext.
+ */
+ for (i = 0; i < ctx->tsgl.cur; i++) {
+ sg = sgl->sg + i;
+ if (sg->length <= assoclen) {
+ /* AD is larger than one page */
+ sg_set_page(assoc + i, sg_page(sg),
+ sg->length, sg->offset);
+ assoclen -= sg->length;
+ if (i >= ctx->tsgl.cur)
+ goto unlock;
+ } else if (!assoclen) {
+ /* current page is to start of plaintext / ciphertext */
+ if (i)
+ /* AD terminates at page boundary */
+ sg_mark_end(assoc + i - 1);
+ else
+ /* AD size is zero */
+ sg_mark_end(assoc);
+ break;
+ } else {
+ /* AD does not terminate at page boundary */
+ sg_set_page(assoc + i, sg_page(sg),
+ assoclen, sg->offset);
+ sg_mark_end(assoc + i);
+ /* plaintext / ciphertext starts after AD */
+ sg->length -= assoclen;
+ sg->offset += assoclen;
+ break;
+ }
+ }
+
+ aead_request_set_assoc(&ctx->aead_req, assoc, ctx->aead_assoclen);
+ aead_request_set_crypt(&ctx->aead_req, sg, ctx->rsgl[0].sg, used,
+ ctx->iv);
+
+ err = af_alg_wait_for_completion(ctx->enc ?
+ crypto_aead_encrypt(&ctx->aead_req) :
+ crypto_aead_decrypt(&ctx->aead_req),
+ &ctx->completion);
+
+ if (err) {
+ /* EBADMSG implies a valid cipher operation took place */
+ if (err == -EBADMSG)
+ aead_put_sgl(sk);
+ goto unlock;
+ }
+
+ aead_put_sgl(sk);
+
+ err = 0;
+
+unlock:
+ for (i = 0; i < cnt; i++)
+ af_alg_free_sg(&ctx->rsgl[i]);
+
+ aead_wmem_wakeup(sk);
+ release_sock(sk);
+
+ return err ? err : outlen;
+}
+
+static unsigned int aead_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ struct sock *sk = sock->sk;
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ unsigned int mask;
+
+ sock_poll_wait(file, sk_sleep(sk), wait);
+ mask = 0;
+
+ if (!ctx->more)
+ mask |= POLLIN | POLLRDNORM;
+
+ if (aead_writable(sk))
+ mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+ return mask;
+}
+
+static struct proto_ops algif_aead_ops = {
+ .family = PF_ALG,
+
+ .connect = sock_no_connect,
+ .socketpair = sock_no_socketpair,
+ .getname = sock_no_getname,
+ .ioctl = sock_no_ioctl,
+ .listen = sock_no_listen,
+ .shutdown = sock_no_shutdown,
+ .getsockopt = sock_no_getsockopt,
+ .mmap = sock_no_mmap,
+ .bind = sock_no_bind,
+ .accept = sock_no_accept,
+ .setsockopt = sock_no_setsockopt,
+
+ .release = af_alg_release,
+ .sendmsg = aead_sendmsg,
+ .sendpage = aead_sendpage,
+ .recvmsg = aead_recvmsg,
+ .poll = aead_poll,
+};
+
+static void *aead_bind(const char *name, u32 type, u32 mask)
+{
+ return crypto_alloc_aead(name, type, mask);
+}
+
+static void aead_release(void *private)
+{
+ crypto_free_aead(private);
+}
+
+static int aead_setauthsize(void *private, unsigned int authsize)
+{
+ return crypto_aead_setauthsize(private, authsize);
+}
+
+static int aead_setkey(void *private, const u8 *key, unsigned int keylen)
+{
+ return crypto_aead_setkey(private, key, keylen);
+}
+
+static void aead_sock_destruct(struct sock *sk)
+{
+ struct alg_sock *ask = alg_sk(sk);
+ struct aead_ctx *ctx = ask->private;
+ unsigned int ivlen = crypto_aead_ivsize(
+ crypto_aead_reqtfm(&ctx->aead_req));
+
+ aead_put_sgl(sk);
+ sock_kzfree_s(sk, ctx->iv, ivlen);
+ sock_kfree_s(sk, ctx, ctx->len);
+ af_alg_release_parent(sk);
+}
+
+static int aead_accept_parent(void *private, struct sock *sk)
+{
+ struct aead_ctx *ctx;
+ struct alg_sock *ask = alg_sk(sk);
+ unsigned int len = sizeof(*ctx) + crypto_aead_reqsize(private);
+ unsigned int ivlen = crypto_aead_ivsize(private);
+
+ ctx = sock_kmalloc(sk, len, GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ memset(ctx, 0, len);
+
+ ctx->iv = sock_kmalloc(sk, ivlen, GFP_KERNEL);
+ if (!ctx->iv) {
+ sock_kfree_s(sk, ctx, len);
+ return -ENOMEM;
+ }
+ memset(ctx->iv, 0, ivlen);
+
+ ctx->len = len;
+ ctx->used = 0;
+ ctx->more = 0;
+ ctx->merge = 0;
+ ctx->enc = 0;
+ ctx->tsgl.cur = 0;
+ ctx->aead_assoclen = 0;
+ af_alg_init_completion(&ctx->completion);
+ sg_init_table(ctx->tsgl.sg, ALG_MAX_PAGES);
+
+ ask->private = ctx;
+
+ aead_request_set_tfm(&ctx->aead_req, private);
+ aead_request_set_callback(&ctx->aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ af_alg_complete, &ctx->completion);
+
+ sk->sk_destruct = aead_sock_destruct;
+
+ return 0;
+}
+
+static const struct af_alg_type algif_type_aead = {
+ .bind = aead_bind,
+ .release = aead_release,
+ .setkey = aead_setkey,
+ .setauthsize = aead_setauthsize,
+ .accept = aead_accept_parent,
+ .ops = &algif_aead_ops,
+ .name = "aead",
+ .owner = THIS_MODULE
+};
+
+static int __init algif_aead_init(void)
+{
+ return af_alg_register_type(&algif_type_aead);
+}
+
+static void __exit algif_aead_exit(void)
+{
+ int err = af_alg_unregister_type(&algif_type_aead);
+ BUG_ON(err);
+}
+
+module_init(algif_aead_init);
+module_exit(algif_aead_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Stephan Mueller <smueller@chronox.de>");
+MODULE_DESCRIPTION("AEAD kernel crypto API user space interface");
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 3acba0a7cd55..8109aaad2726 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -87,7 +87,7 @@ static int rng_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
return genlen;
err = memcpy_to_msg(msg, result, len);
- memzero_explicit(result, genlen);
+ memzero_explicit(result, len);
return err ? err : len;
}
diff --git a/crypto/ansi_cprng.c b/crypto/ansi_cprng.c
index 6f5bebc9bf01..765fe7609348 100644
--- a/crypto/ansi_cprng.c
+++ b/crypto/ansi_cprng.c
@@ -210,7 +210,11 @@ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx,
byte_count = DEFAULT_BLK_SZ;
}
- err = byte_count;
+ /*
+ * Return 0 in case of success as mandated by the kernel
+ * crypto API interface definition.
+ */
+ err = 0;
dbgprint(KERN_CRIT "getting %d random bytes for context %p\n",
byte_count, ctx);
diff --git a/crypto/api.c b/crypto/api.c
index 2a81e98a0021..afe4610afc4b 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -257,6 +257,16 @@ struct crypto_alg *crypto_alg_mod_lookup(const char *name, u32 type, u32 mask)
mask |= CRYPTO_ALG_TESTED;
}
+ /*
+ * If the internal flag is set for a cipher, require a caller to
+ * to invoke the cipher with the internal flag to use that cipher.
+ * Also, if a caller wants to allocate a cipher that may or may
+ * not be an internal cipher, use type | CRYPTO_ALG_INTERNAL and
+ * !(mask & CRYPTO_ALG_INTERNAL).
+ */
+ if (!((type | mask) & CRYPTO_ALG_INTERNAL))
+ mask |= CRYPTO_ALG_INTERNAL;
+
larval = crypto_larval_lookup(name, type, mask);
if (IS_ERR(larval) || !crypto_is_larval(larval))
return larval;
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 650afac10fd7..b0602ba03111 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -168,6 +168,20 @@ static inline struct cryptd_queue *cryptd_get_queue(struct crypto_tfm *tfm)
return ictx->queue;
}
+static inline void cryptd_check_internal(struct rtattr **tb, u32 *type,
+ u32 *mask)
+{
+ struct crypto_attr_type *algt;
+
+ algt = crypto_get_attr_type(tb);
+ if (IS_ERR(algt))
+ return;
+ if ((algt->type & CRYPTO_ALG_INTERNAL))
+ *type |= CRYPTO_ALG_INTERNAL;
+ if ((algt->mask & CRYPTO_ALG_INTERNAL))
+ *mask |= CRYPTO_ALG_INTERNAL;
+}
+
static int cryptd_blkcipher_setkey(struct crypto_ablkcipher *parent,
const u8 *key, unsigned int keylen)
{
@@ -321,10 +335,13 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
struct cryptd_instance_ctx *ctx;
struct crypto_instance *inst;
struct crypto_alg *alg;
+ u32 type = CRYPTO_ALG_TYPE_BLKCIPHER;
+ u32 mask = CRYPTO_ALG_TYPE_MASK;
int err;
- alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER,
- CRYPTO_ALG_TYPE_MASK);
+ cryptd_check_internal(tb, &type, &mask);
+
+ alg = crypto_get_attr_alg(tb, type, mask);
if (IS_ERR(alg))
return PTR_ERR(alg);
@@ -341,7 +358,10 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
if (err)
goto out_free_inst;
- inst->alg.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
+ type = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC;
+ if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
+ type |= CRYPTO_ALG_INTERNAL;
+ inst->alg.cra_flags = type;
inst->alg.cra_type = &crypto_ablkcipher_type;
inst->alg.cra_ablkcipher.ivsize = alg->cra_blkcipher.ivsize;
@@ -577,9 +597,13 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
struct ahash_instance *inst;
struct shash_alg *salg;
struct crypto_alg *alg;
+ u32 type = 0;
+ u32 mask = 0;
int err;
- salg = shash_attr_alg(tb[1], 0, 0);
+ cryptd_check_internal(tb, &type, &mask);
+
+ salg = shash_attr_alg(tb[1], type, mask);
if (IS_ERR(salg))
return PTR_ERR(salg);
@@ -598,7 +622,10 @@ static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
if (err)
goto out_free_inst;
- inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+ type = CRYPTO_ALG_ASYNC;
+ if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
+ type |= CRYPTO_ALG_INTERNAL;
+ inst->alg.halg.base.cra_flags = type;
inst->alg.halg.digestsize = salg->digestsize;
inst->alg.halg.base.cra_ctxsize = sizeof(struct cryptd_hash_ctx);
@@ -719,10 +746,13 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
struct aead_instance_ctx *ctx;
struct crypto_instance *inst;
struct crypto_alg *alg;
+ u32 type = CRYPTO_ALG_TYPE_AEAD;
+ u32 mask = CRYPTO_ALG_TYPE_MASK;
int err;
- alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_AEAD,
- CRYPTO_ALG_TYPE_MASK);
+ cryptd_check_internal(tb, &type, &mask);
+
+ alg = crypto_get_attr_alg(tb, type, mask);
if (IS_ERR(alg))
return PTR_ERR(alg);
@@ -739,7 +769,10 @@ static int cryptd_create_aead(struct crypto_template *tmpl,
if (err)
goto out_free_inst;
- inst->alg.cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+ type = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC;
+ if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
+ type |= CRYPTO_ALG_INTERNAL;
+ inst->alg.cra_flags = type;
inst->alg.cra_type = alg->cra_type;
inst->alg.cra_ctxsize = sizeof(struct cryptd_aead_ctx);
inst->alg.cra_init = cryptd_aead_init_tfm;
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index c5148a35ae0a..41dfe762b7fb 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -62,10 +62,14 @@ static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
else if (!exact)
match = !strcmp(q->cra_name, p->cru_name);
- if (match) {
- alg = q;
- break;
- }
+ if (!match)
+ continue;
+
+ if (unlikely(!crypto_mod_get(q)))
+ continue;
+
+ alg = q;
+ break;
}
up_read(&crypto_alg_sem);
@@ -205,9 +209,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
if (!alg)
return -ENOENT;
+ err = -ENOMEM;
skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!skb)
- return -ENOMEM;
+ goto drop_alg;
info.in_skb = in_skb;
info.out_skb = skb;
@@ -215,6 +220,10 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
info.nlmsg_flags = 0;
err = crypto_report_alg(alg, &info);
+
+drop_alg:
+ crypto_mod_put(alg);
+
if (err)
return err;
@@ -284,6 +293,7 @@ static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
up_write(&crypto_alg_sem);
+ crypto_mod_put(alg);
crypto_remove_final(&list);
return 0;
@@ -294,6 +304,7 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct crypto_alg *alg;
struct crypto_user_alg *p = nlmsg_data(nlh);
+ int err;
if (!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
@@ -310,13 +321,19 @@ static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
* if we try to unregister. Unregistering such an algorithm without
* removing the module is not possible, so we restrict to crypto
* instances that are build from templates. */
+ err = -EINVAL;
if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
- return -EINVAL;
+ goto drop_alg;
- if (atomic_read(&alg->cra_refcnt) != 1)
- return -EBUSY;
+ err = -EBUSY;
+ if (atomic_read(&alg->cra_refcnt) > 2)
+ goto drop_alg;
- return crypto_unregister_instance(alg);
+ err = crypto_unregister_instance((struct crypto_instance *)alg);
+
+drop_alg:
+ crypto_mod_put(alg);
+ return err;
}
static struct crypto_alg *crypto_user_skcipher_alg(const char *name, u32 type,
@@ -395,8 +412,10 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
alg = crypto_alg_match(p, exact);
- if (alg)
+ if (alg) {
+ crypto_mod_put(alg);
return -EEXIST;
+ }
if (strlen(p->cru_driver_name))
name = p->cru_driver_name;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index d8ff16e5c322..b69409cb7e6a 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -119,19 +119,19 @@ static const struct drbg_core drbg_cores[] = {
.statelen = 32, /* 256 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes128",
- .backend_cra_name = "ecb(aes)",
+ .backend_cra_name = "aes",
}, {
.flags = DRBG_CTR | DRBG_STRENGTH192,
.statelen = 40, /* 320 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes192",
- .backend_cra_name = "ecb(aes)",
+ .backend_cra_name = "aes",
}, {
.flags = DRBG_CTR | DRBG_STRENGTH256,
.statelen = 48, /* 384 bits as defined in 10.2.1 */
.blocklen_bytes = 16,
.cra_name = "ctr_aes256",
- .backend_cra_name = "ecb(aes)",
+ .backend_cra_name = "aes",
},
#endif /* CONFIG_CRYPTO_DRBG_CTR */
#ifdef CONFIG_CRYPTO_DRBG_HASH
@@ -308,9 +308,6 @@ static int drbg_ctr_bcc(struct drbg_state *drbg,
drbg_string_fill(&data, out, drbg_blocklen(drbg));
- /* 10.4.3 step 1 */
- memset(out, 0, drbg_blocklen(drbg));
-
/* 10.4.3 step 2 / 4 */
list_for_each_entry(curr, in, list) {
const unsigned char *pos = curr->buf;
@@ -406,7 +403,6 @@ static int drbg_ctr_df(struct drbg_state *drbg,
memset(pad, 0, drbg_blocklen(drbg));
memset(iv, 0, drbg_blocklen(drbg));
- memset(temp, 0, drbg_statelen(drbg));
/* 10.4.2 step 1 is implicit as we work byte-wise */
@@ -523,7 +519,6 @@ static int drbg_ctr_update(struct drbg_state *drbg, struct list_head *seed,
unsigned int len = 0;
struct drbg_string cipherin;
- memset(temp, 0, drbg_statelen(drbg) + drbg_blocklen(drbg));
if (3 > reseed)
memset(df_data, 0, drbg_statelen(drbg));
@@ -585,8 +580,6 @@ static int drbg_ctr_generate(struct drbg_state *drbg,
int ret = 0;
struct drbg_string data;
- memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
-
/* 10.2.1.5.2 step 2 */
if (addtl && !list_empty(addtl)) {
ret = drbg_ctr_update(drbg, addtl, 2);
@@ -761,7 +754,6 @@ static struct drbg_state_ops drbg_hmac_ops = {
.generate = drbg_hmac_generate,
.crypto_init = drbg_init_hash_kernel,
.crypto_fini = drbg_fini_hash_kernel,
-
};
#endif /* CONFIG_CRYPTO_DRBG_HMAC */
@@ -838,8 +830,6 @@ static int drbg_hash_df(struct drbg_state *drbg,
unsigned char *tmp = drbg->scratchpad + drbg_statelen(drbg);
struct drbg_string data;
- memset(tmp, 0, drbg_blocklen(drbg));
-
/* 10.4.1 step 3 */
input[0] = 1;
drbg_cpu_to_be32((outlen * 8), &input[1]);
@@ -879,7 +869,6 @@ static int drbg_hash_update(struct drbg_state *drbg, struct list_head *seed,
unsigned char *V = drbg->scratchpad;
unsigned char prefix = DRBG_PREFIX1;
- memset(drbg->scratchpad, 0, drbg_statelen(drbg));
if (!seed)
return -EINVAL;
@@ -921,9 +910,6 @@ static int drbg_hash_process_addtl(struct drbg_state *drbg,
LIST_HEAD(datalist);
unsigned char prefix = DRBG_PREFIX2;
- /* this is value w as per documentation */
- memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
-
/* 10.1.1.4 step 2 */
if (!addtl || list_empty(addtl))
return 0;
@@ -959,9 +945,6 @@ static int drbg_hash_hashgen(struct drbg_state *drbg,
struct drbg_string data;
LIST_HEAD(datalist);
- memset(src, 0, drbg_statelen(drbg));
- memset(dst, 0, drbg_blocklen(drbg));
-
/* 10.1.1.4 step hashgen 2 */
memcpy(src, drbg->V, drbg_statelen(drbg));
@@ -1018,7 +1001,6 @@ static int drbg_hash_generate(struct drbg_state *drbg,
len = drbg_hash_hashgen(drbg, buf, buflen);
/* this is the value H as documented in 10.1.1.4 */
- memset(drbg->scratchpad, 0, drbg_blocklen(drbg));
/* 10.1.1.4 step 4 */
drbg_string_fill(&data1, &prefix, 1);
list_add_tail(&data1.list, &datalist);
@@ -1298,7 +1280,7 @@ static void drbg_restore_shadow(struct drbg_state *drbg,
* as defined in SP800-90A. The additional input is mixed into
* the state in addition to the pulled entropy.
*
- * return: generated number of bytes
+ * return: 0 when all bytes are generated; < 0 in case of an error
*/
static int drbg_generate(struct drbg_state *drbg,
unsigned char *buf, unsigned int buflen,
@@ -1437,6 +1419,11 @@ static int drbg_generate(struct drbg_state *drbg,
}
#endif
+ /*
+ * All operations were successful, return 0 as mandated by
+ * the kernel crypto API interface.
+ */
+ len = 0;
err:
shadow->d_ops->crypto_fini(shadow);
drbg_restore_shadow(drbg, &shadow);
@@ -1644,24 +1631,24 @@ static int drbg_kcapi_hash(struct drbg_state *drbg, const unsigned char *key,
static int drbg_init_sym_kernel(struct drbg_state *drbg)
{
int ret = 0;
- struct crypto_blkcipher *tfm;
+ struct crypto_cipher *tfm;
- tfm = crypto_alloc_blkcipher(drbg->core->backend_cra_name, 0, 0);
+ tfm = crypto_alloc_cipher(drbg->core->backend_cra_name, 0, 0);
if (IS_ERR(tfm)) {
pr_info("DRBG: could not allocate cipher TFM handle\n");
return PTR_ERR(tfm);
}
- BUG_ON(drbg_blocklen(drbg) != crypto_blkcipher_blocksize(tfm));
+ BUG_ON(drbg_blocklen(drbg) != crypto_cipher_blocksize(tfm));
drbg->priv_data = tfm;
return ret;
}
static int drbg_fini_sym_kernel(struct drbg_state *drbg)
{
- struct crypto_blkcipher *tfm =
- (struct crypto_blkcipher *)drbg->priv_data;
+ struct crypto_cipher *tfm =
+ (struct crypto_cipher *)drbg->priv_data;
if (tfm)
- crypto_free_blkcipher(tfm);
+ crypto_free_cipher(tfm);
drbg->priv_data = NULL;
return 0;
}
@@ -1669,21 +1656,14 @@ static int drbg_fini_sym_kernel(struct drbg_state *drbg)
static int drbg_kcapi_sym(struct drbg_state *drbg, const unsigned char *key,
unsigned char *outval, const struct drbg_string *in)
{
- int ret = 0;
- struct scatterlist sg_in, sg_out;
- struct blkcipher_desc desc;
- struct crypto_blkcipher *tfm =
- (struct crypto_blkcipher *)drbg->priv_data;
-
- desc.tfm = tfm;
- desc.flags = 0;
- crypto_blkcipher_setkey(tfm, key, (drbg_keylen(drbg)));
- /* there is only component in *in */
- sg_init_one(&sg_in, in->buf, in->len);
- sg_init_one(&sg_out, outval, drbg_blocklen(drbg));
- ret = crypto_blkcipher_encrypt(&desc, &sg_out, &sg_in, in->len);
+ struct crypto_cipher *tfm =
+ (struct crypto_cipher *)drbg->priv_data;
- return ret;
+ crypto_cipher_setkey(tfm, key, (drbg_keylen(drbg)));
+ /* there is only component in *in */
+ BUG_ON(in->len < drbg_blocklen(drbg));
+ crypto_cipher_encrypt_one(tfm, outval, in->buf);
+ return 0;
}
#endif /* CONFIG_CRYPTO_DRBG_CTR */
diff --git a/crypto/mcryptd.c b/crypto/mcryptd.c
index a8e870444ea9..fe5b495a434d 100644
--- a/crypto/mcryptd.c
+++ b/crypto/mcryptd.c
@@ -258,6 +258,20 @@ out_free_inst:
goto out;
}
+static inline void mcryptd_check_internal(struct rtattr **tb, u32 *type,
+ u32 *mask)
+{
+ struct crypto_attr_type *algt;
+
+ algt = crypto_get_attr_type(tb);
+ if (IS_ERR(algt))
+ return;
+ if ((algt->type & CRYPTO_ALG_INTERNAL))
+ *type |= CRYPTO_ALG_INTERNAL;
+ if ((algt->mask & CRYPTO_ALG_INTERNAL))
+ *mask |= CRYPTO_ALG_INTERNAL;
+}
+
static int mcryptd_hash_init_tfm(struct crypto_tfm *tfm)
{
struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
@@ -480,9 +494,13 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
struct ahash_instance *inst;
struct shash_alg *salg;
struct crypto_alg *alg;
+ u32 type = 0;
+ u32 mask = 0;
int err;
- salg = shash_attr_alg(tb[1], 0, 0);
+ mcryptd_check_internal(tb, &type, &mask);
+
+ salg = shash_attr_alg(tb[1], type, mask);
if (IS_ERR(salg))
return PTR_ERR(salg);
@@ -502,7 +520,10 @@ static int mcryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb,
if (err)
goto out_free_inst;
- inst->alg.halg.base.cra_flags = CRYPTO_ALG_ASYNC;
+ type = CRYPTO_ALG_ASYNC;
+ if (alg->cra_flags & CRYPTO_ALG_INTERNAL)
+ type |= CRYPTO_ALG_INTERNAL;
+ inst->alg.halg.base.cra_flags = type;
inst->alg.halg.digestsize = salg->digestsize;
inst->alg.halg.base.cra_ctxsize = sizeof(struct mcryptd_hash_ctx);
diff --git a/crypto/proc.c b/crypto/proc.c
index 4a0a7aad2204..4ffe73b51612 100644
--- a/crypto/proc.c
+++ b/crypto/proc.c
@@ -89,6 +89,9 @@ static int c_show(struct seq_file *m, void *p)
seq_printf(m, "selftest : %s\n",
(alg->cra_flags & CRYPTO_ALG_TESTED) ?
"passed" : "unknown");
+ seq_printf(m, "internal : %s\n",
+ (alg->cra_flags & CRYPTO_ALG_INTERNAL) ?
+ "yes" : "no");
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
seq_printf(m, "type : larval\n");
diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c
index a3e50c37eb6f..39e3acc438d9 100644
--- a/crypto/sha1_generic.c
+++ b/crypto/sha1_generic.c
@@ -23,111 +23,49 @@
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
+#include <crypto/sha1_base.h>
#include <asm/byteorder.h>
-static int sha1_init(struct shash_desc *desc)
+static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
+ int blocks)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
+ u32 temp[SHA_WORKSPACE_WORDS];
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
+ while (blocks--) {
+ sha_transform(sst->state, src, temp);
+ src += SHA1_BLOCK_SIZE;
+ }
+ memzero_explicit(temp, sizeof(temp));
}
int crypto_sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
+ unsigned int len)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
-
- partial = sctx->count % SHA1_BLOCK_SIZE;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) >= SHA1_BLOCK_SIZE) {
- u32 temp[SHA_WORKSPACE_WORDS];
-
- if (partial) {
- done = -partial;
- memcpy(sctx->buffer + partial, data,
- done + SHA1_BLOCK_SIZE);
- src = sctx->buffer;
- }
-
- do {
- sha_transform(sctx->state, src, temp);
- done += SHA1_BLOCK_SIZE;
- src = data + done;
- } while (done + SHA1_BLOCK_SIZE <= len);
-
- memzero_explicit(temp, sizeof(temp));
- partial = 0;
- }
- memcpy(sctx->buffer + partial, src, len - done);
-
- return 0;
+ return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha1_update);
-
-/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- u32 i, index, padlen;
- __be64 bits;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 */
- index = sctx->count & 0x3f;
- padlen = (index < 56) ? (56 - index) : ((64+56) - index);
- crypto_sha1_update(desc, padding, padlen);
-
- /* Append length */
- crypto_sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof *sctx);
-
- return 0;
+ sha1_base_do_finalize(desc, sha1_generic_block_fn);
+ return sha1_base_finish(desc, out);
}
-static int sha1_export(struct shash_desc *desc, void *out)
+int crypto_sha1_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
+ sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
+ return sha1_final(desc, out);
}
+EXPORT_SYMBOL(crypto_sha1_finup);
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
+ .init = sha1_base_init,
.update = crypto_sha1_update,
.final = sha1_final,
- .export = sha1_export,
- .import = sha1_import,
+ .finup = crypto_sha1_finup,
.descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-generic",
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index b001ff5c2efc..78431163ed3c 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/sha.h>
+#include <crypto/sha256_base.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
@@ -214,138 +215,43 @@ static void sha256_transform(u32 *state, const u8 *input)
memzero_explicit(W, 64 * sizeof(u32));
}
-static int sha224_init(struct shash_desc *desc)
+static void sha256_generic_block_fn(struct sha256_state *sst, u8 const *src,
+ int blocks)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- sctx->state[0] = SHA224_H0;
- sctx->state[1] = SHA224_H1;
- sctx->state[2] = SHA224_H2;
- sctx->state[3] = SHA224_H3;
- sctx->state[4] = SHA224_H4;
- sctx->state[5] = SHA224_H5;
- sctx->state[6] = SHA224_H6;
- sctx->state[7] = SHA224_H7;
- sctx->count = 0;
-
- return 0;
-}
-
-static int sha256_init(struct shash_desc *desc)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- sctx->state[0] = SHA256_H0;
- sctx->state[1] = SHA256_H1;
- sctx->state[2] = SHA256_H2;
- sctx->state[3] = SHA256_H3;
- sctx->state[4] = SHA256_H4;
- sctx->state[5] = SHA256_H5;
- sctx->state[6] = SHA256_H6;
- sctx->state[7] = SHA256_H7;
- sctx->count = 0;
-
- return 0;
+ while (blocks--) {
+ sha256_transform(sst->state, src);
+ src += SHA256_BLOCK_SIZE;
+ }
}
int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) > 63) {
- if (partial) {
- done = -partial;
- memcpy(sctx->buf + partial, data, done + 64);
- src = sctx->buf;
- }
-
- do {
- sha256_transform(sctx->state, src);
- done += 64;
- src = data + done;
- } while (done + 63 < len);
-
- partial = 0;
- }
- memcpy(sctx->buf + partial, src, len - done);
-
- return 0;
+ return sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha256_update);
static int sha256_final(struct shash_desc *desc, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- __be64 bits;
- unsigned int index, pad_len;
- int i;
- static const u8 padding[64] = { 0x80, };
-
- /* Save number of bits */
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64. */
- index = sctx->count & 0x3f;
- pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
- crypto_sha256_update(desc, padding, pad_len);
-
- /* Append length (before padding) */
- crypto_sha256_update(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(*sctx));
-
- return 0;
+ sha256_base_do_finalize(desc, sha256_generic_block_fn);
+ return sha256_base_finish(desc, out);
}
-static int sha224_final(struct shash_desc *desc, u8 *hash)
+int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash)
{
- u8 D[SHA256_DIGEST_SIZE];
-
- sha256_final(desc, D);
-
- memcpy(hash, D, SHA224_DIGEST_SIZE);
- memzero_explicit(D, SHA256_DIGEST_SIZE);
-
- return 0;
-}
-
-static int sha256_export(struct shash_desc *desc, void *out)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int sha256_import(struct shash_desc *desc, const void *in)
-{
- struct sha256_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
+ sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
+ return sha256_final(desc, hash);
}
+EXPORT_SYMBOL(crypto_sha256_finup);
static struct shash_alg sha256_algs[2] = { {
.digestsize = SHA256_DIGEST_SIZE,
- .init = sha256_init,
+ .init = sha256_base_init,
.update = crypto_sha256_update,
.final = sha256_final,
- .export = sha256_export,
- .import = sha256_import,
+ .finup = crypto_sha256_finup,
.descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha256",
.cra_driver_name= "sha256-generic",
@@ -355,9 +261,10 @@ static struct shash_alg sha256_algs[2] = { {
}
}, {
.digestsize = SHA224_DIGEST_SIZE,
- .init = sha224_init,
+ .init = sha224_base_init,
.update = crypto_sha256_update,
- .final = sha224_final,
+ .final = sha256_final,
+ .finup = crypto_sha256_finup,
.descsize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 1c3c3767e079..eba965d18bfc 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c
@@ -18,6 +18,7 @@
#include <linux/crypto.h>
#include <linux/types.h>
#include <crypto/sha.h>
+#include <crypto/sha512_base.h>
#include <linux/percpu.h>
#include <asm/byteorder.h>
#include <asm/unaligned.h>
@@ -130,125 +131,42 @@ sha512_transform(u64 *state, const u8 *input)
a = b = c = d = e = f = g = h = t1 = t2 = 0;
}
-static int
-sha512_init(struct shash_desc *desc)
+static void sha512_generic_block_fn(struct sha512_state *sst, u8 const *src,
+ int blocks)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- sctx->state[0] = SHA512_H0;
- sctx->state[1] = SHA512_H1;
- sctx->state[2] = SHA512_H2;
- sctx->state[3] = SHA512_H3;
- sctx->state[4] = SHA512_H4;
- sctx->state[5] = SHA512_H5;
- sctx->state[6] = SHA512_H6;
- sctx->state[7] = SHA512_H7;
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
-}
-
-static int
-sha384_init(struct shash_desc *desc)
-{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- sctx->state[0] = SHA384_H0;
- sctx->state[1] = SHA384_H1;
- sctx->state[2] = SHA384_H2;
- sctx->state[3] = SHA384_H3;
- sctx->state[4] = SHA384_H4;
- sctx->state[5] = SHA384_H5;
- sctx->state[6] = SHA384_H6;
- sctx->state[7] = SHA384_H7;
- sctx->count[0] = sctx->count[1] = 0;
-
- return 0;
+ while (blocks--) {
+ sha512_transform(sst->state, src);
+ src += SHA512_BLOCK_SIZE;
+ }
}
int crypto_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
-
- unsigned int i, index, part_len;
-
- /* Compute number of bytes mod 128 */
- index = sctx->count[0] & 0x7f;
-
- /* Update number of bytes */
- if ((sctx->count[0] += len) < len)
- sctx->count[1]++;
-
- part_len = 128 - index;
-
- /* Transform as many times as possible. */
- if (len >= part_len) {
- memcpy(&sctx->buf[index], data, part_len);
- sha512_transform(sctx->state, sctx->buf);
-
- for (i = part_len; i + 127 < len; i+=128)
- sha512_transform(sctx->state, &data[i]);
-
- index = 0;
- } else {
- i = 0;
- }
-
- /* Buffer remaining input */
- memcpy(&sctx->buf[index], &data[i], len - i);
-
- return 0;
+ return sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
}
EXPORT_SYMBOL(crypto_sha512_update);
-static int
-sha512_final(struct shash_desc *desc, u8 *hash)
+static int sha512_final(struct shash_desc *desc, u8 *hash)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
- static u8 padding[128] = { 0x80, };
- __be64 *dst = (__be64 *)hash;
- __be64 bits[2];
- unsigned int index, pad_len;
- int i;
-
- /* Save number of bits */
- bits[1] = cpu_to_be64(sctx->count[0] << 3);
- bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
-
- /* Pad out to 112 mod 128. */
- index = sctx->count[0] & 0x7f;
- pad_len = (index < 112) ? (112 - index) : ((128+112) - index);
- crypto_sha512_update(desc, padding, pad_len);
-
- /* Append length (before padding) */
- crypto_sha512_update(desc, (const u8 *)bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 8; i++)
- dst[i] = cpu_to_be64(sctx->state[i]);
-
- /* Zeroize sensitive information. */
- memset(sctx, 0, sizeof(struct sha512_state));
-
- return 0;
+ sha512_base_do_finalize(desc, sha512_generic_block_fn);
+ return sha512_base_finish(desc, hash);
}
-static int sha384_final(struct shash_desc *desc, u8 *hash)
+int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *hash)
{
- u8 D[64];
-
- sha512_final(desc, D);
-
- memcpy(hash, D, 48);
- memzero_explicit(D, 64);
-
- return 0;
+ sha512_base_do_update(desc, data, len, sha512_generic_block_fn);
+ return sha512_final(desc, hash);
}
+EXPORT_SYMBOL(crypto_sha512_finup);
static struct shash_alg sha512_algs[2] = { {
.digestsize = SHA512_DIGEST_SIZE,
- .init = sha512_init,
+ .init = sha512_base_init,
.update = crypto_sha512_update,
.final = sha512_final,
+ .finup = crypto_sha512_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
@@ -259,9 +177,10 @@ static struct shash_alg sha512_algs[2] = { {
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
- .init = sha384_init,
+ .init = sha384_base_init,
.update = crypto_sha512_update,
- .final = sha384_final,
+ .final = sha512_final,
+ .finup = crypto_sha512_finup,
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 4b9e23fa4204..1a2800107fc8 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1155,9 +1155,9 @@ static void test_acipher_speed(const char *algo, int enc, unsigned int secs,
goto out_free_req;
}
- sg_init_table(sg, TVMEMSIZE);
-
k = *keysize + *b_size;
+ sg_init_table(sg, DIV_ROUND_UP(k, PAGE_SIZE));
+
if (k > PAGE_SIZE) {
sg_set_buf(sg, tvmem[0] + *keysize,
PAGE_SIZE - *keysize);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f4ed6d4205e7..f9bce3d7ee7f 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1474,11 +1474,11 @@ static int test_cprng(struct crypto_rng *tfm, struct cprng_testvec *template,
for (j = 0; j < template[i].loops; j++) {
err = crypto_rng_get_bytes(tfm, result,
template[i].rlen);
- if (err != template[i].rlen) {
+ if (err < 0) {
printk(KERN_ERR "alg: cprng: Failed to obtain "
"the correct amount of random data for "
- "%s (requested %d, got %d)\n", algo,
- template[i].rlen, err);
+ "%s (requested %d)\n", algo,
+ template[i].rlen);
goto out;
}
}
@@ -1505,7 +1505,7 @@ static int alg_test_aead(const struct alg_test_desc *desc, const char *driver,
struct crypto_aead *tfm;
int err = 0;
- tfm = crypto_alloc_aead(driver, type, mask);
+ tfm = crypto_alloc_aead(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: aead: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@@ -1534,7 +1534,7 @@ static int alg_test_cipher(const struct alg_test_desc *desc,
struct crypto_cipher *tfm;
int err = 0;
- tfm = crypto_alloc_cipher(driver, type, mask);
+ tfm = crypto_alloc_cipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: cipher: Failed to load transform for "
"%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1563,7 +1563,7 @@ static int alg_test_skcipher(const struct alg_test_desc *desc,
struct crypto_ablkcipher *tfm;
int err = 0;
- tfm = crypto_alloc_ablkcipher(driver, type, mask);
+ tfm = crypto_alloc_ablkcipher(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: skcipher: Failed to load transform for "
"%s: %ld\n", driver, PTR_ERR(tfm));
@@ -1636,7 +1636,7 @@ static int alg_test_hash(const struct alg_test_desc *desc, const char *driver,
struct crypto_ahash *tfm;
int err;
- tfm = crypto_alloc_ahash(driver, type, mask);
+ tfm = crypto_alloc_ahash(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: hash: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@@ -1664,7 +1664,7 @@ static int alg_test_crc32c(const struct alg_test_desc *desc,
if (err)
goto out;
- tfm = crypto_alloc_shash(driver, type, mask);
+ tfm = crypto_alloc_shash(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(tfm)) {
printk(KERN_ERR "alg: crc32c: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(tfm));
@@ -1706,7 +1706,7 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
struct crypto_rng *rng;
int err;
- rng = crypto_alloc_rng(driver, type, mask);
+ rng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(rng)) {
printk(KERN_ERR "alg: cprng: Failed to load transform for %s: "
"%ld\n", driver, PTR_ERR(rng));
@@ -1733,7 +1733,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
if (!buf)
return -ENOMEM;
- drng = crypto_alloc_rng(driver, type, mask);
+ drng = crypto_alloc_rng(driver, type | CRYPTO_ALG_INTERNAL, mask);
if (IS_ERR(drng)) {
printk(KERN_ERR "alg: drbg: could not allocate DRNG handle for "
"%s\n", driver);
@@ -1759,7 +1759,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
ret = crypto_drbg_get_bytes_addtl(drng,
buf, test->expectedlen, &addtl);
}
- if (ret <= 0) {
+ if (ret < 0) {
printk(KERN_ERR "alg: drbg: could not obtain random data for "
"driver %s\n", driver);
goto outbuf;
@@ -1774,7 +1774,7 @@ static int drbg_cavs_test(struct drbg_testvec *test, int pr,
ret = crypto_drbg_get_bytes_addtl(drng,
buf, test->expectedlen, &addtl);
}
- if (ret <= 0) {
+ if (ret < 0) {
printk(KERN_ERR "alg: drbg: could not obtain random data for "
"driver %s\n", driver);
goto outbuf;
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index de57b38809c7..f48cf11c655e 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -101,6 +101,19 @@ config HW_RANDOM_BCM2835
If unsure, say Y.
+config HW_RANDOM_IPROC_RNG200
+ tristate "Broadcom iProc RNG200 support"
+ depends on ARCH_BCM_IPROC
+ default HW_RANDOM
+ ---help---
+ This driver provides kernel-side support for the RNG200
+ hardware found on the Broadcom iProc SoCs.
+
+ To compile this driver as a module, choose M here: the
+ module will be called iproc-rng200
+
+ If unsure, say Y.
+
config HW_RANDOM_GEODE
tristate "AMD Geode HW Random Number Generator support"
depends on X86_32 && PCI
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index 0b4cd57f4e24..055bb01510ad 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -28,5 +28,6 @@ obj-$(CONFIG_HW_RANDOM_POWERNV) += powernv-rng.o
obj-$(CONFIG_HW_RANDOM_EXYNOS) += exynos-rng.o
obj-$(CONFIG_HW_RANDOM_TPM) += tpm-rng.o
obj-$(CONFIG_HW_RANDOM_BCM2835) += bcm2835-rng.o
+obj-$(CONFIG_HW_RANDOM_IPROC_RNG200) += iproc-rng200.o
obj-$(CONFIG_HW_RANDOM_MSM) += msm-rng.o
obj-$(CONFIG_HW_RANDOM_XGENE) += xgene-rng.o
diff --git a/drivers/char/hw_random/bcm63xx-rng.c b/drivers/char/hw_random/bcm63xx-rng.c
index ba6a65ac023b..d1494ecd9e11 100644
--- a/drivers/char/hw_random/bcm63xx-rng.c
+++ b/drivers/char/hw_random/bcm63xx-rng.c
@@ -13,24 +13,37 @@
#include <linux/platform_device.h>
#include <linux/hw_random.h>
-#include <bcm63xx_io.h>
-#include <bcm63xx_regs.h>
+#define RNG_CTRL 0x00
+#define RNG_EN (1 << 0)
+
+#define RNG_STAT 0x04
+#define RNG_AVAIL_MASK (0xff000000)
+
+#define RNG_DATA 0x08
+#define RNG_THRES 0x0c
+#define RNG_MASK 0x10
struct bcm63xx_rng_priv {
+ struct hwrng rng;
struct clk *clk;
void __iomem *regs;
};
-#define to_rng_priv(rng) ((struct bcm63xx_rng_priv *)rng->priv)
+#define to_rng_priv(rng) container_of(rng, struct bcm63xx_rng_priv, rng)
static int bcm63xx_rng_init(struct hwrng *rng)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
u32 val;
+ int error;
+
+ error = clk_prepare_enable(priv->clk);
+ if (error)
+ return error;
- val = bcm_readl(priv->regs + RNG_CTRL);
+ val = __raw_readl(priv->regs + RNG_CTRL);
val |= RNG_EN;
- bcm_writel(val, priv->regs + RNG_CTRL);
+ __raw_writel(val, priv->regs + RNG_CTRL);
return 0;
}
@@ -40,23 +53,25 @@ static void bcm63xx_rng_cleanup(struct hwrng *rng)
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
u32 val;
- val = bcm_readl(priv->regs + RNG_CTRL);
+ val = __raw_readl(priv->regs + RNG_CTRL);
val &= ~RNG_EN;
- bcm_writel(val, priv->regs + RNG_CTRL);
+ __raw_writel(val, priv->regs + RNG_CTRL);
+
+ clk_didsable_unprepare(prov->clk);
}
static int bcm63xx_rng_data_present(struct hwrng *rng, int wait)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
- return bcm_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
+ return __raw_readl(priv->regs + RNG_STAT) & RNG_AVAIL_MASK;
}
static int bcm63xx_rng_data_read(struct hwrng *rng, u32 *data)
{
struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
- *data = bcm_readl(priv->regs + RNG_DATA);
+ *data = __raw_readl(priv->regs + RNG_DATA);
return 4;
}
@@ -72,94 +87,53 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
dev_err(&pdev->dev, "no iomem resource\n");
- ret = -ENXIO;
- goto out;
+ return -ENXIO;
}
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv) {
- dev_err(&pdev->dev, "no memory for private structure\n");
- ret = -ENOMEM;
- goto out;
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->rng.name = pdev->name;
+ priv->rng.init = bcm63xx_rng_init;
+ priv->rng.cleanup = bcm63xx_rng_cleanup;
+ prov->rng.data_present = bcm63xx_rng_data_present;
+ priv->rng.data_read = bcm63xx_rng_data_read;
+
+ priv->clk = devm_clk_get(&pdev->dev, "ipsec");
+ if (IS_ERR(priv->clk)) {
+ error = PTR_ERR(priv->clk);
+ dev_err(&pdev->dev, "no clock for device: %d\n", error);
+ return error;
}
- rng = kzalloc(sizeof(*rng), GFP_KERNEL);
- if (!rng) {
- dev_err(&pdev->dev, "no memory for rng structure\n");
- ret = -ENOMEM;
- goto out_free_priv;
- }
-
- platform_set_drvdata(pdev, rng);
- rng->priv = (unsigned long)priv;
- rng->name = pdev->name;
- rng->init = bcm63xx_rng_init;
- rng->cleanup = bcm63xx_rng_cleanup;
- rng->data_present = bcm63xx_rng_data_present;
- rng->data_read = bcm63xx_rng_data_read;
-
- clk = clk_get(&pdev->dev, "ipsec");
- if (IS_ERR(clk)) {
- dev_err(&pdev->dev, "no clock for device\n");
- ret = PTR_ERR(clk);
- goto out_free_rng;
- }
-
- priv->clk = clk;
-
if (!devm_request_mem_region(&pdev->dev, r->start,
resource_size(r), pdev->name)) {
dev_err(&pdev->dev, "request mem failed");
- ret = -ENOMEM;
- goto out_free_rng;
+ return -EBUSY;
}
priv->regs = devm_ioremap_nocache(&pdev->dev, r->start,
resource_size(r));
if (!priv->regs) {
dev_err(&pdev->dev, "ioremap failed");
- ret = -ENOMEM;
- goto out_free_rng;
+ return -ENOMEM;
}
- clk_enable(clk);
-
- ret = hwrng_register(rng);
- if (ret) {
- dev_err(&pdev->dev, "failed to register rng device\n");
- goto out_clk_disable;
+ error = devm_hwrng_register(&pdev->dev, &priv->rng);
+ if (error) {
+ dev_err(&pdev->dev, "failed to register rng device: %d\n",
+ error);
+ return error;
}
dev_info(&pdev->dev, "registered RNG driver\n");
return 0;
-
-out_clk_disable:
- clk_disable(clk);
-out_free_rng:
- kfree(rng);
-out_free_priv:
- kfree(priv);
-out:
- return ret;
-}
-
-static int bcm63xx_rng_remove(struct platform_device *pdev)
-{
- struct hwrng *rng = platform_get_drvdata(pdev);
- struct bcm63xx_rng_priv *priv = to_rng_priv(rng);
-
- hwrng_unregister(rng);
- clk_disable(priv->clk);
- kfree(priv);
- kfree(rng);
-
- return 0;
}
static struct platform_driver bcm63xx_rng_driver = {
.probe = bcm63xx_rng_probe,
- .remove = bcm63xx_rng_remove,
.driver = {
.name = "bcm63xx-rng",
},
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 32a8a867f7f8..571ef61f8ea9 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -179,7 +179,8 @@ skip_init:
add_early_randomness(rng);
current_quality = rng->quality ? : default_quality;
- current_quality &= 1023;
+ if (current_quality > 1024)
+ current_quality = 1024;
if (current_quality == 0 && hwrng_fill)
kthread_stop(hwrng_fill);
@@ -536,6 +537,48 @@ void hwrng_unregister(struct hwrng *rng)
}
EXPORT_SYMBOL_GPL(hwrng_unregister);
+static void devm_hwrng_release(struct device *dev, void *res)
+{
+ hwrng_unregister(*(struct hwrng **)res);
+}
+
+static int devm_hwrng_match(struct device *dev, void *res, void *data)
+{
+ struct hwrng **r = res;
+
+ if (WARN_ON(!r || !*r))
+ return 0;
+
+ return *r == data;
+}
+
+int devm_hwrng_register(struct device *dev, struct hwrng *rng)
+{
+ struct hwrng **ptr;
+ int error;
+
+ ptr = devres_alloc(devm_hwrng_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return -ENOMEM;
+
+ error = hwrng_register(rng);
+ if (error) {
+ devres_free(ptr);
+ return error;
+ }
+
+ *ptr = rng;
+ devres_add(dev, ptr);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devm_hwrng_register);
+
+void devm_hwrng_unregister(struct device *dev, struct hwrng *rng)
+{
+ devres_release(dev, devm_hwrng_release, devm_hwrng_match, rng);
+}
+EXPORT_SYMBOL_GPL(devm_hwrng_unregister);
+
static int __init hwrng_modinit(void)
{
return register_miscdev();
diff --git a/drivers/char/hw_random/exynos-rng.c b/drivers/char/hw_random/exynos-rng.c
index fed0830bf724..dc4701fd814f 100644
--- a/drivers/char/hw_random/exynos-rng.c
+++ b/drivers/char/hw_random/exynos-rng.c
@@ -131,16 +131,7 @@ static int exynos_rng_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(&pdev->dev);
pm_runtime_enable(&pdev->dev);
- return hwrng_register(&exynos_rng->rng);
-}
-
-static int exynos_rng_remove(struct platform_device *pdev)
-{
- struct exynos_rng *exynos_rng = platform_get_drvdata(pdev);
-
- hwrng_unregister(&exynos_rng->rng);
-
- return 0;
+ return devm_hwrng_register(&pdev->dev, &exynos_rng->rng);
}
#ifdef CONFIG_PM
@@ -172,7 +163,6 @@ static struct platform_driver exynos_rng_driver = {
.pm = &exynos_rng_pm_ops,
},
.probe = exynos_rng_probe,
- .remove = exynos_rng_remove,
};
module_platform_driver(exynos_rng_driver);
diff --git a/drivers/char/hw_random/iproc-rng200.c b/drivers/char/hw_random/iproc-rng200.c
new file mode 100644
index 000000000000..3eaf7cb96d36
--- /dev/null
+++ b/drivers/char/hw_random/iproc-rng200.c
@@ -0,0 +1,239 @@
+/*
+* Copyright (C) 2015 Broadcom Corporation
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License as
+* published by the Free Software Foundation version 2.
+*
+* This program is distributed "as is" WITHOUT ANY WARRANTY of any
+* kind, whether express or implied; without even the implied warranty
+* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*/
+/*
+ * DESCRIPTION: The Broadcom iProc RNG200 Driver
+ */
+
+#include <linux/hw_random.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+
+/* Registers */
+#define RNG_CTRL_OFFSET 0x00
+#define RNG_CTRL_RNG_RBGEN_MASK 0x00001FFF
+#define RNG_CTRL_RNG_RBGEN_ENABLE 0x00000001
+#define RNG_CTRL_RNG_RBGEN_DISABLE 0x00000000
+
+#define RNG_SOFT_RESET_OFFSET 0x04
+#define RNG_SOFT_RESET 0x00000001
+
+#define RBG_SOFT_RESET_OFFSET 0x08
+#define RBG_SOFT_RESET 0x00000001
+
+#define RNG_INT_STATUS_OFFSET 0x18
+#define RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK 0x80000000
+#define RNG_INT_STATUS_STARTUP_TRANSITIONS_MET_IRQ_MASK 0x00020000
+#define RNG_INT_STATUS_NIST_FAIL_IRQ_MASK 0x00000020
+#define RNG_INT_STATUS_TOTAL_BITS_COUNT_IRQ_MASK 0x00000001
+
+#define RNG_FIFO_DATA_OFFSET 0x20
+
+#define RNG_FIFO_COUNT_OFFSET 0x24
+#define RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK 0x000000FF
+
+struct iproc_rng200_dev {
+ struct hwrng rng;
+ void __iomem *base;
+};
+
+#define to_rng_priv(rng) container_of(rng, struct iproc_rng200_dev, rng)
+
+static void iproc_rng200_restart(void __iomem *rng_base)
+{
+ uint32_t val;
+
+ /* Disable RBG */
+ val = ioread32(rng_base + RNG_CTRL_OFFSET);
+ val &= ~RNG_CTRL_RNG_RBGEN_MASK;
+ val |= RNG_CTRL_RNG_RBGEN_DISABLE;
+ iowrite32(val, rng_base + RNG_CTRL_OFFSET);
+
+ /* Clear all interrupt status */
+ iowrite32(0xFFFFFFFFUL, rng_base + RNG_INT_STATUS_OFFSET);
+
+ /* Reset RNG and RBG */
+ val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
+ val |= RBG_SOFT_RESET;
+ iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
+
+ val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
+ val |= RNG_SOFT_RESET;
+ iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
+
+ val = ioread32(rng_base + RNG_SOFT_RESET_OFFSET);
+ val &= ~RNG_SOFT_RESET;
+ iowrite32(val, rng_base + RNG_SOFT_RESET_OFFSET);
+
+ val = ioread32(rng_base + RBG_SOFT_RESET_OFFSET);
+ val &= ~RBG_SOFT_RESET;
+ iowrite32(val, rng_base + RBG_SOFT_RESET_OFFSET);
+
+ /* Enable RBG */
+ val = ioread32(rng_base + RNG_CTRL_OFFSET);
+ val &= ~RNG_CTRL_RNG_RBGEN_MASK;
+ val |= RNG_CTRL_RNG_RBGEN_ENABLE;
+ iowrite32(val, rng_base + RNG_CTRL_OFFSET);
+}
+
+static int iproc_rng200_read(struct hwrng *rng, void *buf, size_t max,
+ bool wait)
+{
+ struct iproc_rng200_dev *priv = to_rng_priv(rng);
+ uint32_t num_remaining = max;
+ uint32_t status;
+
+ #define MAX_RESETS_PER_READ 1
+ uint32_t num_resets = 0;
+
+ #define MAX_IDLE_TIME (1 * HZ)
+ unsigned long idle_endtime = jiffies + MAX_IDLE_TIME;
+
+ while ((num_remaining > 0) && time_before(jiffies, idle_endtime)) {
+
+ /* Is RNG sane? If not, reset it. */
+ status = ioread32(priv->base + RNG_INT_STATUS_OFFSET);
+ if ((status & (RNG_INT_STATUS_MASTER_FAIL_LOCKOUT_IRQ_MASK |
+ RNG_INT_STATUS_NIST_FAIL_IRQ_MASK)) != 0) {
+
+ if (num_resets >= MAX_RESETS_PER_READ)
+ return max - num_remaining;
+
+ iproc_rng200_restart(priv->base);
+ num_resets++;
+ }
+
+ /* Are there any random numbers available? */
+ if ((ioread32(priv->base + RNG_FIFO_COUNT_OFFSET) &
+ RNG_FIFO_COUNT_RNG_FIFO_COUNT_MASK) > 0) {
+
+ if (num_remaining >= sizeof(uint32_t)) {
+ /* Buffer has room to store entire word */
+ *(uint32_t *)buf = ioread32(priv->base +
+ RNG_FIFO_DATA_OFFSET);
+ buf += sizeof(uint32_t);
+ num_remaining -= sizeof(uint32_t);
+ } else {
+ /* Buffer can only store partial word */
+ uint32_t rnd_number = ioread32(priv->base +
+ RNG_FIFO_DATA_OFFSET);
+ memcpy(buf, &rnd_number, num_remaining);
+ buf += num_remaining;
+ num_remaining = 0;
+ }
+
+ /* Reset the IDLE timeout */
+ idle_endtime = jiffies + MAX_IDLE_TIME;
+ } else {
+ if (!wait)
+ /* Cannot wait, return immediately */
+ return max - num_remaining;
+
+ /* Can wait, give others chance to run */
+ usleep_range(min(num_remaining * 10, 500U), 500);
+ }
+ }
+
+ return max - num_remaining;
+}
+
+static int iproc_rng200_init(struct hwrng *rng)
+{
+ struct iproc_rng200_dev *priv = to_rng_priv(rng);
+ uint32_t val;
+
+ /* Setup RNG. */
+ val = ioread32(priv->base + RNG_CTRL_OFFSET);
+ val &= ~RNG_CTRL_RNG_RBGEN_MASK;
+ val |= RNG_CTRL_RNG_RBGEN_ENABLE;
+ iowrite32(val, priv->base + RNG_CTRL_OFFSET);
+
+ return 0;
+}
+
+static void iproc_rng200_cleanup(struct hwrng *rng)
+{
+ struct iproc_rng200_dev *priv = to_rng_priv(rng);
+ uint32_t val;
+
+ /* Disable RNG hardware */
+ val = ioread32(priv->base + RNG_CTRL_OFFSET);
+ val &= ~RNG_CTRL_RNG_RBGEN_MASK;
+ val |= RNG_CTRL_RNG_RBGEN_DISABLE;
+ iowrite32(val, priv->base + RNG_CTRL_OFFSET);
+}
+
+static int iproc_rng200_probe(struct platform_device *pdev)
+{
+ struct iproc_rng200_dev *priv;
+ struct resource *res;
+ struct device *dev = &pdev->dev;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ /* Map peripheral */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(dev, "failed to get rng resources\n");
+ return -EINVAL;
+ }
+
+ priv->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(priv->base)) {
+ dev_err(dev, "failed to remap rng regs\n");
+ return PTR_ERR(priv->base);
+ }
+
+ priv->rng.name = "iproc-rng200",
+ priv->rng.read = iproc_rng200_read,
+ priv->rng.init = iproc_rng200_init,
+ priv->rng.cleanup = iproc_rng200_cleanup,
+
+ /* Register driver */
+ ret = devm_hwrng_register(dev, &priv->rng);
+ if (ret) {
+ dev_err(dev, "hwrng registration failed\n");
+ return ret;
+ }
+
+ dev_info(dev, "hwrng registered\n");
+
+ return 0;
+}
+
+static const struct of_device_id iproc_rng200_of_match[] = {
+ { .compatible = "brcm,iproc-rng200", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, iproc_rng200_of_match);
+
+static struct platform_driver iproc_rng200_driver = {
+ .driver = {
+ .name = "iproc-rng200",
+ .of_match_table = iproc_rng200_of_match,
+ },
+ .probe = iproc_rng200_probe,
+};
+module_platform_driver(iproc_rng200_driver);
+
+MODULE_AUTHOR("Broadcom");
+MODULE_DESCRIPTION("iProc RNG200 Random Number Generator driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/char/hw_random/msm-rng.c b/drivers/char/hw_random/msm-rng.c
index cea1c703d62f..96fb986402eb 100644
--- a/drivers/char/hw_random/msm-rng.c
+++ b/drivers/char/hw_random/msm-rng.c
@@ -157,7 +157,7 @@ static int msm_rng_probe(struct platform_device *pdev)
rng->hwrng.cleanup = msm_rng_cleanup,
rng->hwrng.read = msm_rng_read,
- ret = hwrng_register(&rng->hwrng);
+ ret = devm_hwrng_register(&pdev->dev, &rng->hwrng);
if (ret) {
dev_err(&pdev->dev, "failed to register hwrng\n");
return ret;
@@ -166,14 +166,6 @@ static int msm_rng_probe(struct platform_device *pdev)
return 0;
}
-static int msm_rng_remove(struct platform_device *pdev)
-{
- struct msm_rng *rng = platform_get_drvdata(pdev);
-
- hwrng_unregister(&rng->hwrng);
- return 0;
-}
-
static const struct of_device_id msm_rng_of_match[] = {
{ .compatible = "qcom,prng", },
{}
@@ -182,7 +174,6 @@ MODULE_DEVICE_TABLE(of, msm_rng_of_match);
static struct platform_driver msm_rng_driver = {
.probe = msm_rng_probe,
- .remove = msm_rng_remove,
.driver = {
.name = KBUILD_MODNAME,
.of_match_table = of_match_ptr(msm_rng_of_match),
diff --git a/drivers/char/hw_random/octeon-rng.c b/drivers/char/hw_random/octeon-rng.c
index be1c3f607398..6234a4a19b56 100644
--- a/drivers/char/hw_random/octeon-rng.c
+++ b/drivers/char/hw_random/octeon-rng.c
@@ -105,7 +105,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
return 0;
}
-static int __exit octeon_rng_remove(struct platform_device *pdev)
+static int octeon_rng_remove(struct platform_device *pdev)
{
struct hwrng *rng = platform_get_drvdata(pdev);
@@ -119,7 +119,7 @@ static struct platform_driver octeon_rng_driver = {
.name = "octeon_rng",
},
.probe = octeon_rng_probe,
- .remove = __exit_p(octeon_rng_remove),
+ .remove = octeon_rng_remove,
};
module_platform_driver(octeon_rng_driver);
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index d14dcf788f17..8a1432e8bb80 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -236,7 +236,7 @@ static int omap4_rng_init(struct omap_rng_dev *priv)
u32 val;
/* Return if RNG is already running. */
- if (omap_rng_read(priv, RNG_CONFIG_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
+ if (omap_rng_read(priv, RNG_CONTROL_REG) & RNG_CONTROL_ENABLE_TRNG_MASK)
return 0;
val = RNG_CONFIG_MIN_REFIL_CYCLES << RNG_CONFIG_MIN_REFIL_CYCLES_SHIFT;
@@ -262,7 +262,7 @@ static void omap4_rng_cleanup(struct omap_rng_dev *priv)
val = omap_rng_read(priv, RNG_CONTROL_REG);
val &= ~RNG_CONTROL_ENABLE_TRNG_MASK;
- omap_rng_write(priv, RNG_CONFIG_REG, val);
+ omap_rng_write(priv, RNG_CONTROL_REG, val);
}
static irqreturn_t omap4_rng_irq(int irq, void *dev_id)
@@ -408,7 +408,7 @@ err_ioremap:
return ret;
}
-static int __exit omap_rng_remove(struct platform_device *pdev)
+static int omap_rng_remove(struct platform_device *pdev)
{
struct omap_rng_dev *priv = platform_get_drvdata(pdev);
@@ -422,9 +422,7 @@ static int __exit omap_rng_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM_SLEEP
-
-static int omap_rng_suspend(struct device *dev)
+static int __maybe_unused omap_rng_suspend(struct device *dev)
{
struct omap_rng_dev *priv = dev_get_drvdata(dev);
@@ -434,7 +432,7 @@ static int omap_rng_suspend(struct device *dev)
return 0;
}
-static int omap_rng_resume(struct device *dev)
+static int __maybe_unused omap_rng_resume(struct device *dev)
{
struct omap_rng_dev *priv = dev_get_drvdata(dev);
@@ -445,22 +443,15 @@ static int omap_rng_resume(struct device *dev)
}
static SIMPLE_DEV_PM_OPS(omap_rng_pm, omap_rng_suspend, omap_rng_resume);
-#define OMAP_RNG_PM (&omap_rng_pm)
-
-#else
-
-#define OMAP_RNG_PM NULL
-
-#endif
static struct platform_driver omap_rng_driver = {
.driver = {
.name = "omap_rng",
- .pm = OMAP_RNG_PM,
+ .pm = &omap_rng_pm,
.of_match_table = of_match_ptr(omap_rng_of_match),
},
.probe = omap_rng_probe,
- .remove = __exit_p(omap_rng_remove),
+ .remove = omap_rng_remove,
};
module_platform_driver(omap_rng_driver);
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c
index bcf86f91800a..63ce51d09af1 100644
--- a/drivers/char/hw_random/pseries-rng.c
+++ b/drivers/char/hw_random/pseries-rng.c
@@ -61,13 +61,13 @@ static struct hwrng pseries_rng = {
.read = pseries_rng_read,
};
-static int __init pseries_rng_probe(struct vio_dev *dev,
+static int pseries_rng_probe(struct vio_dev *dev,
const struct vio_device_id *id)
{
return hwrng_register(&pseries_rng);
}
-static int __exit pseries_rng_remove(struct vio_dev *dev)
+static int pseries_rng_remove(struct vio_dev *dev)
{
hwrng_unregister(&pseries_rng);
return 0;
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index 23caa05380a8..c37cf754a985 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -21,6 +21,7 @@
*
*/
+#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/hw_random.h>
@@ -310,6 +311,14 @@ static int xgene_rng_init(struct hwrng *rng)
return 0;
}
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgene_rng_acpi_match[] = {
+ { "APMC0D18", },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, xgene_rng_acpi_match);
+#endif
+
static struct hwrng xgene_rng_func = {
.name = "xgene-rng",
.init = xgene_rng_init,
@@ -415,6 +424,7 @@ static struct platform_driver xgene_rng_driver = {
.driver = {
.name = "xgene-rng",
.of_match_table = xgene_rng_of_match,
+ .acpi_match_table = ACPI_PTR(xgene_rng_acpi_match),
},
};
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2fb0fdfc87df..800bf41718e1 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -391,7 +391,7 @@ config CRYPTO_DEV_ATMEL_SHA
config CRYPTO_DEV_CCP
bool "Support for AMD Cryptographic Coprocessor"
- depends on (X86 && PCI) || ARM64
+ depends on ((X86 && PCI) || (ARM64 && (OF_ADDRESS || ACPI))) && HAS_IOMEM
default n
help
The AMD Cryptographic Coprocessor provides hardware support
@@ -436,4 +436,26 @@ config CRYPTO_DEV_QCE
hardware. To compile this driver as a module, choose M here. The
module will be called qcrypto.
+config CRYPTO_DEV_VMX
+ bool "Support for VMX cryptographic acceleration instructions"
+ depends on PPC64
+ default n
+ help
+ Support for VMX cryptographic acceleration instructions.
+
+source "drivers/crypto/vmx/Kconfig"
+
+config CRYPTO_DEV_IMGTEC_HASH
+ depends on MIPS || COMPILE_TEST
+ tristate "Imagination Technologies hardware hash accelerator"
+ select CRYPTO_ALGAPI
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ select CRYPTO_HASH
+ help
+ This driver interfaces with the Imagination Technologies
+ hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256
+ hashing algorithms.
+
endif # CRYPTO_HW
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 3924f93d5774..fb84be7e6be5 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_CRYPTO_DEV_CCP) += ccp/
obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam/
obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o
obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o
+obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o
obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o
obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o
obj-$(CONFIG_CRYPTO_DEV_MXS_DCP) += mxs-dcp.o
@@ -25,3 +26,4 @@ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/
obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/
+obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/
diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c
index d02b77150070..3b28e8c3de28 100644
--- a/drivers/crypto/amcc/crypto4xx_core.c
+++ b/drivers/crypto/amcc/crypto4xx_core.c
@@ -1155,7 +1155,7 @@ struct crypto4xx_alg_common crypto4xx_alg[] = {
/**
* Module Initialization Routine
*/
-static int __init crypto4xx_probe(struct platform_device *ofdev)
+static int crypto4xx_probe(struct platform_device *ofdev)
{
int rc;
struct resource res;
@@ -1263,7 +1263,7 @@ err_alloc_dev:
return rc;
}
-static int __exit crypto4xx_remove(struct platform_device *ofdev)
+static int crypto4xx_remove(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct crypto4xx_core_device *core_dev = dev_get_drvdata(dev);
@@ -1291,7 +1291,7 @@ static struct platform_driver crypto4xx_driver = {
.of_match_table = crypto4xx_match,
},
.probe = crypto4xx_probe,
- .remove = __exit_p(crypto4xx_remove),
+ .remove = crypto4xx_remove,
};
module_platform_driver(crypto4xx_driver);
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 6597aac9905d..0f9a9dc06a83 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -315,10 +315,10 @@ static int atmel_aes_crypt_dma(struct atmel_aes_dev *dd,
dd->dma_size = length;
- if (!(dd->flags & AES_FLAGS_FAST)) {
- dma_sync_single_for_device(dd->dev, dma_addr_in, length,
- DMA_TO_DEVICE);
- }
+ dma_sync_single_for_device(dd->dev, dma_addr_in, length,
+ DMA_TO_DEVICE);
+ dma_sync_single_for_device(dd->dev, dma_addr_out, length,
+ DMA_FROM_DEVICE);
if (dd->flags & AES_FLAGS_CFB8) {
dd->dma_lch_in.dma_conf.dst_addr_width =
@@ -391,6 +391,11 @@ static int atmel_aes_crypt_cpu_start(struct atmel_aes_dev *dd)
{
dd->flags &= ~AES_FLAGS_DMA;
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+ dd->dma_size, DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
+ dd->dma_size, DMA_FROM_DEVICE);
+
/* use cache buffers */
dd->nb_in_sg = atmel_aes_sg_length(dd->req, dd->in_sg);
if (!dd->nb_in_sg)
@@ -459,6 +464,9 @@ static int atmel_aes_crypt_dma_start(struct atmel_aes_dev *dd)
dd->flags |= AES_FLAGS_FAST;
} else {
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_in,
+ dd->dma_size, DMA_TO_DEVICE);
+
/* use cache buffers */
count = atmel_aes_sg_copy(&dd->in_sg, &dd->in_offset,
dd->buf_in, dd->buflen, dd->total, 0);
@@ -619,7 +627,7 @@ static int atmel_aes_crypt_dma_stop(struct atmel_aes_dev *dd)
dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE);
dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE);
} else {
- dma_sync_single_for_device(dd->dev, dd->dma_addr_out,
+ dma_sync_single_for_cpu(dd->dev, dd->dma_addr_out,
dd->dma_size, DMA_FROM_DEVICE);
/* copy data */
@@ -1246,6 +1254,11 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x200:
+ dd->caps.has_dualbuff = 1;
+ dd->caps.has_cfb64 = 1;
+ dd->caps.max_burst_size = 4;
+ break;
case 0x130:
dd->caps.has_dualbuff = 1;
dd->caps.has_cfb64 = 1;
@@ -1336,6 +1349,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, aes_dd);
INIT_LIST_HEAD(&aes_dd->list);
+ spin_lock_init(&aes_dd->lock);
tasklet_init(&aes_dd->done_task, atmel_aes_done_task,
(unsigned long)aes_dd);
@@ -1374,7 +1388,7 @@ static int atmel_aes_probe(struct platform_device *pdev)
/* Initializing the clock */
aes_dd->iclk = clk_get(&pdev->dev, "aes_clk");
if (IS_ERR(aes_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(aes_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index 34db04addc18..5b35433c5399 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -163,8 +163,20 @@ static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
count = min(ctx->sg->length - ctx->offset, ctx->total);
count = min(count, ctx->buflen - ctx->bufcnt);
- if (count <= 0)
- break;
+ if (count <= 0) {
+ /*
+ * Check if count <= 0 because the buffer is full or
+ * because the sg length is 0. In the latest case,
+ * check if there is another sg in the list, a 0 length
+ * sg doesn't necessarily mean the end of the sg list.
+ */
+ if ((ctx->sg->length == 0) && !sg_is_last(ctx->sg)) {
+ ctx->sg = sg_next(ctx->sg);
+ continue;
+ } else {
+ break;
+ }
+ }
scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
ctx->offset, count, 0);
@@ -420,14 +432,8 @@ static int atmel_sha_xmit_dma(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
dev_dbg(dd->dev, "xmit_dma: digcnt: 0x%llx 0x%llx, length: %d, final: %d\n",
ctx->digcnt[1], ctx->digcnt[0], length1, final);
- if (ctx->flags & (SHA_FLAGS_SHA1 | SHA_FLAGS_SHA224 |
- SHA_FLAGS_SHA256)) {
- dd->dma_lch_in.dma_conf.src_maxburst = 16;
- dd->dma_lch_in.dma_conf.dst_maxburst = 16;
- } else {
- dd->dma_lch_in.dma_conf.src_maxburst = 32;
- dd->dma_lch_in.dma_conf.dst_maxburst = 32;
- }
+ dd->dma_lch_in.dma_conf.src_maxburst = 16;
+ dd->dma_lch_in.dma_conf.dst_maxburst = 16;
dmaengine_slave_config(dd->dma_lch_in.chan, &dd->dma_lch_in.dma_conf);
@@ -529,7 +535,7 @@ static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
if (final)
atmel_sha_fill_padding(ctx, 0);
- if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
+ if (final || (ctx->bufcnt == ctx->buflen)) {
count = ctx->bufcnt;
ctx->bufcnt = 0;
return atmel_sha_xmit_dma_map(dd, ctx, count, final);
@@ -1266,6 +1272,12 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd)
/* keep only major version number */
switch (dd->hw_version & 0xff0) {
+ case 0x420:
+ dd->caps.has_dma = 1;
+ dd->caps.has_dualbuff = 1;
+ dd->caps.has_sha224 = 1;
+ dd->caps.has_sha_384_512 = 1;
+ break;
case 0x410:
dd->caps.has_dma = 1;
dd->caps.has_dualbuff = 1;
@@ -1349,6 +1361,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sha_dd);
INIT_LIST_HEAD(&sha_dd->list);
+ spin_lock_init(&sha_dd->lock);
tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
(unsigned long)sha_dd);
@@ -1385,7 +1398,7 @@ static int atmel_sha_probe(struct platform_device *pdev)
/* Initializing the clock */
sha_dd->iclk = clk_get(&pdev->dev, "sha_clk");
if (IS_ERR(sha_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(sha_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index 258772d9b22f..ca2999709eb4 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -1370,6 +1370,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tdes_dd);
INIT_LIST_HEAD(&tdes_dd->list);
+ spin_lock_init(&tdes_dd->lock);
tasklet_init(&tdes_dd->done_task, atmel_tdes_done_task,
(unsigned long)tdes_dd);
@@ -1408,7 +1409,7 @@ static int atmel_tdes_probe(struct platform_device *pdev)
/* Initializing the clock */
tdes_dd->iclk = clk_get(&pdev->dev, "tdes_clk");
if (IS_ERR(tdes_dd->iclk)) {
- dev_err(dev, "clock intialization failed.\n");
+ dev_err(dev, "clock initialization failed.\n");
err = PTR_ERR(tdes_dd->iclk);
goto clk_err;
}
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index f347ab7eea95..ba0532efd3ae 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -1172,6 +1172,7 @@ static int ahash_final_no_ctx(struct ahash_request *req)
return -ENOMEM;
}
+ edesc->sec4_sg_bytes = 0;
sh_len = desc_len(sh_desc);
desc = edesc->hw_desc;
init_job_desc_shared(desc, ptr, sh_len, HDR_SHARE_DEFER | HDR_REVERSE);
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index ae31e555793c..26a544b505f1 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -52,7 +52,7 @@
/* length of descriptors */
#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
-#define DESC_RNG_LEN (10 * CAAM_CMD_SZ)
+#define DESC_RNG_LEN (4 * CAAM_CMD_SZ)
/* Buffer, its dma address and lock */
struct buf_data {
@@ -90,8 +90,8 @@ static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
struct device *jrdev = ctx->jrdev;
if (ctx->sh_desc_dma)
- dma_unmap_single(jrdev, ctx->sh_desc_dma, DESC_RNG_LEN,
- DMA_TO_DEVICE);
+ dma_unmap_single(jrdev, ctx->sh_desc_dma,
+ desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
rng_unmap_buf(jrdev, &ctx->bufs[0]);
rng_unmap_buf(jrdev, &ctx->bufs[1]);
}
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 7f592d8d07bb..55a1f3951578 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -1,11 +1,6 @@
obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
-ccp-objs := ccp-dev.o ccp-ops.o
-ifdef CONFIG_X86
-ccp-objs += ccp-pci.o
-endif
-ifdef CONFIG_ARM64
-ccp-objs += ccp-platform.o
-endif
+ccp-objs := ccp-dev.o ccp-ops.o ccp-platform.o
+ccp-$(CONFIG_PCI) += ccp-pci.o
obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
ccp-crypto-objs := ccp-crypto-main.o \
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 8e162ad82085..ea7e8446956a 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -23,7 +23,6 @@
#include "ccp-crypto.h"
-
static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
int ret)
{
@@ -38,11 +37,13 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
if (rctx->hash_rem) {
/* Save remaining data to buffer */
unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
scatterwalk_map_and_copy(rctx->buf, rctx->src,
offset, rctx->hash_rem, 0);
rctx->buf_count = rctx->hash_rem;
- } else
+ } else {
rctx->buf_count = 0;
+ }
/* Update result area if supplied */
if (req->result)
@@ -202,7 +203,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
}
static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
- unsigned int key_len)
+ unsigned int key_len)
{
struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
struct ccp_crypto_ahash_alg *alg =
@@ -292,7 +293,8 @@ static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
cipher_tfm = crypto_alloc_cipher("aes", 0,
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+ CRYPTO_ALG_ASYNC |
+ CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(cipher_tfm)) {
pr_warn("could not load aes cipher driver\n");
return PTR_ERR(cipher_tfm);
@@ -354,7 +356,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 0cc5594b7de3..52c7395cb8d8 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -21,7 +21,6 @@
#include "ccp-crypto.h"
-
struct ccp_aes_xts_def {
const char *name;
const char *drv_name;
@@ -216,7 +215,6 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
ctx->u.aes.tfm_ablkcipher = NULL;
}
-
static int ccp_register_aes_xts_alg(struct list_head *head,
const struct ccp_aes_xts_def *def)
{
@@ -255,7 +253,7 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
ret = crypto_register_alg(alg);
if (ret) {
pr_err("%s ablkcipher algorithm registration error (%d)\n",
- alg->cra_name, ret);
+ alg->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index e46490db0f63..7984f910884d 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -22,7 +22,6 @@
#include "ccp-crypto.h"
-
static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
{
struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
@@ -345,7 +344,7 @@ static int ccp_register_aes_alg(struct list_head *head,
ret = crypto_register_alg(alg);
if (ret) {
pr_err("%s ablkcipher algorithm registration error (%d)\n",
- alg->cra_name, ret);
+ alg->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 4d4e016d755b..bdec01ec608f 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -33,7 +33,6 @@ static unsigned int sha_disable;
module_param(sha_disable, uint, 0444);
MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
-
/* List heads for the supported algorithms */
static LIST_HEAD(hash_algs);
static LIST_HEAD(cipher_algs);
@@ -48,6 +47,7 @@ struct ccp_crypto_queue {
struct list_head *backlog;
unsigned int cmd_count;
};
+
#define CCP_CRYPTO_MAX_QLEN 100
static struct ccp_crypto_queue req_queue;
@@ -77,7 +77,6 @@ struct ccp_crypto_cpu {
int err;
};
-
static inline bool ccp_crypto_success(int err)
{
if (err && (err != -EINPROGRESS) && (err != -EBUSY))
@@ -143,7 +142,7 @@ static void ccp_crypto_complete(void *data, int err)
int ret;
if (err == -EINPROGRESS) {
- /* Only propogate the -EINPROGRESS if necessary */
+ /* Only propagate the -EINPROGRESS if necessary */
if (crypto_cmd->ret == -EBUSY) {
crypto_cmd->ret = -EINPROGRESS;
req->complete(req, -EINPROGRESS);
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 96531571f7cf..507b34e0cc19 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -23,7 +23,6 @@
#include "ccp-crypto.h"
-
static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
{
struct ahash_request *req = ahash_request_cast(async_req);
@@ -37,11 +36,13 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
if (rctx->hash_rem) {
/* Save remaining data to buffer */
unsigned int offset = rctx->nbytes - rctx->hash_rem;
+
scatterwalk_map_and_copy(rctx->buf, rctx->src,
offset, rctx->hash_rem, 0);
rctx->buf_count = rctx->hash_rem;
- } else
+ } else {
rctx->buf_count = 0;
+ }
/* Update result area if supplied */
if (req->result)
@@ -227,8 +228,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
}
key_len = digest_size;
- } else
+ } else {
memcpy(ctx->u.sha.key, key, key_len);
+ }
for (i = 0; i < block_size; i++) {
ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
@@ -355,7 +357,7 @@ static int ccp_register_hmac_alg(struct list_head *head,
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
@@ -410,7 +412,7 @@ static int ccp_register_sha_alg(struct list_head *head,
ret = crypto_register_ahash(alg);
if (ret) {
pr_err("%s ahash algorithm registration error (%d)\n",
- base->cra_name, ret);
+ base->cra_name, ret);
kfree(ccp_alg);
return ret;
}
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 9aa4ae184f7f..76a96f0f44c6 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -13,7 +13,6 @@
#ifndef __CCP_CRYPTO_H__
#define __CCP_CRYPTO_H__
-
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/pci.h>
@@ -25,7 +24,6 @@
#include <crypto/hash.h>
#include <crypto/sha.h>
-
#define CCP_CRA_PRIORITY 300
struct ccp_crypto_ablkcipher_alg {
@@ -68,7 +66,6 @@ static inline struct ccp_crypto_ahash_alg *
return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
}
-
/***** AES related defines *****/
struct ccp_aes_ctx {
/* Fallback cipher for XTS with unsupported unit sizes */
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index ca29c120b85f..861bacc1bb94 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -37,7 +37,6 @@ struct ccp_tasklet_data {
struct ccp_cmd *cmd;
};
-
static struct ccp_device *ccp_dev;
static inline struct ccp_device *ccp_get_device(void)
{
@@ -296,11 +295,9 @@ struct ccp_device *ccp_alloc_struct(struct device *dev)
{
struct ccp_device *ccp;
- ccp = kzalloc(sizeof(*ccp), GFP_KERNEL);
- if (ccp == NULL) {
- dev_err(dev, "unable to allocate device struct\n");
+ ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL);
+ if (!ccp)
return NULL;
- }
ccp->dev = dev;
INIT_LIST_HEAD(&ccp->cmd);
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 62ff35a6b9ec..6ff89031fb96 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -21,7 +21,7 @@
#include <linux/wait.h>
#include <linux/dmapool.h>
#include <linux/hw_random.h>
-
+#include <linux/bitops.h>
#define MAX_DMAPOOL_NAME_LEN 32
@@ -33,7 +33,6 @@
#define CACHE_NONE 0x00
#define CACHE_WB_NO_ALLOC 0xb7
-
/****** Register Mappings ******/
#define Q_MASK_REG 0x000
#define TRNG_OUT_REG 0x00c
@@ -54,8 +53,8 @@
#define CMD_Q_CACHE_BASE 0x228
#define CMD_Q_CACHE_INC 0x20
-#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f);
-#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f);
+#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f)
+#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f)
/****** REQ0 Related Values ******/
#define REQ0_WAIT_FOR_WRITE 0x00000004
@@ -103,7 +102,6 @@
/****** REQ6 Related Values ******/
#define REQ6_MEMTYPE_SHIFT 16
-
/****** Key Storage Block ******/
#define KSB_START 77
#define KSB_END 127
@@ -114,7 +112,7 @@
#define CCP_JOBID_MASK 0x0000003f
#define CCP_DMAPOOL_MAX_SIZE 64
-#define CCP_DMAPOOL_ALIGN (1 << 5)
+#define CCP_DMAPOOL_ALIGN BIT(5)
#define CCP_REVERSE_BUF_SIZE 64
@@ -142,7 +140,6 @@
#define CCP_ECC_RESULT_OFFSET 60
#define CCP_ECC_RESULT_SUCCESS 0x0001
-
struct ccp_device;
struct ccp_cmd;
@@ -261,7 +258,6 @@ struct ccp_device {
unsigned int axcache;
};
-
int ccp_pci_init(void);
void ccp_pci_exit(void);
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 8729364261d7..71f2e3c89424 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -27,7 +27,6 @@
#include "ccp-dev.h"
-
enum ccp_memtype {
CCP_MEMTYPE_SYSTEM = 0,
CCP_MEMTYPE_KSB,
@@ -515,7 +514,6 @@ static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
if (!wa->dma_count)
return -ENOMEM;
-
return 0;
}
@@ -763,8 +761,9 @@ static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
op_len = min(sg_src_len, sg_dst_len);
- } else
+ } else {
op_len = sg_src_len;
+ }
/* The data operation length will be at least block_size in length
* or the smaller of available sg room remaining for the source or
@@ -1131,9 +1130,9 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
if (ret)
goto e_ctx;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
AES_BLOCK_SIZE, DMA_FROM_DEVICE);
if (ret)
@@ -1304,9 +1303,9 @@ static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
if (ret)
goto e_ctx;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
unit_size, DMA_FROM_DEVICE);
if (ret)
@@ -1451,8 +1450,9 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
goto e_ctx;
}
memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
- } else
+ } else {
ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
+ }
ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
CCP_PASSTHRU_BYTESWAP_256BIT);
@@ -1732,9 +1732,9 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
if (ret)
goto e_mask;
- if (in_place)
+ if (in_place) {
dst = src;
- else {
+ } else {
ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
if (ret)
@@ -1974,7 +1974,7 @@ static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
src.address += CCP_ECC_OPERAND_SIZE;
/* Set the first point Z coordianate to 1 */
- *(src.address) = 0x01;
+ *src.address = 0x01;
src.address += CCP_ECC_OPERAND_SIZE;
if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
@@ -1989,7 +1989