aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/crypto/api.rst9
-rw-r--r--Documentation/crypto/architecture.rst31
-rw-r--r--Documentation/devicetree/bindings/crypto/arm-cryptocell.txt8
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-dcp.txt2
-rw-r--r--MAINTAINERS9
-rw-r--r--arch/arm/crypto/Kconfig16
-rw-r--r--arch/arm/crypto/Makefile6
-rw-r--r--arch/arm/crypto/aes-ce-glue.c1
-rw-r--r--arch/arm/crypto/aes-cipher-core.S62
-rw-r--r--arch/arm/crypto/chacha-neon-core.S (renamed from arch/arm/crypto/chacha20-neon-core.S)98
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c201
-rw-r--r--arch/arm/crypto/chacha20-neon-glue.c127
-rw-r--r--arch/arm/crypto/nh-neon-core.S116
-rw-r--r--arch/arm/crypto/nhpoly1305-neon-glue.c77
-rw-r--r--arch/arm64/crypto/Kconfig7
-rw-r--r--arch/arm64/crypto/Makefile7
-rw-r--r--arch/arm64/crypto/chacha-neon-core.S (renamed from arch/arm64/crypto/chacha20-neon-core.S)484
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c198
-rw-r--r--arch/arm64/crypto/chacha20-neon-glue.c133
-rw-r--r--arch/arm64/crypto/nh-neon-core.S103
-rw-r--r--arch/arm64/crypto/nhpoly1305-neon-glue.c77
-rw-r--r--arch/s390/crypto/aes_s390.c2
-rw-r--r--arch/sparc/crypto/aes_glue.c5
-rw-r--r--arch/sparc/crypto/camellia_glue.c5
-rw-r--r--arch/sparc/crypto/des_glue.c5
-rw-r--r--arch/x86/crypto/Makefile18
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S2125
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c353
-rw-r--r--arch/x86/crypto/chacha-avx2-x86_64.S1025
-rw-r--r--arch/x86/crypto/chacha-avx512vl-x86_64.S836
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S (renamed from arch/x86/crypto/chacha20-ssse3-x86_64.S)327
-rw-r--r--arch/x86/crypto/chacha20-avx2-x86_64.S448
-rw-r--r--arch/x86/crypto/chacha20_glue.c146
-rw-r--r--arch/x86/crypto/chacha_glue.c304
-rw-r--r--arch/x86/crypto/nh-avx2-x86_64.S157
-rw-r--r--arch/x86/crypto/nh-sse2-x86_64.S123
-rw-r--r--arch/x86/crypto/nhpoly1305-avx2-glue.c77
-rw-r--r--arch/x86/crypto/nhpoly1305-sse2-glue.c76
-rw-r--r--arch/x86/crypto/poly1305_glue.c20
-rw-r--r--crypto/Kconfig99
-rw-r--r--crypto/Makefile8
-rw-r--r--crypto/ablkcipher.c94
-rw-r--r--crypto/acompress.c10
-rw-r--r--crypto/adiantum.c664
-rw-r--r--crypto/aead.c14
-rw-r--r--crypto/aes_generic.c9
-rw-r--r--crypto/aes_ti.c18
-rw-r--r--crypto/ahash.c29
-rw-r--r--crypto/akcipher.c11
-rw-r--r--crypto/algapi.c247
-rw-r--r--crypto/blkcipher.c20
-rw-r--r--crypto/cfb.c2
-rw-r--r--crypto/chacha20_generic.c137
-rw-r--r--crypto/chacha20poly1305.c12
-rw-r--r--crypto/chacha_generic.c217
-rw-r--r--crypto/cryptd.c4
-rw-r--r--crypto/crypto_user_base.c136
-rw-r--r--crypto/crypto_user_stat.c301
-rw-r--r--crypto/ctr.c2
-rw-r--r--crypto/ecc.c58
-rw-r--r--crypto/hash_info.c4
-rw-r--r--crypto/kpp.c10
-rw-r--r--crypto/lz4.c1
-rw-r--r--crypto/lz4hc.c1
-rw-r--r--crypto/nhpoly1305.c254
-rw-r--r--crypto/pcrypt.c2
-rw-r--r--crypto/poly1305_generic.c174
-rw-r--r--crypto/rng.c16
-rw-r--r--crypto/salsa20_generic.c2
-rw-r--r--crypto/scompress.c11
-rw-r--r--crypto/shash.c12
-rw-r--r--crypto/skcipher.c23
-rw-r--r--crypto/streebog_generic.c1140
-rw-r--r--crypto/tcrypt.c59
-rw-r--r--crypto/testmgr.c62
-rw-r--r--crypto/testmgr.h3220
-rw-r--r--drivers/block/drbd/drbd_receiver.c2
-rw-r--r--drivers/char/hw_random/bcm2835-rng.c7
-rw-r--r--drivers/char/random.c51
-rw-r--r--drivers/crypto/Kconfig4
-rw-r--r--drivers/crypto/amcc/crypto4xx_alg.c3
-rw-r--r--drivers/crypto/bcm/cipher.c9
-rw-r--r--drivers/crypto/caam/caamalg.c266
-rw-r--r--drivers/crypto/caam/caamalg_desc.c139
-rw-r--r--drivers/crypto/caam/caamalg_desc.h5
-rw-r--r--drivers/crypto/caam/caamalg_qi.c37
-rw-r--r--drivers/crypto/caam/caamalg_qi2.c156
-rw-r--r--drivers/crypto/caam/caamhash.c20
-rw-r--r--drivers/crypto/caam/caampkc.c10
-rw-r--r--drivers/crypto/caam/caamrng.c10
-rw-r--r--drivers/crypto/caam/compat.h2
-rw-r--r--drivers/crypto/caam/ctrl.c28
-rw-r--r--drivers/crypto/caam/desc.h28
-rw-r--r--drivers/crypto/caam/desc_constr.h7
-rw-r--r--drivers/crypto/caam/regs.h74
-rw-r--r--drivers/crypto/cavium/nitrox/Makefile5
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_aead.c364
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_algs.c456
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_common.h6
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_csr.h12
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_debugfs.c48
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_debugfs.h21
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_dev.h74
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_hal.c114
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_hal.h2
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_isr.c92
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_isr.h2
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_lib.c22
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_main.c3
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_mbx.c204
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_mbx.h9
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_req.h326
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_reqmgr.c302
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_skcipher.c498
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_sriov.c94
-rw-r--r--drivers/crypto/ccp/ccp-crypto-aes-cmac.c4
-rw-r--r--drivers/crypto/ccree/cc_aead.c35
-rw-r--r--drivers/crypto/ccree/cc_cipher.c104
-rw-r--r--drivers/crypto/ccree/cc_crypto_ctx.h4
-rw-r--r--drivers/crypto/ccree/cc_driver.c50
-rw-r--r--drivers/crypto/ccree/cc_driver.h15
-rw-r--r--drivers/crypto/ccree/cc_hash.c189
-rw-r--r--drivers/crypto/ccree/cc_hw_queue_defs.h30
-rw-r--r--drivers/crypto/chelsio/chcr_algo.c418
-rw-r--r--drivers/crypto/chelsio/chcr_algo.h2
-rw-r--r--drivers/crypto/chelsio/chcr_core.c195
-rw-r--r--drivers/crypto/chelsio/chcr_core.h44
-rw-r--r--drivers/crypto/chelsio/chcr_crypto.h10
-rw-r--r--drivers/crypto/chelsio/chcr_ipsec.c183
-rw-r--r--drivers/crypto/geode-aes.c2
-rw-r--r--drivers/crypto/inside-secure/safexcel_cipher.c8
-rw-r--r--drivers/crypto/ixp4xx_crypto.c5
-rw-r--r--drivers/crypto/mxc-scc.c12
-rw-r--r--drivers/crypto/mxs-dcp.c28
-rw-r--r--drivers/crypto/nx/nx-aes-ctr.c1
-rw-r--r--drivers/crypto/omap-aes.c3
-rw-r--r--drivers/crypto/omap-des.c1
-rw-r--r--drivers/crypto/picoxcell_crypto.c3
-rw-r--r--drivers/crypto/qce/ablkcipher.c1
-rw-r--r--drivers/crypto/qce/sha.c1
-rw-r--r--drivers/crypto/sahara.c1
-rw-r--r--drivers/crypto/talitos.c1
-rw-r--r--drivers/crypto/ux500/cryp/cryp_core.c4
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c2
-rw-r--r--drivers/md/dm-crypt.c2
-rw-r--r--drivers/md/dm-integrity.c2
-rw-r--r--drivers/net/wireless/cisco/airo.c2
-rw-r--r--drivers/net/wireless/intersil/orinoco/mic.c6
-rw-r--r--drivers/staging/rtl8192e/rtllib_crypt_ccmp.c2
-rw-r--r--drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_ccmp.c2
-rw-r--r--drivers/usb/wusbcore/crypto.c2
-rw-r--r--fs/ubifs/auth.c5
-rw-r--r--include/crypto/acompress.h38
-rw-r--r--include/crypto/aead.h41
-rw-r--r--include/crypto/akcipher.h74
-rw-r--r--include/crypto/chacha.h54
-rw-r--r--include/crypto/chacha20.h27
-rw-r--r--include/crypto/hash.h32
-rw-r--r--include/crypto/hash_info.h1
-rw-r--r--include/crypto/internal/cryptouser.h9
-rw-r--r--include/crypto/internal/skcipher.h2
-rw-r--r--include/crypto/kpp.h48
-rw-r--r--include/crypto/nhpoly1305.h74
-rw-r--r--include/crypto/poly1305.h28
-rw-r--r--include/crypto/rng.h27
-rw-r--r--include/crypto/skcipher.h49
-rw-r--r--include/crypto/streebog.h34
-rw-r--r--include/linux/crypto.h331
-rw-r--r--include/uapi/linux/cryptouser.h102
-rw-r--r--include/uapi/linux/hash_info.h2
-rw-r--r--kernel/padata.c2
-rw-r--r--lib/Makefile2
-rw-r--r--lib/chacha.c (renamed from lib/chacha20.c)59
-rw-r--r--net/bluetooth/smp.c8
-rw-r--r--net/mac80211/wep.c4
-rw-r--r--net/wireless/lib80211_crypt_ccmp.c2
-rw-r--r--net/wireless/lib80211_crypt_tkip.c4
-rw-r--r--net/wireless/lib80211_crypt_wep.c4
-rw-r--r--security/apparmor/crypto.c2
-rw-r--r--security/integrity/evm/evm_crypto.c3
-rw-r--r--security/keys/encrypted-keys/encrypted.c4
-rw-r--r--security/keys/trusted.c4
-rw-r--r--tools/crypto/getstat.c72
183 files changed, 15845 insertions, 5096 deletions
diff --git a/Documentation/crypto/api.rst b/Documentation/crypto/api.rst
index 2e519193ab4a..b91b31736df8 100644
--- a/Documentation/crypto/api.rst
+++ b/Documentation/crypto/api.rst
@@ -1,15 +1,6 @@
Programming Interface
=====================
-Please note that the kernel crypto API contains the AEAD givcrypt API
-(crypto_aead_giv\* and aead_givcrypt\* function calls in
-include/crypto/aead.h). This API is obsolete and will be removed in the
-future. To obtain the functionality of an AEAD cipher with internal IV
-generation, use the IV generator as a regular cipher. For example,
-rfc4106(gcm(aes)) is the AEAD cipher with external IV generation and
-seqniv(rfc4106(gcm(aes))) implies that the kernel crypto API generates
-the IV. Different IV generators are available.
-
.. class:: toc-title
Table of contents
diff --git a/Documentation/crypto/architecture.rst b/Documentation/crypto/architecture.rst
index ca2d09b991f5..ee8ff0762d7f 100644
--- a/Documentation/crypto/architecture.rst
+++ b/Documentation/crypto/architecture.rst
@@ -157,10 +157,6 @@ applicable to a cipher, it is not displayed:
- rng for random number generator
- - givcipher for cipher with associated IV generator (see the geniv
- entry below for the specification of the IV generator type used by
- the cipher implementation)
-
- kpp for a Key-agreement Protocol Primitive (KPP) cipher such as
an ECDH or DH implementation
@@ -174,16 +170,7 @@ applicable to a cipher, it is not displayed:
- digestsize: output size of the message digest
-- geniv: IV generation type:
-
- - eseqiv for encrypted sequence number based IV generation
-
- - seqiv for sequence number based IV generation
-
- - chainiv for chain iv generation
-
- - <builtin> is a marker that the cipher implements IV generation and
- handling as it is specific to the given cipher
+- geniv: IV generator (obsolete)
Key Sizes
---------
@@ -218,10 +205,6 @@ the aforementioned cipher types:
- CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher
-- CRYPTO_ALG_TYPE_GIVCIPHER Asynchronous multi-block cipher packed
- together with an IV generator (see geniv field in the /proc/crypto
- listing for the known IV generators)
-
- CRYPTO_ALG_TYPE_KPP Key-agreement Protocol Primitive (KPP) such as
an ECDH or DH implementation
@@ -338,18 +321,14 @@ uses the API applicable to the cipher type specified for the block.
The following call sequence is applicable when the IPSEC layer triggers
an encryption operation with the esp_output function. During
-configuration, the administrator set up the use of rfc4106(gcm(aes)) as
-the cipher for ESP. The following call sequence is now depicted in the
-ASCII art above:
+configuration, the administrator set up the use of seqiv(rfc4106(gcm(aes)))
+as the cipher for ESP. The following call sequence is now depicted in
+the ASCII art above:
1. esp_output() invokes crypto_aead_encrypt() to trigger an
encryption operation of the AEAD cipher with IV generator.
- In case of GCM, the SEQIV implementation is registered as GIVCIPHER
- in crypto_rfc4106_alloc().
-
- The SEQIV performs its operation to generate an IV where the core
- function is seqiv_geniv().
+ The SEQIV generates the IV.
2. Now, SEQIV uses the AEAD API function calls to invoke the associated
AEAD cipher. In our case, during the instantiation of SEQIV, the
diff --git a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
index 999fb2a810f6..6130e6eb4af8 100644
--- a/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
+++ b/Documentation/devicetree/bindings/crypto/arm-cryptocell.txt
@@ -1,8 +1,12 @@
Arm TrustZone CryptoCell cryptographic engine
Required properties:
-- compatible: Should be one of: "arm,cryptocell-712-ree",
- "arm,cryptocell-710-ree" or "arm,cryptocell-630p-ree".
+- compatible: Should be one of -
+ "arm,cryptocell-713-ree"
+ "arm,cryptocell-703-ree"
+ "arm,cryptocell-712-ree"
+ "arm,cryptocell-710-ree"
+ "arm,cryptocell-630p-ree"
- reg: Base physical address of the engine and length of memory mapped region.
- interrupts: Interrupt number for the device.
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
index 76a0b4e80e83..4e4d387e38a5 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
@@ -6,6 +6,8 @@ Required properties:
- interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ
must be supplied, optionally Secure IRQ can be present, but
is currently not implemented and not used.
+- clocks : Clock reference (only required on some SOCs: 6ull and 6sll).
+- clock-names : Must be "dcp".
Example:
diff --git a/MAINTAINERS b/MAINTAINERS
index db3f690eb590..7a9804a891fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3484,6 +3484,7 @@ F: include/linux/spi/cc2520.h
F: Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
CCREE ARM TRUSTZONE CRYPTOCELL REE DRIVER
+M: Yael Chemla <yael.chemla@foss.arm.com>
M: Gilad Ben-Yossef <gilad@benyossef.com>
L: linux-crypto@vger.kernel.org
S: Supported
@@ -7147,7 +7148,9 @@ F: crypto/842.c
F: lib/842/
IBM Power in-Nest Crypto Acceleration
-M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com>
+M: Breno Leitão <leitao@debian.org>
+M: Nayna Jain <nayna@linux.ibm.com>
+M: Paulo Flabiano Smorigo <pfsmorigo@gmail.com>
L: linux-crypto@vger.kernel.org
S: Supported
F: drivers/crypto/nx/Makefile
@@ -7211,7 +7214,9 @@ S: Supported
F: drivers/scsi/ibmvscsi_tgt/
IBM Power VMX Cryptographic instructions
-M: Paulo Flabiano Smorigo <pfsmorigo@linux.ibm.com>
+M: Breno Leitão <leitao@debian.org>
+M: Nayna Jain <nayna@linux.ibm.com>
+M: Paulo Flabiano Smorigo <pfsmorigo@gmail.com>
L: linux-crypto@vger.kernel.org
S: Supported
F: drivers/crypto/vmx/Makefile
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index ef0c7feea6e2..a95322b59799 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -69,6 +69,15 @@ config CRYPTO_AES_ARM
help
Use optimized AES assembler routines for ARM platforms.
+ On ARM processors without the Crypto Extensions, this is the
+ fastest AES implementation for single blocks. For multiple
+ blocks, the NEON bit-sliced implementation is usually faster.
+
+ This implementation may be vulnerable to cache timing attacks,
+ since it uses lookup tables. However, as countermeasures it
+ disables IRQs and preloads the tables; it is hoped this makes
+ such attacks very difficult.
+
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
@@ -117,9 +126,14 @@ config CRYPTO_CRC32_ARM_CE
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha20 symmetric cipher"
+ tristate "NEON accelerated ChaCha stream cipher algorithms"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
+config CRYPTO_NHPOLY1305_NEON
+ tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_NHPOLY1305
+
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index bd5bceef0605..b65d6bfab8e6 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -9,7 +9,8 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
@@ -52,7 +53,8 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index d0a9cec73707..5affb8482379 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -10,7 +10,6 @@
#include <asm/hwcap.h>
#include <asm/neon.h>
-#include <asm/hwcap.h>
#include <crypto/aes.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 184d6c2d15d5..f2d67c095e59 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#include <asm/cache.h>
.text
@@ -41,7 +42,7 @@
.endif
.endm
- .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op
+ .macro __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
__select \out0, \in0, 0
__select t0, \in1, 1
__load \out0, \out0, 0, \sz, \op
@@ -73,6 +74,14 @@
__load t0, t0, 3, \sz, \op
__load \t4, \t4, 3, \sz, \op
+ .ifnb \oldcpsr
+ /*
+ * This is the final round and we're done with all data-dependent table
+ * lookups, so we can safely re-enable interrupts.
+ */
+ restore_irqs \oldcpsr
+ .endif
+
eor \out1, \out1, t1, ror #24
eor \out0, \out0, t2, ror #16
ldm rk!, {t1, t2}
@@ -83,14 +92,14 @@
eor \out1, \out1, t2
.endm
- .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
- __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
.endm
- .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
__hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
- __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
.macro __rev, out, in
@@ -118,13 +127,14 @@
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
+ // Load keys first, to reduce latency in case they're not cached yet.
+ ldm rk!, {r8-r11}
+
ldr r4, [in]
ldr r5, [in, #4]
ldr r6, [in, #8]
ldr r7, [in, #12]
- ldm rk!, {r8-r11}
-
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
__rev r5, r5
@@ -138,6 +148,25 @@
eor r7, r7, r11
__adrl ttab, \ttab
+ /*
+ * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
+ * L1 cache, assuming cacheline size >= 32. This is a hardening measure
+ * intended to make cache-timing attacks more difficult. They may not
+ * be fully prevented, however; see the paper
+ * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
+ * ("Cache-timing attacks on AES") for a discussion of the many
+ * difficulties involved in writing truly constant-time AES software.
+ */
+ save_and_disable_irqs t0
+ .set i, 0
+ .rept 1024 / 128
+ ldr r8, [ttab, #i + 0]
+ ldr r9, [ttab, #i + 32]
+ ldr r10, [ttab, #i + 64]
+ ldr r11, [ttab, #i + 96]
+ .set i, i + 128
+ .endr
+ push {t0} // oldcpsr
tst rounds, #2
bne 1f
@@ -151,8 +180,21 @@
\round r4, r5, r6, r7, r8, r9, r10, r11
b 0b
-2: __adrl ttab, \ltab
- \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b
+2: .ifb \ltab
+ add ttab, ttab, #1
+ .else
+ __adrl ttab, \ltab
+ // Prefetch inverse S-box for final round; see explanation above
+ .set i, 0
+ .rept 256 / 64
+ ldr t0, [ttab, #i + 0]
+ ldr t1, [ttab, #i + 32]
+ .set i, i + 64
+ .endr
+ .endif
+
+ pop {rounds} // oldcpsr
+ \round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
__rev r4, r4
@@ -175,7 +217,7 @@
.endm
ENTRY(__aes_arm_encrypt)
- do_crypt fround, crypto_ft_tab, crypto_ft_tab + 1, 2
+ do_crypt fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)
.align 5
diff --git a/arch/arm/crypto/chacha20-neon-core.S b/arch/arm/crypto/chacha-neon-core.S
index 50e7b9896818..eb22926d4912 100644
--- a/arch/arm/crypto/chacha20-neon-core.S
+++ b/arch/arm/crypto/chacha-neon-core.S
@@ -1,5 +1,5 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
+ * ChaCha/XChaCha NEON helper functions
*
* Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
@@ -27,9 +27,9 @@
* (d) vtbl.8 + vtbl.8 (multiple of 8 bits rotations only,
* needs index vector)
*
- * ChaCha20 has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit
- * rotations, the only choices are (a) and (b). We use (a) since it takes
- * two-thirds the cycles of (b) on both Cortex-A7 and Cortex-A53.
+ * ChaCha has 16, 12, 8, and 7-bit rotations. For the 12 and 7-bit rotations,
+ * the only choices are (a) and (b). We use (a) since it takes two-thirds the
+ * cycles of (b) on both Cortex-A7 and Cortex-A53.
*
* For the 16-bit rotation, we use vrev32.16 since it's consistently fastest
* and doesn't need a temporary register.
@@ -52,30 +52,20 @@
.fpu neon
.align 5
-ENTRY(chacha20_block_xor_neon)
- // r0: Input state matrix, s
- // r1: 1 data block output, o
- // r2: 1 data block input, i
-
- //
- // This function encrypts one ChaCha20 block by loading the state matrix
- // in four NEON registers. It performs matrix operation on four words in
- // parallel, but requireds shuffling to rearrange the words after each
- // round.
- //
-
- // x0..3 = s0..3
- add ip, r0, #0x20
- vld1.32 {q0-q1}, [r0]
- vld1.32 {q2-q3}, [ip]
-
- vmov q8, q0
- vmov q9, q1
- vmov q10, q2
- vmov q11, q3
+/*
+ * chacha_permute - permute one block
+ *
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers q0-q3. It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ *
+ * The round count is given in r3.
+ *
+ * Clobbers: r3, ip, q4-q5
+ */
+chacha_permute:
adr ip, .Lrol8_table
- mov r3, #10
vld1.8 {d10}, [ip, :64]
.Ldoubleround:
@@ -139,9 +129,31 @@ ENTRY(chacha20_block_xor_neon)
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
vext.8 q3, q3, q3, #4
- subs r3, r3, #1
+ subs r3, r3, #2
bne .Ldoubleround
+ bx lr
+ENDPROC(chacha_permute)
+
+ENTRY(chacha_block_xor_neon)
+ // r0: Input state matrix, s
+ // r1: 1 data block output, o
+ // r2: 1 data block input, i
+ // r3: nrounds
+ push {lr}
+
+ // x0..3 = s0..3
+ add ip, r0, #0x20
+ vld1.32 {q0-q1}, [r0]
+ vld1.32 {q2-q3}, [ip]
+
+ vmov q8, q0
+ vmov q9, q1
+ vmov q10, q2
+ vmov q11, q3
+
+ bl chacha_permute
+
add ip, r2, #0x20
vld1.8 {q4-q5}, [r2]
vld1.8 {q6-q7}, [ip]
@@ -166,15 +178,33 @@ ENTRY(chacha20_block_xor_neon)
vst1.8 {q0-q1}, [r1]
vst1.8 {q2-q3}, [ip]
- bx lr
-ENDPROC(chacha20_block_xor_neon)
+ pop {pc}
+ENDPROC(chacha_block_xor_neon)
+
+ENTRY(hchacha_block_neon)
+ // r0: Input state matrix, s
+ // r1: output (8 32-bit words)
+ // r2: nrounds
+ push {lr}
+
+ vld1.32 {q0-q1}, [r0]!
+ vld1.32 {q2-q3}, [r0]
+
+ mov r3, r2
+ bl chacha_permute
+
+ vst1.32 {q0}, [r1]!
+ vst1.32 {q3}, [r1]
+
+ pop {pc}
+ENDPROC(hchacha_block_neon)
.align 4
.Lctrinc: .word 0, 1, 2, 3
.Lrol8_table: .byte 3, 0, 1, 2, 7, 4, 5, 6
.align 5
-ENTRY(chacha20_4block_xor_neon)
+ENTRY(chacha_4block_xor_neon)
push {r4-r5}
mov r4, sp // preserve the stack pointer
sub ip, sp, #0x20 // allocate a 32 byte buffer
@@ -184,9 +214,10 @@ ENTRY(chacha20_4block_xor_neon)
// r0: Input state matrix, s
// r1: 4 data blocks output, o
// r2: 4 data blocks input, i
+ // r3: nrounds
//
- // This function encrypts four consecutive ChaCha20 blocks by loading
+ // This function encrypts four consecutive ChaCha blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
// requires no word shuffling. The words are re-interleaved before the
@@ -219,7 +250,6 @@ ENTRY(chacha20_4block_xor_neon)
vdup.32 q0, d0[0]
adr ip, .Lrol8_table
- mov r3, #10
b 1f
.Ldoubleround4:
@@ -417,7 +447,7 @@ ENTRY(chacha20_4block_xor_neon)
vsri.u32 q5, q8, #25
vsri.u32 q6, q9, #25
- subs r3, r3, #1
+ subs r3, r3, #2
bne .Ldoubleround4
// x0..7[0-3] are in q0-q7, x10..15[0-3] are in q10-q15.
@@ -527,4 +557,4 @@ ENTRY(chacha20_4block_xor_neon)
pop {r4-r5}
bx lr
-ENDPROC(chacha20_4block_xor_neon)
+ENDPROC(chacha_4block_xor_neon)
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
new file mode 100644
index 000000000000..9d6fda81986d
--- /dev/null
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -0,0 +1,201 @@
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE;
+ src += CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+static int chacha_neon_stream_xor(struct skcipher_request *req,
+ struct chacha_ctx *ctx, u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ crypto_chacha_init(state, ctx, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ return crypto_chacha_crypt(req);
+
+ return chacha_neon_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ return crypto_xchacha_crypt(req);
+
+ crypto_chacha_init(state, ctx, req->iv);
+
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_neon_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha_neon,
+ .decrypt = chacha_neon,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha12_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_NEON))
+ return -ENODEV;
+
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha20-neon-glue.c b/arch/arm/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 59a7be08e80c..000000000000
--- a/arch/arm/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- chacha20_4block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_neon(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- kernel_neon_begin();
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
- kernel_neon_end();
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .walksize = 4 * CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_neon,
- .decrypt = chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S
new file mode 100644
index 000000000000..434d80ab531c
--- /dev/null
+++ b/arch/arm/crypto/nh-neon-core.S
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, NEON accelerated version
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+ .text
+ .fpu neon
+
+ KEY .req r0
+ MESSAGE .req r1
+ MESSAGE_LEN .req r2
+ HASH .req r3
+
+ PASS0_SUMS .req q0
+ PASS0_SUM_A .req d0
+ PASS0_SUM_B .req d1
+ PASS1_SUMS .req q1
+ PASS1_SUM_A .req d2
+ PASS1_SUM_B .req d3
+ PASS2_SUMS .req q2
+ PASS2_SUM_A .req d4
+ PASS2_SUM_B .req d5
+ PASS3_SUMS .req q3
+ PASS3_SUM_A .req d6
+ PASS3_SUM_B .req d7
+ K0 .req q4
+ K1 .req q5
+ K2 .req q6
+ K3 .req q7
+ T0 .req q8
+ T0_L .req d16
+ T0_H .req d17
+ T1 .req q9
+ T1_L .req d18
+ T1_H .req d19
+ T2 .req q10
+ T2_L .req d20
+ T2_H .req d21
+ T3 .req q11
+ T3_L .req d22
+ T3_H .req d23
+
+.macro _nh_stride k0, k1, k2, k3
+
+ // Load next message stride
+ vld1.8 {T3}, [MESSAGE]!
+
+ // Load next key stride
+ vld1.32 {\k3}, [KEY]!
+
+ // Add message words to key words
+ vadd.u32 T0, T3, \k0
+ vadd.u32 T1, T3, \k1
+ vadd.u32 T2, T3, \k2
+ vadd.u32 T3, T3, \k3
+
+ // Multiply 32x32 => 64 and accumulate
+ vmlal.u32 PASS0_SUMS, T0_L, T0_H
+ vmlal.u32 PASS1_SUMS, T1_L, T1_H
+ vmlal.u32 PASS2_SUMS, T2_L, T2_H
+ vmlal.u32 PASS3_SUMS, T3_L, T3_H
+.endm
+
+/*
+ * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ * u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_neon)
+
+ vld1.32 {K0,K1}, [KEY]!
+ vmov.u64 PASS0_SUMS, #0
+ vmov.u64 PASS1_SUMS, #0
+ vld1.32 {K2}, [KEY]!
+ vmov.u64 PASS2_SUMS, #0
+ vmov.u64 PASS3_SUMS, #0
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #64
+ blt .Lloop4_done
+.Lloop4:
+ _nh_stride K0, K1, K2, K3
+ _nh_stride K1, K2, K3, K0
+ _nh_stride K2, K3, K0, K1
+ _nh_stride K3, K0, K1, K2
+ subs MESSAGE_LEN, MESSAGE_LEN, #64
+ bge .Lloop4
+
+.Lloop4_done:
+ ands MESSAGE_LEN, MESSAGE_LEN, #63
+ beq .Ldone
+ _nh_stride K0, K1, K2, K3
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #16
+ beq .Ldone
+ _nh_stride K1, K2, K3, K0
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #16
+ beq .Ldone
+ _nh_stride K2, K3, K0, K1
+
+.Ldone:
+ // Sum the accumulators for each pass, then store the sums to 'hash'
+ vadd.u64 T0_L, PASS0_SUM_A, PASS0_SUM_B
+ vadd.u64 T0_H, PASS1_SUM_A, PASS1_SUM_B
+ vadd.u64 T1_L, PASS2_SUM_A, PASS2_SUM_B
+ vadd.u64 T1_H, PASS3_SUM_A, PASS3_SUM_B
+ vst1.8 {T0-T1}, [HASH]
+ bx lr
+ENDPROC(nh_neon)
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
new file mode 100644
index 000000000000..49aae87cb2bc
--- /dev/null
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (NEON accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+
+asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ nh_neon(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_neon_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ if (srclen < 64 || !may_use_simd())
+ return crypto_nhpoly1305_update(desc, src, srclen);
+
+ do {
+ unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+ kernel_neon_begin();
+ crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+ kernel_neon_end();
+ src += n;
+ srclen -= n;
+ } while (srclen);
+ return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+ .base.cra_name = "nhpoly1305",
+ .base.cra_driver_name = "nhpoly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
+ .base.cra_module = THIS_MODULE,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_nhpoly1305_init,
+ .update = nhpoly1305_neon_update,
+ .final = crypto_nhpoly1305_final,
+ .setkey = crypto_nhpoly1305_setkey,
+ .descsize = sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_NEON))
+ return -ENODEV;
+
+ return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-neon");
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index a5606823ed4d..d9a523ecdd83 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -101,11 +101,16 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_SIMD
config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha20 symmetric cipher"
+ tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
depends on KERNEL_MODE_NEON
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
+config CRYPTO_NHPOLY1305_NEON
+ tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_NHPOLY1305
+
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index f476fede09ba..a4ffd9fe3265 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,8 +50,11 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
-chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+
+obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
+nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
diff --git a/arch/arm64/crypto/chacha20-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S
index 13c85e272c2a..021bb9e9784b 100644
--- a/arch/arm64/crypto/chacha20-neon-core.S
+++ b/arch/arm64/crypto/chacha-neon-core.S
@@ -1,13 +1,13 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
+ * ChaCha/XChaCha NEON helper functions
*
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2016-2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * Based on:
+ * Originally based on:
* ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
*
* Copyright (C) 2015 Martin Willi
@@ -19,29 +19,27 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
.text
.align 6
-ENTRY(chacha20_block_xor_neon)
- // x0: Input state matrix, s
- // x1: 1 data block output, o
- // x2: 1 data block input, i
-
- //
- // This function encrypts one ChaCha20 block by loading the state matrix
- // in four NEON registers. It performs matrix operation on four words in
- // parallel, but requires shuffling to rearrange the words after each
- // round.
- //
-
- // x0..3 = s0..3
- adr x3, ROT8
- ld1 {v0.4s-v3.4s}, [x0]
- ld1 {v8.4s-v11.4s}, [x0]
- ld1 {v12.4s}, [x3]
+/*
+ * chacha_permute - permute one block
+ *
+ * Permute one 64-byte block where the state matrix is stored in the four NEON
+ * registers v0-v3. It performs matrix operations on four words in parallel,
+ * but requires shuffling to rearrange the words after each round.
+ *
+ * The round count is given in w3.
+ *
+ * Clobbers: w3, x10, v4, v12
+ */
+chacha_permute:
- mov x3, #10
+ adr_l x10, ROT8
+ ld1 {v12.4s}, [x10]
.Ldoubleround:
// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -102,9 +100,27 @@ ENTRY(chacha20_block_xor_neon)
// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
ext v3.16b, v3.16b, v3.16b, #4
- subs x3, x3, #1
+ subs w3, w3, #2
b.ne .Ldoubleround
+ ret
+ENDPROC(chacha_permute)
+
+ENTRY(chacha_block_xor_neon)
+ // x0: Input state matrix, s
+ // x1: 1 data block output, o
+ // x2: 1 data block input, i
+ // w3: nrounds
+
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ // x0..3 = s0..3
+ ld1 {v0.4s-v3.4s}, [x0]
+ ld1 {v8.4s-v11.4s}, [x0]
+
+ bl chacha_permute
+
ld1 {v4.16b-v7.16b}, [x2]
// o0 = i0 ^ (x0 + s0)
@@ -125,71 +141,156 @@ ENTRY(chacha20_block_xor_neon)
st1 {v0.16b-v3.16b}, [x1]
+ ldp x29, x30, [sp], #16
ret
-ENDPROC(chacha20_block_xor_neon)
+ENDPROC(chacha_block_xor_neon)
+
+ENTRY(hchacha_block_neon)
+ // x0: Input state matrix, s
+ // x1: output (8 32-bit words)
+ // w2: nrounds
+
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ ld1 {v0.4s-v3.4s}, [x0]
+
+ mov w3, w2
+ bl chacha_permute
+
+ st1 {v0.16b}, [x1], #16
+ st1 {v3.16b}, [x1]
+
+ ldp x29, x30, [sp], #16
+ ret
+ENDPROC(hchacha_block_neon)
+
+ a0 .req w12
+ a1 .req w13
+ a2 .req w14
+ a3 .req w15
+ a4 .req w16
+ a5 .req w17
+ a6 .req w19
+ a7 .req w20
+ a8 .req w21
+ a9 .req w22
+ a10 .req w23
+ a11 .req w24
+ a12 .req w25
+ a13 .req w26
+ a14 .req w27
+ a15 .req w28
.align 6
-ENTRY(chacha20_4block_xor_neon)
+ENTRY(chacha_4block_xor_neon)
+ frame_push 10
+
// x0: Input state matrix, s
// x1: 4 data blocks output, o
// x2: 4 data blocks input, i
+ // w3: nrounds
+ // x4: byte count
+
+ adr_l x10, .Lpermute
+ and x5, x4, #63
+ add x10, x10, x5
+ add x11, x10, #64
//
- // This function encrypts four consecutive ChaCha20 blocks by loading
+ // This function encrypts four consecutive ChaCha blocks by loading
// the state matrix in NEON registers four times. The algorithm performs
// each operation on the corresponding word of each state matrix, hence
// requires no word shuffling. For final XORing step we transpose the
// matrix by interleaving 32- and then 64-bit words, which allows us to
// do XOR in NEON registers.
//
- adr x3, CTRINC // ... and ROT8
- ld1 {v30.4s-v31.4s}, [x3]
+ // At the same time, a fifth block is encrypted in parallel using
+ // scalar registers
+ //
+ adr_l x9, CTRINC // ... and ROT8
+ ld1 {v30.4s-v31.4s}, [x9]
// x0..15[0-3] = s0..3[0..3]
- mov x4, x0
- ld4r { v0.4s- v3.4s}, [x4], #16
- ld4r { v4.4s- v7.4s}, [x4], #16
- ld4r { v8.4s-v11.4s}, [x4], #16
- ld4r {v12.4s-v15.4s}, [x4]
-
- // x12 += counter values 0-3
+ add x8, x0, #16
+ ld4r { v0.4s- v3.4s}, [x0]
+ ld4r { v4.4s- v7.4s}, [x8], #16
+ ld4r { v8.4s-v11.4s}, [x8], #16
+ ld4r {v12.4s-v15.4s}, [x8]
+
+ mov a0, v0.s[0]
+ mov a1, v1.s[0]
+ mov a2, v2.s[0]
+ mov a3, v3.s[0]
+ mov a4, v4.s[0]
+ mov a5, v5.s[0]
+ mov a6, v6.s[0]
+ mov a7, v7.s[0]
+ mov a8, v8.s[0]
+ mov a9, v9.s[0]
+ mov a10, v10.s[0]
+ mov a11, v11.s[0]
+ mov a12, v12.s[0]
+ mov a13, v13.s[0]
+ mov a14, v14.s[0]
+ mov a15, v15.s[0]
+
+ // x12 += counter values 1-4
add v12.4s, v12.4s, v30.4s
- mov x3, #10
-
.Ldoubleround4:
// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
add v0.4s, v0.4s, v4.4s
+ add a0, a0, a4
add v1.4s, v1.4s, v5.4s
+ add a1, a1, a5
add v2.4s, v2.4s, v6.4s
+ add a2, a2, a6
add v3.4s, v3.4s, v7.4s
+ add a3, a3, a7
eor v12.16b, v12.16b, v0.16b
+ eor a12, a12, a0
eor v13.16b, v13.16b, v1.16b
+ eor a13, a13, a1
eor v14.16b, v14.16b, v2.16b
+ eor a14, a14, a2
eor v15.16b, v15.16b, v3.16b
+ eor a15, a15, a3
rev32 v12.8h, v12.8h
+ ror a12, a12, #16
rev32 v13.8h, v13.8h
+ ror a13, a13, #16
rev32 v14.8h, v14.8h
+ ror a14, a14, #16
rev32 v15.8h, v15.8h
+ ror a15, a15, #16
// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
add v8.4s, v8.4s, v12.4s
+ add a8, a8, a12
add v9.4s, v9.4s, v13.4s
+ add a9, a9, a13
add v10.4s, v10.4s, v14.4s
+ add a10, a10, a14
add v11.4s, v11.4s, v15.4s
+ add a11, a11, a15
eor v16.16b, v4.16b, v8.16b
+ eor a4, a4, a8
eor v17.16b, v5.16b, v9.16b
+ eor a5, a5, a9
eor v18.16b, v6.16b, v10.16b
+ eor a6, a6, a10
eor v19.16b, v7.16b, v11.16b
+ eor a7, a7, a11
shl v4.4s, v16.4s, #12
shl v5.4s, v17.4s, #12
@@ -197,42 +298,66 @@ ENTRY(chacha20_4block_xor_neon)
shl v7.4s, v19.4s, #12
sri v4.4s, v16.4s, #20
+ ror a4, a4, #20
sri v5.4s, v17.4s, #20
+ ror a5, a5, #20
sri v6.4s, v18.4s, #20
+ ror a6, a6, #20
sri v7.4s, v19.4s, #20
+ ror a7, a7, #20
// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
add v0.4s, v0.4s, v4.4s
+ add a0, a0, a4
add v1.4s, v1.4s, v5.4s
+ add a1, a1, a5
add v2.4s, v2.4s, v6.4s
+ add a2, a2, a6
add v3.4s, v3.4s, v7.4s
+ add a3, a3, a7
eor v12.16b, v12.16b, v0.16b
+ eor a12, a12, a0
eor v13.16b, v13.16b, v1.16b
+ eor a13, a13, a1
eor v14.16b, v14.16b, v2.16b
+ eor a14, a14, a2
eor v15.16b, v15.16b, v3.16b
+ eor a15, a15, a3
tbl v12.16b, {v12.16b}, v31.16b
+ ror a12, a12, #24
tbl v13.16b, {v13.16b}, v31.16b
+ ror a13, a13, #24
tbl v14.16b, {v14.16b}, v31.16b
+ ror a14, a14, #24
tbl v15.16b, {v15.16b}, v31.16b
+ ror a15, a15, #24
// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
add v8.4s, v8.4s, v12.4s
+ add a8, a8, a12
add v9.4s, v9.4s, v13.4s
+ add a9, a9, a13
add v10.4s, v10.4s, v14.4s
+ add a10, a10, a14
add v11.4s, v11.4s, v15.4s
+ add a11, a11, a15
eor v16.16b, v4.16b, v8.16b
+ eor a4, a4, a8
eor v17.16b, v5.16b, v9.16b
+ eor a5, a5, a9
eor v18.16b, v6.16b, v10.16b
+ eor a6, a6, a10
eor v19.16b, v7.16b, v11.16b
+ eor a7, a7, a11
shl v4.4s, v16.4s, #7
shl v5.4s, v17.4s, #7
@@ -240,42 +365,66 @@ ENTRY(chacha20_4block_xor_neon)
shl v7.4s, v19.4s, #7
sri v4.4s, v16.4s, #25
+ ror a4, a4, #25
sri v5.4s, v17.4s, #25
+ ror a5, a5, #25
sri v6.4s, v18.4s, #25
+ ror a6, a6, #25
sri v7.4s, v19.4s, #25
+ ror a7, a7, #25
// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
add v0.4s, v0.4s, v5.4s
+ add a0, a0, a5
add v1.4s, v1.4s, v6.4s
+ add a1, a1, a6
add v2.4s, v2.4s, v7.4s
+ add a2, a2, a7
add v3.4s, v3.4s, v4.4s
+ add a3, a3, a4
eor v15.16b, v15.16b, v0.16b
+ eor a15, a15, a0
eor v12.16b, v12.16b, v1.16b
+ eor a12, a12, a1
eor v13.16b, v13.16b, v2.16b
+ eor a13, a13, a2
eor v14.16b, v14.16b, v3.16b
+ eor a14, a14, a3
rev32 v15.8h, v15.8h
+ ror a15, a15, #16
rev32 v12.8h, v12.8h
+ ror a12, a12, #16
rev32 v13.8h, v13.8h
+ ror a13, a13, #16
rev32 v14.8h, v14.8h
+ ror a14, a14, #16
// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
add v10.4s, v10.4s, v15.4s
+ add a10, a10, a15
add v11.4s, v11.4s, v12.4s
+ add a11, a11, a12
add v8.4s, v8.4s, v13.4s
+ add a8, a8, a13
add v9.4s, v9.4s, v14.4s
+ add a9, a9, a14
eor v16.16b, v5.16b, v10.16b
+ eor a5, a5, a10
eor v17.16b, v6.16b, v11.16b
+ eor a6, a6, a11
eor v18.16b, v7.16b, v8.16b
+ eor a7, a7, a8
eor v19.16b, v4.16b, v9.16b
+ eor a4, a4, a9
shl v5.4s, v16.4s, #12
shl v6.4s, v17.4s, #12
@@ -283,42 +432,66 @@ ENTRY(chacha20_4block_xor_neon)
shl v4.4s, v19.4s, #12
sri v5.4s, v16.4s, #20
+ ror a5, a5, #20
sri v6.4s, v17.4s, #20
+ ror a6, a6, #20
sri v7.4s, v18.4s, #20
+ ror a7, a7, #20
sri v4.4s, v19.4s, #20
+ ror a4, a4, #20
// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
add v0.4s, v0.4s, v5.4s
+ add a0, a0, a5
add v1.4s, v1.4s, v6.4s
+ add a1, a1, a6
add v2.4s, v2.4s, v7.4s
+ add a2, a2, a7
add v3.4s, v3.4s, v4.4s
+ add a3, a3, a4
eor v15.16b, v15.16b, v0.16b
+ eor a15, a15, a0
eor v12.16b, v12.16b, v1.16b
+ eor a12, a12, a1
eor v13.16b, v13.16b, v2.16b
+ eor a13, a13, a2
eor v14.16b, v14.16b, v3.16b
+ eor a14, a14, a3
tbl v15.16b, {v15.16b}, v31.16b
+ ror a15, a15, #24
tbl v12.16b, {v12.16b}, v31.16b
+ ror a12, a12, #24
tbl v13.16b, {v13.16b}, v31.16b
+ ror a13, a13, #24
tbl v14.16b, {v14.16b}, v31.16b
+ ror a14, a14, #24
// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
add v10.4s, v10.4s, v15.4s
+ add a10, a10, a15
add v11.4s, v11.4s, v12.4s
+ add a11, a11, a12
add v8.4s, v8.4s, v13.4s
+ add a8, a8, a13
add v9.4s, v9.4s, v14.4s
+ add a9, a9, a14
eor v16.16b, v5.16b, v10.16b
+ eor a5, a5, a10
eor v17.16b, v6.16b, v11.16b
+ eor a6, a6, a11
eor v18.16b, v7.16b, v8.16b
+ eor a7, a7, a8
eor v19.16b, v4.16b, v9.16b
+ eor a4, a4, a9
shl v5.4s, v16.4s, #7
shl v6.4s, v17.4s, #7
@@ -326,11 +499,15 @@ ENTRY(chacha20_4block_xor_neon)
shl v4.4s, v19.4s, #7
sri v5.4s, v16.4s, #25
+ ror a5, a5, #25
sri v6.4s, v17.4s, #25
+ ror a6, a6, #25
sri v7.4s, v18.4s, #25
+ ror a7, a7, #25
sri v4.4s, v19.4s, #25
+ ror a4, a4, #25
- subs x3, x3, #1
+ subs w3, w3, #2
b.ne .Ldoubleround4
ld4r {v16.4s-v19.4s}, [x0], #16
@@ -344,9 +521,17 @@ ENTRY(chacha20_4block_xor_neon)
// x2[0-3] += s0[2]
// x3[0-3] += s0[3]
add v0.4s, v0.4s, v16.4s
+ mov w6, v16.s[0]
+ mov w7, v17.s[0]
add v1.4s, v1.4s, v17.4s
+ mov w8, v18.s[0]
+ mov w9, v19.s[0]
add v2.4s, v2.4s, v18.4s
+ add a0, a0, w6
+ add a1, a1, w7
add v3.4s, v3.4s, v19.4s
+ add a2, a2, w8
+ add a3, a3, w9
ld4r {v24.4s-v27.4s}, [x0], #16
ld4r {v28.4s-v31.4s}, [x0]
@@ -356,95 +541,304 @@ ENTRY(chacha20_4block_xor_neon)
// x6[0-3] += s1[2]
// x7[0-3] += s1[3]
add v4.4s, v4.4s, v20.4s
+ mov w6, v20.s[0]
+ mov w7, v21.s[0]
add v5.4s, v5.4s, v21.4s
+ mov w8, v22.s[0]
+ mov w9, v23.s[0]
add v6.4s, v6.4s, v22.4s
+ add a4, a4, w6
+ add a5, a5, w7
add v7.4s, v7.4s, v23.4s
+ add a6, a6, w8
+ add a7, a7, w9
// x8[0-3] += s2[0]
// x9[0-3] += s2[1]
// x10[0-3] += s2[2]
// x11[0-3] += s2[3]
add v8.4s, v8.4s, v24.4s
+ mov w6, v24.s[0]
+ mov w7, v25.s[0]
add v9.4s, v9.4s, v25.4s
+ mov w8, v26.s[0]
+ mov w9, v27.s[0]
add v10.4s, v10.4s, v26.4s
+ add a8, a8, w6
+ add a9, a9, w7
add v11.4s, v11.4s, v27.4s
+ add a10, a10, w8
+ add a11, a11, w9
// x12[0-3] += s3[0]
// x13[0-3] += s3[1]
// x14[0-3] += s3[2]
// x15[0-3] += s3[3]
add v12.4s, v12.4s, v28.4s
+ mov w6, v28.s[0]
+ mov w7, v29.s[0]
add v13.4s, v13.4s, v29.4s
+ mov w8, v30.s[0]
+ mov w9, v31.s[0]
add v14.4s, v14.4s, v30.4s
+ add a12, a12, w6
+ add a13, a13, w7
add v15.4s, v15.4s, v31.4s
+ add a14, a14, w8
+ add a15, a15, w9
// interleave 32-bit words in state n, n+1
+ ldp w6, w7, [x2], #64
zip1 v16.4s, v0.4s, v1.4s
+ ldp w8, w9, [x2, #-56]
+ eor a0, a0, w6
zip2 v17.4s, v0.4s, v1.4s
+ eor a1, a1, w7
zip1 v18.4s, v2.4s, v3.4s
+ eor a2, a2, w8
zip2 v19.4s, v2.4s, v3.4s
+ eor a3, a3, w9
+ ldp w6, w7, [x2, #-48]
zip1 v20.4s, v4.4s, v5.4s
+ ldp w8, w9, [x2, #-40]
+ eor a4, a4, w6
zip2 v21.4s, v4.4s, v5.4s
+ eor a5, a5, w7
zip1 v22.4s, v6.4s, v7.4s
+ eor a6, a6, w8
zip2 v23.4s, v6.4s, v7.4s
+ eor a7, a7, w9
+ ldp w6, w7, [x2, #-32]
zip1 v24.4s, v8.4s, v9.4s
+ ldp w8, w9, [x2, #-24]
+ eor a8, a8, w6
zip2 v25.4s, v8.4s, v9.4s
+ eor a9, a9, w7
zip1 v26.4s, v10.4s, v11.4s
+ eor a10, a10, w8
zip2 v27.4s, v10.4s, v11.4s
+ eor a11, a11, w9
+ ldp w6, w7, [x2, #-16]
zip1 v28.4s, v12.4s, v13.4s
+ ldp w8, w9, [x2, #-8]
+ eor a12, a12, w6
zip2 v29.4s, v12.4s, v13.4s
+ eor a13, a13, w7
zip1 v30.4s, v14.4s, v15.4s
+ eor a14, a14, w8
zip2 v31.4s, v14.4s, v15.4s
+ eor a15, a15, w9
+
+ mov x3, #64
+ subs x5, x4, #128
+ add x6, x5, x2
+ csel x3, x3, xzr, ge
+ csel x2, x2, x6, ge
// interleave 64-bit words in state n, n+2
zip1 v0.2d, v16.2d, v18.2d
zip2 v4.2d, v16.2d, v18.2d
+ stp a0, a1, [x1], #64
zip1 v8.2d, v17.2d, v19.2d
zip2 v12.2d, v17.2d, v19.2d
- ld1 {v16.16b-v19.16b}, [x2], #64
+ stp a2, a3, [x1, #-56]
+ ld1 {v16.16b-v19.16b}, [x2], x3
+
+ subs x6, x4, #192
+ ccmp x3, xzr, #4, lt
+ add x7, x6, x2
+ csel x3, x3, xzr, eq
+ csel x2, x2, x7, eq
zip1 v1.2d, v20.2d, v22.2d
zip2 v5.2d, v20.2d, v22.2d
+ stp a4, a5, [x1, #-48]
zip1 v9.2d, v21.2d, v23.2d
zip2 v13.2d, v21.2d, v23.2d
- ld1 {v20.16b-v23.16b}, [x2], #64
+ stp a6, a7, [x1, #-40]
+ ld1 {v20.16b-v23.16b}, [x2], x3
+
+ subs x7, x4, #256
+ ccmp x3, xzr, #4, lt
+ add x8, x7, x2
+ csel x3, x3, xzr, eq
+ csel x2, x2, x8, eq
zip1 v2.2d, v24.2d, v26.2d
zip2 v6.2d, v24.2d, v26.2d
+ stp a8, a9, [x1, #-32]
zip1 v10.2d, v25.2d, v27.2d
zip2 v14.2d, v25.2d, v27.2d
- ld1 {v24.16b-v27.16b}, [x2], #64
+ stp a10, a11, [x1, #-24]
+ ld1 {v24.16b-v27.16b}, [x2], x3
+
+ subs x8, x4, #320
+ ccmp x3, xzr, #4, lt
+ add x9, x8, x2
+ csel x2, x2, x9, eq
zip1 v3.2d, v28.2d, v30.2d
zip2 v7.2d, v28.2d, v30.2d
+ stp a12, a13, [x1, #-16]
zip1 v11.2d, v29.2d, v31.2d
zip2 v15.2d, v29.2d, v31.2d
+ stp a14, a15, [x1, #-8]
ld1 {v28.16b-v31.16b}, [x2]
// xor with corresponding input, write to output
+ tbnz x5, #63, 0f
eor v16.16b, v16.16b, v0.16b
eor v17.16b, v17.16b, v1.16b
eor v18.16b, v18.16b, v2.16b
eor v19.16b, v19.16b, v3.16b
+ st1 {v16.16b-v19.16b}, [x1], #64
+ cbz x5, .Lout
+
+ tbnz x6, #63, 1f
eor v20.16b, v20.16b, v4.16b
eor v21.16b, v21.16b, v5.16b
- st1 {v16.16b-v19.16b}, [x1], #64
eor v22.16b, v22.16b, v6.16b
eor v23.16b, v23.16b, v7.16b
+ st1 {v20.16b-v23.16b}, [x1], #64
+ cbz x6, .Lout
+
+ tbnz x7, #63, 2f
eor v24.16b, v24.16b, v8.16b
eor v25.16b, v25.16b, v9.16b
- st1 {v20.16b-v23.16b}, [x1], #64
eor v26.16b, v26.16b, v10.16b
eor v27.16b, v27.16b, v11.16b
- eor v28.16b, v28.16b, v12.16b
st1 {v24.16b-v27.16b}, [x1], #64
+ cbz x7, .Lout
+
+ tbnz x8, #63, 3f
+ eor v28.16b, v28.16b, v12.16b
eor v29.16b, v29.16b, v13.16b
eor v30.16b, v30.16b, v14.16b
eor v31.16b, v31.16b, v15.16b
st1 {v28.16b-v31.16b}, [x1]
+.Lout: frame_pop
ret
-ENDPROC(chacha20_4block_xor_neon)
-CTRINC: .word 0, 1, 2, 3
+ // fewer than 128 bytes of in/output
+0: ld1 {v8.16b}, [x10]
+ ld1 {v9.16b}, [x11]
+ movi v10.16b, #16
+ sub x2, x1, #64
+ add x1, x1, x5
+ ld1 {v16.16b-v19.16b}, [x2]
+ tbl v4.16b, {v0.16b-v3.16b}, v8.16b
+ tbx v20.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v5.16b, {v0.16b-v3.16b}, v8.16b
+ tbx v21.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v6.16b, {v0.16b-v3.16b}, v8.16b
+ tbx v22.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v7.16b, {v0.16b-v3.16b}, v8.16b
+ tbx v23.16b, {v16.16b-v19.16b}, v9.16b
+
+ eor v20.16b, v20.16b, v4.16b
+ eor v21.16b, v21.16b, v5.16b
+ eor v22.16b, v22.16b, v6.16b
+ eor v23.16b, v23.16b, v7.16b
+ st1 {v20.16b-v23.16b}, [x1]
+ b .Lout
+
+ // fewer than 192 bytes of in/output
+1: ld1 {v8.16b}, [x10]
+ ld1 {v9.16b}, [x11]
+ movi v10.16b, #16
+ add x1, x1, x6
+ tbl v0.16b, {v4.16b-v7.16b}, v8.16b
+ tbx v20.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v1.16b, {v4.16b-v7.16b}, v8.16b
+ tbx v21.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v2.16b, {v4.16b-v7.16b}, v8.16b
+ tbx v22.16b, {v16.16b-v19.16b}, v9.16b
+ add v8.16b, v8.16b, v10.16b
+ add v9.16b, v9.16b, v10.16b
+ tbl v3.16b, {v4.16b-v7.16b}, v8.16b
+ tbx v23.16b, {v16.16b-v19.16b}, v9.16b
+
+ eor v20.16b, v20.16b, v0.16b
+ eor v21.16b, v21.16b, v1.16b
+ eor v22.16b, v22.16b, v2.16b
+ eor v23.16b, v23.16b, v3.16b
+ st1 {v20.16b-v23.16b}, [x1]
+ b .Lout
+
+ // fewer than 256 bytes of in/output
+2: ld1 {v4.16b}, [x10]
+ ld1 {v5.16b}, [x11]
+ movi v6.16b, #16
+ add x1, x1, x7
+ tbl v0.16b, {v8.16b-v11.16b}, v4.16b
+ tbx v24.16b, {v20.16b-v23.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v1.16b, {v8.16b-v11.16b}, v4.16b
+ tbx v25.16b, {v20.16b-v23.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v2.16b, {v8.16b-v11.16b}, v4.16b
+ tbx v26.16b, {v20.16b-v23.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v3.16b, {v8.16b-v11.16b}, v4.16b
+ tbx v27.16b, {v20.16b-v23.16b}, v5.16b
+
+ eor v24.16b, v24.16b, v0.16b
+ eor v25.16b, v25.16b, v1.16b
+ eor v26.16b, v26.16b, v2.16b
+ eor v27.16b, v27.16b, v3.16b
+ st1 {v24.16b-v27.16b}, [x1]
+ b .Lout
+
+ // fewer than 320 bytes of in/output
+3: ld1 {v4.16b}, [x10]
+ ld1 {v5.16b}, [x11]
+ movi v6.16b, #16
+ add x1, x1, x8
+ tbl v0.16b, {v12.16b-v15.16b}, v4.16b
+ tbx v28.16b, {v24.16b-v27.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v1.16b, {v12.16b-v15.16b}, v4.16b
+ tbx v29.16b, {v24.16b-v27.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v2.16b, {v12.16b-v15.16b}, v4.16b
+ tbx v30.16b, {v24.16b-v27.16b}, v5.16b
+ add v4.16b, v4.16b, v6.16b
+ add v5.16b, v5.16b, v6.16b
+ tbl v3.16b, {v12.16b-v15.16b}, v4.16b
+ tbx v31.16b, {v24.16b-v27.16b}, v5.16b
+
+ eor v28.16b, v28.16b, v0.16b
+ eor v29.16b, v29.16b, v1.16b
+ eor v30.16b, v30.16b, v2.16b
+ eor v31.16b, v31.16b, v3.16b
+ st1 {v28.16b-v31.16b}, [x1]
+ b .Lout
+ENDPROC(chacha_4block_xor_neon)
+
+ .section ".rodata", "a", %progbits
+ .align L1_CACHE_SHIFT
+.Lpermute:
+ .set .Li, 0
+ .rept 192
+ .byte (.Li - 64)
+ .set .Li, .Li + 1
+ .endr
+
+CTRINC: .word 1, 2, 3, 4
ROT8: .word 0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
new file mode 100644
index 000000000000..bece1d85bd81
--- /dev/null
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -0,0 +1,198 @@
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Based on:
+ * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
+ int nrounds, int bytes);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+ int bytes, int nrounds)
+{
+ while (bytes > 0) {
+ int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
+
+ if (l <= CHACHA_BLOCK_SIZE) {
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ memcpy(buf, src, l);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, l);
+ state[12] += 1;
+ break;
+ }
+ chacha_4block_xor_neon(state, dst, src, nrounds, l);
+ bytes -= CHACHA_BLOCK_SIZE * 5;
+ src += CHACHA_BLOCK_SIZE * 5;
+ dst += CHACHA_BLOCK_SIZE * 5;
+ state[12] += 5;
+ }
+}
+
+static int chacha_neon_stream_xor(struct skcipher_request *req,
+ struct chacha_ctx *ctx, u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ crypto_chacha_init(state, ctx, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = rounddown(nbytes, walk.stride);
+
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ kernel_neon_end();
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ return crypto_chacha_crypt(req);
+
+ return chacha_neon_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ return crypto_xchacha_crypt(req);
+
+ crypto_chacha_init(state, ctx, req->iv);
+
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_neon_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 5 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha_neon,
+ .decrypt = chacha_neon,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 5 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 5 * CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha12_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_ASIMD))
+ return -ENODEV;
+
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm64/crypto/chacha20-neon-glue.c b/arch/arm64/crypto/chacha20-neon-glue.c
deleted file mode 100644
index 727579c93ded..000000000000
--- a/arch/arm64/crypto/chacha20-neon-glue.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
- *
- * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
-
-static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- kernel_neon_begin();
- chacha20_4block_xor_neon(state, dst, src);
- kernel_neon_end();
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
-
- if (!bytes)
- return;
-
- kernel_neon_begin();
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_neon(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_neon(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
- kernel_neon_end();
-}
-
-static int chacha20_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- if (!may_use_simd() || req->cryptlen <= CHACHA20_BLOCK_SIZE)
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, false);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .walksize = 4 * CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_neon,
- .decrypt = chacha20_neon,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_ASIMD))
- return -ENODEV;
-
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S
new file mode 100644
index 000000000000..e05570c38de7
--- /dev/null
+++ b/arch/arm64/crypto/nh-neon-core.S
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+ KEY .req x0
+ MESSAGE .req x1
+ MESSAGE_LEN .req x2
+ HASH .req x3
+
+ PASS0_SUMS .req v0
+ PASS1_SUMS .req v1
+ PASS2_SUMS .req v2
+ PASS3_SUMS .req v3
+ K0 .req v4
+ K1 .req v5
+ K2 .req v6
+ K3 .req v7
+ T0 .req v8
+ T1 .req v9
+ T2 .req v10
+ T3 .req v11
+ T4 .req v12
+ T5 .req v13
+ T6 .req v14
+ T7 .req v15
+
+.macro _nh_stride k0, k1, k2, k3
+
+ // Load next message stride
+ ld1 {T3.16b}, [MESSAGE], #16
+
+ // Load next key stride
+ ld1 {\k3\().4s}, [KEY], #16
+
+ // Add message words to key words
+ add T0.4s, T3.4s, \k0\().4s
+ add T1.4s, T3.4s, \k1\().4s
+ add T2.4s, T3.4s, \k2\().4s
+ add T3.4s, T3.4s, \k3\().4s
+
+ // Multiply 32x32 => 64 and accumulate
+ mov T4.d[0], T0.d[1]
+ mov T5.d[0], T1.d[1]
+ mov T6.d[0], T2.d[1]
+ mov T7.d[0], T3.d[1]
+ umlal PASS0_SUMS.2d, T0.2s, T4.2s
+ umlal PASS1_SUMS.2d, T1.2s, T5.2s
+ umlal PASS2_SUMS.2d, T2.2s, T6.2s
+ umlal PASS3_SUMS.2d, T3.2s, T7.2s
+.endm
+
+/*
+ * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ * u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_neon)
+
+ ld1 {K0.4s,K1.4s}, [KEY], #32
+ movi PASS0_SUMS.2d, #0
+ movi PASS1_SUMS.2d, #0
+ ld1 {K2.4s}, [KEY], #16
+ movi PASS2_SUMS.2d, #0
+ movi PASS3_SUMS.2d, #0
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #64
+ blt .Lloop4_done
+.Lloop4:
+ _nh_stride K0, K1, K2, K3
+ _nh_stride K1, K2, K3, K0
+ _nh_stride K2, K3, K0, K1
+ _nh_stride K3, K0, K1, K2
+ subs MESSAGE_LEN, MESSAGE_LEN, #64
+ bge .Lloop4
+
+.Lloop4_done:
+ ands MESSAGE_LEN, MESSAGE_LEN, #63
+ beq .Ldone
+ _nh_stride K0, K1, K2, K3
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #16
+ beq .Ldone
+ _nh_stride K1, K2, K3, K0
+
+ subs MESSAGE_LEN, MESSAGE_LEN, #16
+ beq .Ldone
+ _nh_stride K2, K3, K0, K1
+
+.Ldone:
+ // Sum the accumulators for each pass, then store the sums to 'hash'
+ addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
+ addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
+ st1 {T0.16b,T1.16b}, [HASH]
+ ret
+ENDPROC(nh_neon)
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
new file mode 100644
index 000000000000..22cc32ac9448
--- /dev/null
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (ARM64 NEON accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+
+asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ nh_neon(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_neon_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ if (srclen < 64 || !may_use_simd())
+ return crypto_nhpoly1305_update(desc, src, srclen);
+
+ do {
+ unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+ kernel_neon_begin();
+ crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+ kernel_neon_end();
+ src += n;
+ srclen -= n;
+ } while (srclen);
+ return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+ .base.cra_name = "nhpoly1305",
+ .base.cra_driver_name = "nhpoly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
+ .base.cra_module = THIS_MODULE,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_nhpoly1305_init,
+ .update = nhpoly1305_neon_update,
+ .final = crypto_nhpoly1305_final,
+ .setkey = crypto_nhpoly1305_setkey,
+ .descsize = sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+ if (!(elf_hwcap & HWCAP_ASIMD))
+ return -ENODEV;
+
+ return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-neon");
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 812d9498d97b..dd456725189f 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -137,7 +137,7 @@ static int fallback_init_cip(struct crypto_tfm *tfm)
struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
sctx->fallback.cip = crypto_alloc_cipher(name, 0,
- CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+ CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(sctx->fallback.cip)) {
pr_err("Allocating AES fallback algorithm %s failed\n",
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 3cd4f6b198b6..a9b8b0b94a8d 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -476,11 +476,6 @@ static bool __init sparc64_has_aes_opcode(void)
static int __init aes_sparc64_mod_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(algs); i++)
- INIT_LIST_HEAD(&algs[i].cra_list);
-
if (sparc64_has_aes_opcode()) {
pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 561a84d93cf6..900d5c617e83 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -299,11 +299,6 @@ static bool __init sparc64_has_camellia_opcode(void)
static int __init camellia_sparc64_mod_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(algs); i++)
- INIT_LIST_HEAD(&algs[i].cra_list);
-
if (sparc64_has_camellia_opcode()) {
pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index 61af794aa2d3..56499ea39fd3 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -510,11 +510,6 @@ static bool __init sparc64_has_des_opcode(void)
static int __init des_sparc64_mod_init(void)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(algs); i++)
- INIT_LIST_HEAD(&algs[i].cra_list);
-
if (sparc64_has_des_opcode()) {
pr_info("Using sparc64 des opcodes optimized DES implementation\n");
return crypto_register_algs(algs, ARRAY_SIZE(algs));
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a4b0007a54e1..45734e1cf967 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -8,6 +8,7 @@ OBJECT_FILES_NON_STANDARD := y
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
+avx512_supported :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,yes,no)
sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no)
sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no)
@@ -23,7 +24,7 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
-obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o
+obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o
obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
@@ -46,6 +47,9 @@ obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \
@@ -74,7 +78,7 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
-chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o
+chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o
serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
@@ -84,6 +88,8 @@ aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
+nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
camellia_aesni_avx_glue.o
@@ -97,10 +103,16 @@ endif
ifeq ($(avx2_supported),yes)
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
- chacha20-x86_64-y += chacha20-avx2-x86_64.o
+ chacha-x86_64-y += chacha-avx2-x86_64.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
+
+ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
+endif
+
+ifeq ($(avx512_supported),yes)
+ chacha-x86_64-y += chacha-avx512vl-x86_64.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 1985ea0b551b..91c039ab5699 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -182,43 +182,30 @@ aad_shift_arr:
.text
-##define the fields of the gcm aes context
-#{
-# u8 expanded_keys[16*11] store expanded keys
-# u8 shifted_hkey_1[16] store HashKey <<1 mod poly here
-# u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here
-# u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here
-# u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here
-# u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here
-# u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here
-# u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here
-# u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here
-# u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes)
-# u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes)
-#} gcm_ctx#
-
-HashKey = 16*11 # store HashKey <<1 mod poly here
-HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here
-HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here
-HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here
-HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here
-HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here
-HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here
-HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here
-HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
-HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
-HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
-HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
-HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
-HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
-HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
-HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
+#define AadHash 16*0
+#define AadLen 16*1
+#define InLen (16*1)+8
+#define PBlockEncKey 16*2
+#define OrigIV 16*3
+#define CurCount 16*4
+#define PBlockLen 16*5
+
+HashKey = 16*6 # store HashKey <<1 mod poly here
+HashKey_2 = 16*7 # store HashKey^2 <<1 mod poly here
+HashKey_3 = 16*8 # store HashKey^3 <<1 mod poly here
+HashKey_4 = 16*9 # store HashKey^4 <<1 mod poly here
+HashKey_5 = 16*10 # store HashKey^5 <<1 mod poly here
+HashKey_6 = 16*11 # store HashKey^6 <<1 mod poly here
+HashKey_7 = 16*12 # store HashKey^7 <<1 mod poly here
+HashKey_8 = 16*13 # store HashKey^8 <<1 mod poly here
+HashKey_k = 16*14 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes)
+HashKey_2_k = 16*15 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
+HashKey_3_k = 16*16 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
+HashKey_4_k = 16*17 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
+HashKey_5_k = 16*18 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
+HashKey_6_k = 16*19 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
+HashKey_7_k = 16*20 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
+HashKey_8_k = 16*21 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
#define arg1 %rdi
#define arg2 %rsi
@@ -229,6 +216,8 @@ HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsu
#define arg7 STACK_OFFSET+8*1(%r14)
#define arg8 STACK_OFFSET+8*2(%r14)
#define arg9 STACK_OFFSET+8*3(%r14)
+#define arg10 STACK_OFFSET+8*4(%r14)
+#define keysize 2*15*16(arg1)
i = 0
j = 0
@@ -267,19 +256,636 @@ VARIABLE_OFFSET = 16*8
# Utility Macros
################################
+.macro FUNC_SAVE
+ #the number of pushes must equal STACK_OFFSET
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov %rsp, %r14
+
+
+
+ sub $VARIABLE_OFFSET, %rsp
+ and $~63, %rsp # align rsp to 64 bytes
+.endm
+
+.macro FUNC_RESTORE
+ mov %r14, %rsp
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+.endm
+
# Encryption of a single block
-.macro ENCRYPT_SINGLE_BLOCK XMM0
+.macro ENCRYPT_SINGLE_BLOCK REP XMM0
vpxor (arg1), \XMM0, \XMM0
- i = 1
- setreg
-.rep 9
+ i = 1
+ setreg
+.rep \REP
vaesenc 16*i(arg1), \XMM0, \XMM0
- i = (i+1)
- setreg
+ i = (i+1)
+ setreg
.endr
- vaesenclast 16*10(arg1), \XMM0, \XMM0
+ vaesenclast 16*i(arg1), \XMM0, \XMM0
.endm
+# combined for GCM encrypt and decrypt functions
+# clobbering all xmm registers
+# clobbering r10, r11, r12, r13, r14, r15
+.macro GCM_ENC_DEC INITIAL_BLOCKS GHASH_8_ENCRYPT_8_PARALLEL GHASH_LAST_8 GHASH_MUL ENC_DEC REP
+ vmovdqu AadHash(arg2), %xmm8
+ vmovdqu HashKey(arg2), %xmm13 # xmm13 = HashKey
+ add arg5, InLen(arg2)
+
+ # initialize the data pointer offset as zero
+ xor %r11d, %r11d
+
+ PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC
+ sub %r11, arg5
+
+ mov arg5, %r13 # save the number of bytes of plaintext/ciphertext
+ and $-16, %r13 # r13 = r13 - (r13 mod 16)
+
+ mov %r13, %r12
+ shr $4, %r12
+ and $7, %r12
+ jz _initial_num_blocks_is_0\@
+
+ cmp $7, %r12
+ je _initial_num_blocks_is_7\@
+ cmp $6, %r12
+ je _initial_num_blocks_is_6\@
+ cmp $5, %r12
+ je _initial_num_blocks_is_5\@
+ cmp $4, %r12
+ je _initial_num_blocks_is_4\@
+ cmp $3, %r12
+ je _initial_num_blocks_is_3\@
+ cmp $2, %r12
+ je _initial_num_blocks_is_2\@
+
+ jmp _initial_num_blocks_is_1\@
+
+_initial_num_blocks_is_7\@:
+ \INITIAL_BLOCKS \REP, 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*7, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_6\@:
+ \INITIAL_BLOCKS \REP, 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*6, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_5\@:
+ \INITIAL_BLOCKS \REP, 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*5, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_4\@:
+ \INITIAL_BLOCKS \REP, 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*4, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_3\@:
+ \INITIAL_BLOCKS \REP, 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*3, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_2\@:
+ \INITIAL_BLOCKS \REP, 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*2, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_1\@:
+ \INITIAL_BLOCKS \REP, 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+ sub $16*1, %r13
+ jmp _initial_blocks_encrypted\@
+
+_initial_num_blocks_is_0\@:
+ \INITIAL_BLOCKS \REP, 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
+
+
+_initial_blocks_encrypted\@:
+ cmp $0, %r13
+ je _zero_cipher_left\@
+
+ sub $128, %r13
+ je _eight_cipher_left\@
+
+
+
+
+ vmovd %xmm9, %r15d
+ and $255, %r15d
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+
+
+_encrypt_by_8_new\@:
+ cmp $(255-8), %r15d
+ jg _encrypt_by_8\@
+
+
+
+ add $8, %r15b
+ \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
+ add $128, %r11
+ sub $128, %r13
+ jne _encrypt_by_8_new\@
+
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+ jmp _eight_cipher_left\@
+
+_encrypt_by_8\@:
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+ add $8, %r15b
+ \GHASH_8_ENCRYPT_8_PARALLEL \REP, %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+ add $128, %r11
+ sub $128, %r13
+ jne _encrypt_by_8_new\@
+
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+
+
+
+
+_eight_cipher_left\@:
+ \GHASH_LAST_8 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
+
+
+_zero_cipher_left\@:
+ vmovdqu %xmm14, AadHash(arg2)
+ vmovdqu %xmm9, CurCount(arg2)
+
+ # check for 0 length
+ mov arg5, %r13
+ and $15, %r13 # r13 = (arg5 mod 16)
+
+ je _multiple_of_16_bytes\@
+
+ # handle the last <16 Byte block separately
+
+ mov %r13, PBlockLen(arg2)
+
+ vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
+ vmovdqu %xmm9, CurCount(arg2)
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+
+ ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Yn)
+ vmovdqu %xmm9, PBlockEncKey(arg2)
+
+ cmp $16, arg5
+ jge _large_enough_update\@
+
+ lea (arg4,%r11,1), %r10
+ mov %r13, %r12
+
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm1
+
+ lea SHIFT_MASK+16(%rip), %r12
+ sub %r13, %r12 # adjust the shuffle mask pointer to be
+ # able to shift 16-r13 bytes (r13 is the
+ # number of bytes in plaintext mod 16)
+
+ jmp _final_ghash_mul\@
+
+_large_enough_update\@:
+ sub $16, %r11
+ add %r13, %r11
+
+ # receive the last <16 Byte block
+ vmovdqu (arg4, %r11, 1), %xmm1
+
+ sub %r13, %r11
+ add $16, %r11
+
+ lea SHIFT_MASK+16(%rip), %r12
+ # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
+ # (r13 is the number of bytes in plaintext mod 16)
+ sub %r13, %r12
+ # get the appropriate shuffle mask
+ vmovdqu (%r12), %xmm2
+ # shift right 16-r13 bytes
+ vpshufb %xmm2, %xmm1, %xmm1
+
+_final_ghash_mul\@:
+ .if \ENC_DEC == DEC
+ vmovdqa %xmm1, %xmm2
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
+ # mask out top 16-r13 bytes of xmm9
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
+ vpand %xmm1, %xmm2, %xmm2
+ vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
+ vpxor %xmm2, %xmm14, %xmm14
+
+ vmovdqu %xmm14, AadHash(arg2)
+ .else
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
+ # mask out top 16-r13 bytes of xmm9
+ vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
+ vpxor %xmm9, %xmm14, %xmm14
+
+ vmovdqu %xmm14, AadHash(arg2)
+ vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
+ .endif
+
+
+ #############################
+ # output r13 Bytes
+ vmovq %xmm9, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left\@
+
+ mov %rax, (arg3 , %r11)
+ add $8, %r11
+ vpsrldq $8, %xmm9, %xmm9
+ vmovq %xmm9, %rax
+ sub $8, %r13
+
+_less_than_8_bytes_left\@:
+ movb %al, (arg3 , %r11)
+ add $1, %r11
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left\@
+ #############################
+
+_multiple_of_16_bytes\@:
+.endm
+
+
+# GCM_COMPLETE Finishes update of tag of last partial block
+# Output: Authorization Tag (AUTH_TAG)
+# Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
+.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN
+ vmovdqu AadHash(arg2), %xmm14
+ vmovdqu HashKey(arg2), %xmm13
+
+ mov PBlockLen(arg2), %r12
+ cmp $0, %r12
+ je _partial_done\@
+
+ #GHASH computation for the last <16 Byte block
+ \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+
+_partial_done\@:
+ mov AadLen(arg2), %r12 # r12 = aadLen (number of bytes)
+ shl $3, %r12 # convert into number of bits
+ vmovd %r12d, %xmm15 # len(A) in xmm15
+
+ mov InLen(arg2), %r12
+ shl $3, %r12 # len(C) in bits (*128)
+ vmovq %r12, %xmm1
+ vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
+ vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
+
+ vpxor %xmm15, %xmm14, %xmm14
+ \GHASH_MUL %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
+ vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
+
+ vmovdqu OrigIV(arg2), %xmm9
+
+ ENCRYPT_SINGLE_BLOCK \REP, %xmm9 # E(K, Y0)
+
+ vpxor %xmm14, %xmm9, %xmm9
+
+
+
+_return_T\@:
+ mov \AUTH_TAG, %r10 # r10 = authTag
+ mov \AUTH_TAG_LEN, %r11 # r11 = auth_tag_len
+
+ cmp $16, %r11
+ je _T_16\@
+
+ cmp $8, %r11
+ jl _T_4\@
+
+_T_8\@:
+ vmovq %xmm9, %rax
+ mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
+ vpsrldq $8, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_4\@:
+ vmovd %xmm9, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ vpsrldq $4, %xmm9, %xmm9
+ cmp $0, %r11
+ je _return_T_done\@
+_T_123\@:
+ vmovd %xmm9, %eax
+ cmp $2, %r11
+ jl _T_1\@
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done\@
+ add $2, %r10
+ sar $16, %eax
+_T_1\@:
+ mov %al, (%r10)
+ jmp _return_T_done\@
+
+_T_16\@:
+ vmovdqu %xmm9, (%r10)
+
+_return_T_done\@:
+.endm
+
+.macro CALC_AAD_HASH GHASH_MUL AAD AADLEN T1 T2 T3 T4 T5 T6 T7 T8
+
+ mov \AAD, %r10 # r10 = AAD
+ mov \AADLEN, %r12 # r12 = aadLen
+
+
+ mov %r12, %r11
+
+ vpxor \T8, \T8, \T8
+ vpxor \T7, \T7, \T7
+ cmp $16, %r11
+ jl _get_AAD_rest8\@
+_get_AAD_blocks\@:
+ vmovdqu (%r10), \T7
+ vpshufb SHUF_MASK(%rip), \T7, \T7
+ vpxor \T7, \T8, \T8
+ \GHASH_MUL \T8, \T2, \T1, \T3, \T4, \T5, \T6
+ add $16, %r10
+ sub $16, %r12
+ sub $16, %r11
+ cmp $16, %r11
+ jge _get_AAD_blocks\@
+ vmovdqu \T8, \T7
+ cmp $0, %r11
+ je _get_AAD_done\@
+
+ vpxor \T7, \T7, \T7
+
+ /* read the last <16B of AAD. since we have at least 4B of
+ data right after the AAD (the ICV, and maybe some CT), we can
+ read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+ cmp $4, %r11
+ jle _get_AAD_rest4\@
+ movq (%r10), \T1
+ add $8, %r10
+ sub $8, %r11
+ vpslldq $8, \T1, \T1
+ vpsrldq $8, \T7, \T7
+ vpxor \T1, \T7, \T7
+ jmp _get_AAD_rest8\@
+_get_AAD_rest4\@:
+ cmp $0, %r11
+ jle _get_AAD_rest0\@
+ mov (%r10), %eax
+ movq %rax, \T1
+ add $4, %r10
+ sub $4, %r11
+ vpslldq $12, \T1, \T1
+ vpsrldq $4, \T7, \T7
+ vpxor \T1, \T7, \T7
+_get_AAD_rest0\@:
+ /* finalize: shift out the extra bytes we read, and align
+ left. since pslldq can only shift by an immediate, we use
+ vpshufb and an array of shuffle masks */
+ movq %r12, %r11
+ salq $4, %r11
+ vmovdqu aad_shift_arr(%r11), \T1
+ vpshufb \T1, \T7, \T7
+_get_AAD_rest_final\@:
+ vpshufb SHUF_MASK(%rip), \T7, \T7
+ vpxor \T8, \T7, \T7
+ \GHASH_MUL \T7, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
+ vmovdqu \T7, AadHash(arg2)
+.endm
+
+.macro INIT GHASH_MUL PRECOMPUTE
+ mov arg6, %r11
+ mov %r11, AadLen(arg2) # ctx_data.aad_length = aad_length
+ xor %r11d, %r11d
+ mov %r11, InLen(arg2) # ctx_data.in_length = 0
+
+ mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0
+ mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0
+ mov arg3, %rax
+ movdqu (%rax), %xmm0
+ movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv
+
+ vpshufb SHUF_MASK(%rip), %xmm0, %xmm0
+ movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv
+
+ vmovdqu (arg4), %xmm6 # xmm6 = HashKey
+
+ vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
+ ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
+ vmovdqa %xmm6, %xmm2
+ vpsllq $1, %xmm6, %xmm6
+ vpsrlq $63, %xmm2, %xmm2
+ vmovdqa %xmm2, %xmm1
+ vpslldq $8, %xmm2, %xmm2
+ vpsrldq $8, %xmm1, %xmm1
+ vpor %xmm2, %xmm6, %xmm6
+ #reduction
+ vpshufd $0b00100100, %xmm1, %xmm2
+ vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
+ vpand POLY(%rip), %xmm2, %xmm2
+ vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
+ #######################################################################
+ vmovdqu %xmm6, HashKey(arg2) # store HashKey<<1 mod poly
+
+ CALC_AAD_HASH \GHASH_MUL, arg5, arg6, %xmm2, %xmm6, %xmm3, %xmm4, %xmm5, %xmm7, %xmm1, %xmm0
+
+ \PRECOMPUTE %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
+.endm
+
+
+# Reads DLEN bytes starting at DPTR and stores in XMMDst
+# where 0 < DLEN < 16
+# Clobbers %rax, DLEN
+.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
+ vpxor \XMMDst, \XMMDst, \XMMDst
+
+ cmp $8, \DLEN
+ jl _read_lt8_\@
+ mov (\DPTR), %rax
+ vpinsrq $0, %rax, \XMMDst, \XMMDst
+ sub $8, \DLEN
+ jz _done_read_partial_block_\@
+ xor %eax, %eax
+_read_next_byte_\@:
+ shl $8, %rax
+ mov 7(\DPTR, \DLEN, 1), %al
+ dec \DLEN
+ jnz _read_next_byte_\@
+ vpinsrq $1, %rax, \XMMDst, \XMMDst
+ jmp _done_read_partial_block_\@
+_read_lt8_\@:
+ xor %eax, %eax
+_read_next_byte_lt8_\@:
+ shl $8, %rax
+ mov -1(\DPTR, \DLEN, 1), %al
+ dec \DLEN
+ jnz _read_next_byte_lt8_\@
+ vpinsrq $0, %rax, \XMMDst, \XMMDst
+_done_read_partial_block_\@:
+.endm
+
+# PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
+# between update calls.
+# Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
+# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
+# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
+.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
+ AAD_HASH ENC_DEC
+ mov PBlockLen(arg2), %r13
+ cmp $0, %r13
+ je _partial_block_done_\@ # Leave Macro if no partial blocks
+ # Read in input data without over reading
+ cmp $16, \PLAIN_CYPH_LEN
+ jl _fewer_than_16_bytes_\@
+ vmovdqu (\PLAIN_CYPH_IN), %xmm1 # If more than 16 bytes, just fill xmm
+ jmp _data_read_\@
+
+_fewer_than_16_bytes_\@:
+ lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
+ mov \PLAIN_CYPH_LEN, %r12
+ READ_PARTIAL_BLOCK %r10 %r12 %xmm1
+
+ mov PBlockLen(arg2), %r13
+
+_data_read_\@: # Finished reading in data
+
+ vmovdqu PBlockEncKey(arg2), %xmm9
+ vmovdqu HashKey(arg2), %xmm13
+
+ lea SHIFT_MASK(%rip), %r12
+
+ # adjust the shuffle mask pointer to be able to shift r13 bytes
+ # r16-r13 is the number of bytes in plaintext mod 16)
+ add %r13, %r12
+ vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
+ vpshufb %xmm2, %xmm9, %xmm9 # shift right r13 bytes
+
+.if \ENC_DEC == DEC
+ vmovdqa %xmm1, %xmm3
+ pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+ # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+ sub $16, %r10
+ # Determine if if partial block is not being filled and
+ # shift mask accordingly
+ jge _no_extra_mask_1_\@
+ sub %r10, %r12
+_no_extra_mask_1_\@:
+
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ vpand %xmm1, %xmm9, %xmm9 # mask out bottom r13 bytes of xmm9
+
+ vpand %xmm1, %xmm3, %xmm3
+ vmovdqa SHUF_MASK(%rip), %xmm10
+ vpshufb %xmm10, %xmm3, %xmm3
+ vpshufb %xmm2, %xmm3, %xmm3
+ vpxor %xmm3, \AAD_HASH, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_1_\@
+
+ # GHASH computation for the last <16 Byte block
+ \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %eax,%eax
+
+ mov %rax, PBlockLen(arg2)
+ jmp _dec_done_\@
+_partial_incomplete_1_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(arg2)
+_dec_done_\@:
+ vmovdqu \AAD_HASH, AadHash(arg2)
+.else
+ vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
+
+ mov \PLAIN_CYPH_LEN, %r10
+ add %r13, %r10
+ # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
+ sub $16, %r10
+ # Determine if if partial block is not being filled and
+ # shift mask accordingly
+ jge _no_extra_mask_2_\@
+ sub %r10, %r12
+_no_extra_mask_2_\@:
+
+ vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1
+ # get the appropriate mask to mask out bottom r13 bytes of xmm9
+ vpand %xmm1, %xmm9, %xmm9
+
+ vmovdqa SHUF_MASK(%rip), %xmm1
+ vpshufb %xmm1, %xmm9, %xmm9
+ vpshufb %xmm2, %xmm9, %xmm9
+ vpxor %xmm9, \AAD_HASH, \AAD_HASH
+
+ cmp $0, %r10
+ jl _partial_incomplete_2_\@
+
+ # GHASH computation for the last <16 Byte block
+ \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
+ xor %eax,%eax
+
+ mov %rax, PBlockLen(arg2)
+ jmp _encode_done_\@
+_partial_incomplete_2_\@:
+ add \PLAIN_CYPH_LEN, PBlockLen(arg2)
+_encode_done_\@:
+ vmovdqu \AAD_HASH, AadHash(arg2)
+
+ vmovdqa SHUF_MASK(%rip), %xmm10
+ # shuffle xmm9 back to output as ciphertext
+ vpshufb %xmm10, %xmm9, %xmm9
+ vpshufb %xmm2, %xmm9, %xmm9
+.endif
+ # output encrypted Bytes
+ cmp $0, %r10
+ jl _partial_fill_\@
+ mov %r13, %r12
+ mov $16, %r13
+ # Set r13 to be the number of bytes to write out
+ sub %r12, %r13
+ jmp _count_set_\@
+_partial_fill_\@:
+ mov \PLAIN_CYPH_LEN, %r13
+_count_set_\@:
+ vmovdqa %xmm9, %xmm0
+ vmovq %xmm0, %rax
+ cmp $8, %r13
+ jle _less_than_8_bytes_left_\@
+
+ mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $8, \DATA_OFFSET
+ psrldq $8, %xmm0
+ vmovq %xmm0, %rax
+ sub $8, %r13
+_less_than_8_bytes_left_\@:
+ movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
+ add $1, \DATA_OFFSET
+ shr $8, %rax
+ sub $1, %r13
+ jne _less_than_8_bytes_left_\@
+_partial_block_done_\@:
+.endm # PARTIAL_BLOCK
+
#ifdef CONFIG_AS_AVX
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -341,49 +947,49 @@ VARIABLE_OFFSET = 16*8
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_k(arg1)
+ vmovdqu \T1, HashKey_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
- vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
+ vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_2_k(arg1)
+ vmovdqu \T1, HashKey_2_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
- vmovdqa \T5, HashKey_3(arg1)
+ vmovdqu \T5, HashKey_3(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_3_k(arg1)
+ vmovdqu \T1, HashKey_3_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
- vmovdqa \T5, HashKey_4(arg1)
+ vmovdqu \T5, HashKey_4(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_4_k(arg1)
+ vmovdqu \T1, HashKey_4_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
- vmovdqa \T5, HashKey_5(arg1)
+ vmovdqu \T5, HashKey_5(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_5_k(arg1)
+ vmovdqu \T1, HashKey_5_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
- vmovdqa \T5, HashKey_6(arg1)
+ vmovdqu \T5, HashKey_6(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_6_k(arg1)
+ vmovdqu \T1, HashKey_6_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
- vmovdqa \T5, HashKey_7(arg1)
+ vmovdqu \T5, HashKey_7(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_7_k(arg1)
+ vmovdqu \T1, HashKey_7_k(arg2)
GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
- vmovdqa \T5, HashKey_8(arg1)
+ vmovdqu \T5, HashKey_8(arg2)
vpshufd $0b01001110, \T5, \T1
vpxor \T5, \T1, \T1
- vmovdqa \T1, HashKey_8_k(arg1)
+ vmovdqu \T1, HashKey_8_k(arg2)
.endm
@@ -392,84 +998,15 @@ VARIABLE_OFFSET = 16*8
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg2, arg3, r14 are used as a pointer only, not modified
+## arg1, arg3, arg4, r14 are used as a pointer only, not modified
-.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
+.macro INITIAL_BLOCKS_AVX REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
i = (8-\num_initial_blocks)
- j = 0
setreg
-
- mov arg6, %r10 # r10 = AAD
- mov arg7, %r12 # r12 = aadLen
-
-
- mov %r12, %r11
-
- vpxor reg_j, reg_j, reg_j
- vpxor reg_i, reg_i, reg_i
- cmp $16, %r11
- jl _get_AAD_rest8\@
-_get_AAD_blocks\@:
- vmovdqu (%r10), reg_i
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
- vpxor reg_i, reg_j, reg_j
- GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6
- add $16, %r10
- sub $16, %r12
- sub $16, %r11
- cmp $16, %r11
- jge _get_AAD_blocks\@
- vmovdqu reg_j, reg_i
- cmp $0, %r11
- je _get_AAD_done\@
-
- vpxor reg_i, reg_i, reg_i
-
- /* read the last <16B of AAD. since we have at least 4B of
- data right after the AAD (the ICV, and maybe some CT), we can
- read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\@:
- cmp $4, %r11
- jle _get_AAD_rest4\@
- movq (%r10), \T1
- add $8, %r10
- sub $8, %r11
- vpslldq $8, \T1, \T1
- vpsrldq $8, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
- jmp _get_AAD_rest8\@
-_get_AAD_rest4\@:
- cmp $0, %r11
- jle _get_AAD_rest0\@
- mov (%r10), %eax
- movq %rax, \T1
- add $4, %r10
- sub $4, %r11
- vpslldq $12, \T1, \T1
- vpsrldq $4, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
-_get_AAD_rest0\@:
- /* finalize: shift out the extra bytes we read, and align
- left. since pslldq can only shift by an immediate, we use
- vpshufb and an array of shuffle masks */
- movq %r12, %r11
- salq $4, %r11
- movdqu aad_shift_arr(%r11), \T1
- vpshufb \T1, reg_i, reg_i
-_get_AAD_rest_final\@:
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
- vpxor reg_j, reg_i, reg_i
- GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6
-
-_get_AAD_done\@:
- # initialize the data pointer offset as zero
- xor %r11d, %r11d
+ vmovdqu AadHash(arg2), reg_i
# start AES for num_initial_blocks blocks
- mov arg5, %rax # rax = *Y0
- vmovdqu (%rax), \CTR # CTR = Y0
- vpshufb SHUF_MASK(%rip), \CTR, \CTR
-
+ vmovdqu CurCount(arg2), \CTR
i = (9-\num_initial_blocks)
setreg
@@ -490,10 +1027,10 @@ _get_AAD_done\@:
setreg
.endr
- j = 1
- setreg
-.rep 9
- vmovdqa 16*j(arg1), \T_key
+ j = 1
+ setreg
+.rep \REP
+ vmovdqa 16*j(arg1), \T_key
i = (9-\num_initial_blocks)
setreg
.rep \num_initial_blocks
@@ -502,12 +1039,11 @@ _get_AAD_done\@:
setreg
.endr
- j = (j+1)
- setreg
+ j = (j+1)
+ setreg
.endr
-
- vmovdqa 16*10(arg1), \T_key
+ vmovdqa 16*j(arg1), \T_key
i = (9-\num_initial_blocks)
setreg
.rep \num_initial_blocks
@@ -519,9 +1055,9 @@ _get_AAD_done\@:
i = (9-\num_initial_blocks)
setreg
.rep \num_initial_blocks
- vmovdqu (arg3, %r11), \T1
+ vmovdqu (arg4, %r11), \T1
vpxor \T1, reg_i, reg_i
- vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks
+ vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for num_initial_blocks blocks
add $16, %r11
.if \ENC_DEC == DEC
vmovdqa \T1, reg_i
@@ -595,9 +1131,9 @@ _get_AAD_done\@:
vpxor \T_key, \XMM7, \XMM7
vpxor \T_key, \XMM8, \XMM8
- i = 1
- setreg
-.rep 9 # do 9 rounds
+ i = 1
+ setreg
+.rep \REP # do REP rounds
vmovdqa 16*i(arg1), \T_key
vaesenc \T_key, \XMM1, \XMM1
vaesenc \T_key, \XMM2, \XMM2
@@ -607,11 +1143,10 @@ _get_AAD_done\@:
vaesenc \T_key, \XMM6, \XMM6
vaesenc \T_key, \XMM7, \XMM7
vaesenc \T_key, \XMM8, \XMM8
- i = (i+1)
- setreg
+ i = (i+1)
+ setreg
.endr
-
vmovdqa 16*i(arg1), \T_key
vaesenclast \T_key, \XMM1, \XMM1
vaesenclast \T_key, \XMM2, \XMM2
@@ -622,58 +1157,58 @@ _get_AAD_done\@:
vaesenclast \T_key, \XMM7, \XMM7
vaesenclast \T_key, \XMM8, \XMM8
- vmovdqu (arg3, %r11), \T1
+ vmovdqu (arg4, %r11), \T1
vpxor \T1, \XMM1, \XMM1
- vmovdqu \XMM1, (arg2 , %r11)
+ vmovdqu \XMM1, (arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM1
.endif
- vmovdqu 16*1(arg3, %r11), \T1
+ vmovdqu 16*1(arg4, %r11), \T1
vpxor \T1, \XMM2, \XMM2
- vmovdqu \XMM2, 16*1(arg2 , %r11)
+ vmovdqu \XMM2, 16*1(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM2
.endif
- vmovdqu 16*2(arg3, %r11), \T1
+ vmovdqu 16*2(arg4, %r11), \T1
vpxor \T1, \XMM3, \XMM3
- vmovdqu \XMM3, 16*2(arg2 , %r11)
+ vmovdqu \XMM3, 16*2(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM3
.endif
- vmovdqu 16*3(arg3, %r11), \T1
+ vmovdqu 16*3(arg4, %r11), \T1
vpxor \T1, \XMM4, \XMM4
- vmovdqu \XMM4, 16*3(arg2 , %r11)
+ vmovdqu \XMM4, 16*3(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM4
.endif
- vmovdqu 16*4(arg3, %r11), \T1
+ vmovdqu 16*4(arg4, %r11), \T1
vpxor \T1, \XMM5, \XMM5
- vmovdqu \XMM5, 16*4(arg2 , %r11)
+ vmovdqu \XMM5, 16*4(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM5
.endif
- vmovdqu 16*5(arg3, %r11), \T1
+ vmovdqu 16*5(arg4, %r11), \T1
vpxor \T1, \XMM6, \XMM6
- vmovdqu \XMM6, 16*5(arg2 , %r11)
+ vmovdqu \XMM6, 16*5(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM6
.endif
- vmovdqu 16*6(arg3, %r11), \T1
+ vmovdqu 16*6(arg4, %r11), \T1
vpxor \T1, \XMM7, \XMM7
- vmovdqu \XMM7, 16*6(arg2 , %r11)
+ vmovdqu \XMM7, 16*6(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM7
.endif
- vmovdqu 16*7(arg3, %r11), \T1
+ vmovdqu 16*7(arg4, %r11), \T1
vpxor \T1, \XMM8, \XMM8
- vmovdqu \XMM8, 16*7(arg2 , %r11)
+ vmovdqu \XMM8, 16*7(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM8
.endif
@@ -698,9 +1233,9 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg2, arg3 are used as pointers only, not modified
+# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
-.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
+.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
vmovdqa \XMM1, \T2
vmovdqa \XMM2, TMP2(%rsp)
@@ -784,14 +1319,14 @@ _initial_blocks_done\@:
#######################################################################
- vmovdqa HashKey_8(arg1), \T5
+ vmovdqu HashKey_8(arg2), \T5
vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
vpshufd $0b01001110, \T2, \T6
vpxor \T2, \T6, \T6
- vmovdqa HashKey_8_k(arg1), \T5
+ vmovdqu HashKey_8_k(arg2), \T5
vpclmulqdq $0x00, \T5, \T6, \T6
vmovdqu 16*3(arg1), \T1
@@ -805,7 +1340,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP2(%rsp), \T1
- vmovdqa HashKey_7(arg1), \T5
+ vmovdqu HashKey_7(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -813,7 +1348,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_7_k(arg1), \T5
+ vmovdqu HashKey_7_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -830,7 +1365,7 @@ _initial_blocks_done\@:
#######################################################################
vmovdqa TMP3(%rsp), \T1
- vmovdqa HashKey_6(arg1), \T5
+ vmovdqu HashKey_6(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -838,7 +1373,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_6_k(arg1), \T5
+ vmovdqu HashKey_6_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -853,7 +1388,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP4(%rsp), \T1
- vmovdqa HashKey_5(arg1), \T5
+ vmovdqu HashKey_5(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -861,7 +1396,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_5_k(arg1), \T5
+ vmovdqu HashKey_5_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -877,7 +1412,7 @@ _initial_blocks_done\@:
vmovdqa TMP5(%rsp), \T1
- vmovdqa HashKey_4(arg1), \T5
+ vmovdqu HashKey_4(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -885,7 +1420,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_4_k(arg1), \T5
+ vmovdqu HashKey_4_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -900,7 +1435,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP6(%rsp), \T1
- vmovdqa HashKey_3(arg1), \T5
+ vmovdqu HashKey_3(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -908,7 +1443,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_3_k(arg1), \T5
+ vmovdqu HashKey_3_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -924,7 +1459,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP7(%rsp), \T1
- vmovdqa HashKey_2(arg1), \T5
+ vmovdqu HashKey_2(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -932,7 +1467,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_2_k(arg1), \T5
+ vmovdqu HashKey_2_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -949,7 +1484,7 @@ _initial_blocks_done\@:
vaesenc \T5, \XMM8, \XMM8
vmovdqa TMP8(%rsp), \T1
- vmovdqa HashKey(arg1), \T5
+ vmovdqu HashKey(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
vpclmulqdq $0x00, \T5, \T1, \T3
@@ -957,7 +1492,7 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \T1, \T3
vpxor \T1, \T3, \T3
- vmovdqa HashKey_k(arg1), \T5
+ vmovdqu HashKey_k(arg2), \T5
vpclmulqdq $0x10, \T5, \T3, \T3
vpxor \T3, \T6, \T6
@@ -966,17 +1501,35 @@ _initial_blocks_done\@:
vmovdqu 16*10(arg1), \T5
+ i = 11
+ setreg
+.rep (\REP-9)
+
+ vaesenc \T5, \XMM1, \XMM1
+ vaesenc \T5, \XMM2, \XMM2
+ vaesenc \T5, \XMM3, \XMM3
+ vaesenc \T5, \XMM4, \XMM4
+ vaesenc \T5, \XMM5, \XMM5
+ vaesenc \T5, \XMM6, \XMM6
+ vaesenc \T5, \XMM7, \XMM7
+ vaesenc \T5, \XMM8, \XMM8
+
+ vmovdqu 16*i(arg1), \T5
+ i = i + 1
+ setreg
+.endr
+
i = 0
j = 1
setreg
.rep 8
- vpxor 16*i(arg3, %r11), \T5, \T2
+ vpxor 16*i(arg4, %r11), \T5, \T2
.if \ENC_DEC == ENC
vaesenclast \T2, reg_j, reg_j
.else
vaesenclast \T2, reg_j, \T3
- vmovdqu 16*i(arg3, %r11), reg_j
- vmovdqu \T3, 16*i(arg2, %r11)
+ vmovdqu 16*i(arg4, %r11), reg_j
+ vmovdqu \T3, 16*i(arg3, %r11)
.endif
i = (i+1)
j = (j+1)
@@ -1008,14 +1561,14 @@ _initial_blocks_done\@:
vpxor \T2, \T7, \T7 # first phase of the reduction complete
#######################################################################
.if \ENC_DEC == ENC
- vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer
.endif
#######################################################################
@@ -1056,25 +1609,25 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM1, \T2
vpxor \XMM1, \T2, \T2
- vmovdqa HashKey_8(arg1), \T5
+ vmovdqu HashKey_8(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM1, \T6
vpclmulqdq $0x00, \T5, \XMM1, \T7
- vmovdqa HashKey_8_k(arg1), \T3
+ vmovdqu HashKey_8_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \XMM1
######################
vpshufd $0b01001110, \XMM2, \T2
vpxor \XMM2, \T2, \T2
- vmovdqa HashKey_7(arg1), \T5
+ vmovdqu HashKey_7(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM2, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM2, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_7_k(arg1), \T3
+ vmovdqu HashKey_7_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1082,14 +1635,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM3, \T2
vpxor \XMM3, \T2, \T2
- vmovdqa HashKey_6(arg1), \T5
+ vmovdqu HashKey_6(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM3, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM3, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_6_k(arg1), \T3
+ vmovdqu HashKey_6_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1097,14 +1650,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM4, \T2
vpxor \XMM4, \T2, \T2
- vmovdqa HashKey_5(arg1), \T5
+ vmovdqu HashKey_5(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM4, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM4, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_5_k(arg1), \T3
+ vmovdqu HashKey_5_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1112,14 +1665,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM5, \T2
vpxor \XMM5, \T2, \T2
- vmovdqa HashKey_4(arg1), \T5
+ vmovdqu HashKey_4(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM5, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM5, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_4_k(arg1), \T3
+ vmovdqu HashKey_4_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1127,14 +1680,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM6, \T2
vpxor \XMM6, \T2, \T2
- vmovdqa HashKey_3(arg1), \T5
+ vmovdqu HashKey_3(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM6, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM6, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_3_k(arg1), \T3
+ vmovdqu HashKey_3_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1142,14 +1695,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM7, \T2
vpxor \XMM7, \T2, \T2
- vmovdqa HashKey_2(arg1), \T5
+ vmovdqu HashKey_2(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM7, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM7, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_2_k(arg1), \T3
+ vmovdqu HashKey_2_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1157,14 +1710,14 @@ _initial_blocks_done\@:
vpshufd $0b01001110, \XMM8, \T2
vpxor \XMM8, \T2, \T2
- vmovdqa HashKey(arg1), \T5
+ vmovdqu HashKey(arg2), \T5
vpclmulqdq $0x11, \T5, \XMM8, \T4
vpxor \T4, \T6, \T6
vpclmulqdq $0x00, \T5, \XMM8, \T4
vpxor \T4, \T7, \T7
- vmovdqa HashKey_k(arg1), \T3
+ vmovdqu HashKey_k(arg2), \T3
vpclmulqdq $0x00, \T3, \T2, \T2
vpxor \T2, \XMM1, \XMM1
@@ -1210,413 +1763,112 @@ _initial_blocks_done\@:
.endm
-
-# combined for GCM encrypt and decrypt functions
-# clobbering all xmm registers
-# clobbering r10, r11, r12, r13, r14, r15
-.macro GCM_ENC_DEC_AVX ENC_DEC
-
- #the number of pushes must equal STACK_OFFSET
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, %r14
-
-
-
-
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
-
-
- vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
-
- mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
- and $-16, %r13 # r13 = r13 - (r13 mod 16)
-
- mov %r13, %r12
- shr $4, %r12
- and $7, %r12
- jz _initial_num_blocks_is_0\@
-
- cmp $7, %r12
- je _initial_num_blocks_is_7\@
- cmp $6, %r12
- je _initial_num_blocks_is_6\@
- cmp $5, %r12
- je _initial_num_blocks_is_5\@
- cmp $4, %r12
- je _initial_num_blocks_is_4\@
- cmp $3, %r12
- je _initial_num_blocks_is_3\@
- cmp $2, %r12
- je _initial_num_blocks_is_2\@
-
- jmp _initial_num_blocks_is_1\@
-
-_initial_num_blocks_is_7\@:
- INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*7, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_6\@:
- INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*6, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_5\@:
- INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*5, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_4\@:
- INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*4, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_3\@:
- INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*3, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_2\@:
- INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*2, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_1\@:
- INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*1, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_0\@:
- INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
-
-
-_initial_blocks_encrypted\@:
- cmp $0, %r13
- je _zero_cipher_left\@
-
- sub $128, %r13
- je _eight_cipher_left\@
-
-
-
-
- vmovd %xmm9, %r15d
- and $255, %r15d
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
-
-
-_encrypt_by_8_new\@:
- cmp $(255-8), %r15d
- jg _encrypt_by_8\@
-
-
-
- add $8, %r15b
- GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
- add $128, %r11
- sub $128, %r13
- jne _encrypt_by_8_new\@
-
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- jmp _eight_cipher_left\@
-
-_encrypt_by_8\@:
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- add $8, %r15b
- GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- add $128, %r11
- sub $128, %r13
- jne _encrypt_by_8_new\@
-
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
-
-
-
-
-_eight_cipher_left\@:
- GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
-
-
-_zero_cipher_left\@:
- cmp $16, arg4
- jl _only_less_than_16\@
-
- mov arg4, %r13
- and $15, %r13 # r13 = (arg4 mod 16)
-
- je _multiple_of_16_bytes\@
-
- # handle the last <16 Byte block seperately
-
-
- vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
-
- sub $16, %r11
- add %r13, %r11
- vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
-
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12 # adjust the shuffle mask pointer to be
- # able to shift 16-r13 bytes (r13 is the
- # number of bytes in plaintext mod 16)
- vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
- vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
- jmp _final_ghash_mul\@
-
-_only_less_than_16\@:
- # check for 0 length
- mov arg4, %r13
- and $15, %r13 # r13 = (arg4 mod 16)
-
- je _multiple_of_16_bytes\@
-
- # handle the last <16 Byte block seperately
-
-
- vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
-
-
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12 # adjust the shuffle mask pointer to be
- # able to shift 16-r13 bytes (r13 is the
- # number of bytes in plaintext mod 16)
-
-_get_last_16_byte_loop\@:
- movb (arg3, %r11), %al
- movb %al, TMP1 (%rsp , %r11)
- add $1, %r11
- cmp %r13, %r11
- jne _get_last_16_byte_loop\@
-
- vmovdqu TMP1(%rsp), %xmm1
-
- sub $16, %r11
-
-_final_ghash_mul\@:
- .if \ENC_DEC == DEC
- vmovdqa %xmm1, %xmm2
- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
- # mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm2, %xmm2
- vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
- vpxor %xmm2, %xmm14, %xmm14
- #GHASH computation for the last <16 Byte block
- GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- sub %r13, %r11
- add $16, %r11
- .else
- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to
- # mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- vpxor %xmm9, %xmm14, %xmm14
- #GHASH computation for the last <16 Byte block
- GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- sub %r13, %r11
- add $16, %r11
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
- .endif
-
-
- #############################
- # output r13 Bytes
- vmovq %xmm9, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left\@
-
- mov %rax, (arg2 , %r11)
- add $8, %r11
- vpsrldq $8, %xmm9, %xmm9
- vmovq %xmm9, %rax
- sub $8, %r13
-
-_less_than_8_bytes_left\@:
- movb %al, (arg2 , %r11)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left\@
- #############################
-
-_multiple_of_16_bytes\@:
- mov arg7, %r12 # r12 = aadLen (number of bytes)
- shl $3, %r12 # convert into number of bits
- vmovd %r12d, %xmm15 # len(A) in xmm15
-
- shl $3, arg4 # len(C) in bits (*128)
- vmovq arg4, %xmm1
- vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
- vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
-
- vpxor %xmm15, %xmm14, %xmm14
- GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
- vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
-
- mov arg5, %rax # rax = *Y0
- vmovdqu (%rax), %xmm9 # xmm9 = Y0
-
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
-
- vpxor %xmm14, %xmm9, %xmm9
-
-
-
-_return_T\@:
- mov arg8, %r10 # r10 = authTag
- mov arg9, %r11 # r11 = auth_tag_len
-
- cmp $16, %r11
- je _T_16\@
-
- cmp $8, %r11
- jl _T_4\@
-
-_T_8\@:
- vmovq %xmm9, %rax
- mov %rax, (%r10)
- add $8, %r10
- sub $8, %r11
- vpsrldq $8, %xmm9, %xmm9
- cmp $0, %r11
- je _return_T_done\@
-_T_4\@:
- vmovd %xmm9, %eax
- mov %eax, (%r10)
- add $4, %r10
- sub $4, %r11
- vpsrldq $4, %xmm9, %xmm9
- cmp $0, %r11
- je _return_T_done\@
-_T_123\@:
- vmovd %xmm9, %eax
- cmp $2, %r11
- jl _T_1\@
- mov %ax, (%r10)
- cmp $2, %r11
- je _return_T_done\@
- add $2, %r10
- sar $16, %eax
-_T_1\@:
- mov %al, (%r10)
- jmp _return_T_done\@
-
-_T_16\@:
- vmovdqu %xmm9, (%r10)
-
-_return_T_done\@:
- mov %r14, %rsp
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-.endm
-
-
#############################################################
#void aesni_gcm_precomp_avx_gen2
# (gcm_data *my_ctx_data,
-# u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
+# gcm_context_data *data,
+# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
+# (from Security Association) concatenated with 8 byte
+# Initialisation Vector (from IPSec ESP Payload)
+# concatenated with 0x00000001. 16-byte aligned pointer. */
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
+# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
-ENTRY(aesni_gcm_precomp_avx_gen2)
- #the number of pushes must equal STACK_OFFSET
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, %r14
-
-
-
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
-
- vmovdqu (arg2), %xmm6 # xmm6 = HashKey
-
- vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
- ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
- vmovdqa %xmm6, %xmm2
- vpsllq $1, %xmm6, %xmm6
- vpsrlq $63, %xmm2, %xmm2
- vmovdqa %xmm2, %xmm1
- vpslldq $8, %xmm2, %xmm2
- vpsrldq $8, %xmm1, %xmm1
- vpor %xmm2, %xmm6, %xmm6
- #reduction
- vpshufd $0b00100100, %xmm1, %xmm2
- vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
- vpand POLY(%rip), %xmm2, %xmm2
- vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
- #######################################################################
- vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
-
-
- PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
-
- mov %r14, %rsp
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
+ENTRY(aesni_gcm_init_avx_gen2)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
+ FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_precomp_avx_gen2)
+ENDPROC(aesni_gcm_init_avx_gen2)
###############################################################################
-#void aesni_gcm_enc_avx_gen2(
+#void aesni_gcm_enc_update_avx_gen2(
# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
# const u8 *in, /* Plaintext input */
-# u64 plaintext_len, /* Length of data in Bytes for encryption. */
-# u8 *iv, /* Pre-counter block j0: 4 byte salt
-# (from Security Association) concatenated with 8 byte
-# Initialisation Vector (from IPSec ESP Payload)
-# concatenated with 0x00000001. 16-byte aligned pointer. */
-# const u8 *aad, /* Additional Authentication Data (AAD)*/
-# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-# u8 *auth_tag, /* Authenticated Tag output. */
-# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
-# Valid values are 16 (most likely), 12 or 8. */
+# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_enc_avx_gen2)
- GCM_ENC_DEC_AVX ENC
- ret
-ENDPROC(aesni_gcm_enc_avx_gen2)
+ENTRY(aesni_gcm_enc_update_avx_gen2)
+ FUNC_SAVE
+ mov keysize, %eax
+ cmp $32, %eax
+ je key_256_enc_update
+ cmp $16, %eax
+ je key_128_enc_update
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
+ FUNC_RESTORE
+ ret
+key_128_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
+ FUNC_RESTORE
+ ret
+key_256_enc_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_enc_update_avx_gen2)
###############################################################################
-#void aesni_gcm_dec_avx_gen2(
+#void aesni_gcm_dec_update_avx_gen2(
# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
# const u8 *in, /* Ciphertext input */
-# u64 plaintext_len, /* Length of data in Bytes for encryption. */
-# u8 *iv, /* Pre-counter block j0: 4 byte salt
-# (from Security Association) concatenated with 8 byte
-# Initialisation Vector (from IPSec ESP Payload)
-# concatenated with 0x00000001. 16-byte aligned pointer. */
-# const u8 *aad, /* Additional Authentication Data (AAD)*/
-# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
+# u64 plaintext_len) /* Length of data in Bytes for encryption. */
+###############################################################################
+ENTRY(aesni_gcm_dec_update_avx_gen2)
+ FUNC_SAVE
+ mov keysize,%eax
+ cmp $32, %eax
+ je key_256_dec_update
+ cmp $16, %eax
+ je key_128_dec_update
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
+ FUNC_RESTORE
+ ret
+key_128_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
+ FUNC_RESTORE
+ ret
+key_256_dec_update:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_dec_update_avx_gen2)
+
+###############################################################################
+#void aesni_gcm_finalize_avx_gen2(
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *auth_tag, /* Authenticated Tag output. */
# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
# Valid values are 16 (most likely), 12 or 8. */
###############################################################################
-ENTRY(aesni_gcm_dec_avx_gen2)
- GCM_ENC_DEC_AVX DEC
- ret
-ENDPROC(aesni_gcm_dec_avx_gen2)
+ENTRY(aesni_gcm_finalize_avx_gen2)
+ FUNC_SAVE
+ mov keysize,%eax
+ cmp $32, %eax
+ je key_256_finalize
+ cmp $16, %eax
+ je key_128_finalize
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
+ FUNC_RESTORE
+ ret
+key_128_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
+ FUNC_RESTORE
+ ret
+key_256_finalize:
+ GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_finalize_avx_gen2)
+
#endif /* CONFIG_AS_AVX */
#ifdef CONFIG_AS_AVX2
@@ -1670,113 +1922,42 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
# Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
vmovdqa \HK, \T5
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly
- vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly
+ vmovdqu \T5, HashKey_2(arg2) # [HashKey_2] = HashKey^2<<1 mod poly
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly
- vmovdqa \T5, HashKey_3(arg1)
+ vmovdqu \T5, HashKey_3(arg2)
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly
- vmovdqa \T5, HashKey_4(arg1)
+ vmovdqu \T5, HashKey_4(arg2)
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly
- vmovdqa \T5, HashKey_5(arg1)
+ vmovdqu \T5, HashKey_5(arg2)
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly
- vmovdqa \T5, HashKey_6(arg1)
+ vmovdqu \T5, HashKey_6(arg2)
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly
- vmovdqa \T5, HashKey_7(arg1)
+ vmovdqu \T5, HashKey_7(arg2)
GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly
- vmovdqa \T5, HashKey_8(arg1)
+ vmovdqu \T5, HashKey_8(arg2)
.endm
-
## if a = number of total plaintext bytes
## b = floor(a/16)
## num_initial_blocks = b mod 4#
## encrypt the initial num_initial_blocks blocks and apply ghash on the ciphertext
## r10, r11, r12, rax are clobbered
-## arg1, arg2, arg3, r14 are used as a pointer only, not modified
+## arg1, arg3, arg4, r14 are used as a pointer only, not modified
-.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
+.macro INITIAL_BLOCKS_AVX2 REP num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
i = (8-\num_initial_blocks)
- j = 0
setreg
-
- mov arg6, %r10 # r10 = AAD
- mov arg7, %r12 # r12 = aadLen
-
-
- mov %r12, %r11
-
- vpxor reg_j, reg_j, reg_j
- vpxor reg_i, reg_i, reg_i
-
- cmp $16, %r11
- jl _get_AAD_rest8\@
-_get_AAD_blocks\@:
- vmovdqu (%r10), reg_i
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
- vpxor reg_i, reg_j, reg_j
- GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6
- add $16, %r10
- sub $16, %r12
- sub $16, %r11
- cmp $16, %r11
- jge _get_AAD_blocks\@
- vmovdqu reg_j, reg_i
- cmp $0, %r11
- je _get_AAD_done\@
-
- vpxor reg_i, reg_i, reg_i
-
- /* read the last <16B of AAD. since we have at least 4B of
- data right after the AAD (the ICV, and maybe some CT), we can
- read 4B/8B blocks safely, and then get rid of the extra stuff */
-_get_AAD_rest8\@:
- cmp $4, %r11
- jle _get_AAD_rest4\@
- movq (%r10), \T1
- add $8, %r10
- sub $8, %r11
- vpslldq $8, \T1, \T1
- vpsrldq $8, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
- jmp _get_AAD_rest8\@
-_get_AAD_rest4\@:
- cmp $0, %r11
- jle _get_AAD_rest0\@
- mov (%r10), %eax
- movq %rax, \T1
- add $4, %r10
- sub $4, %r11
- vpslldq $12, \T1, \T1
- vpsrldq $4, reg_i, reg_i
- vpxor \T1, reg_i, reg_i
-_get_AAD_rest0\@:
- /* finalize: shift out the extra bytes we read, and align
- left. since pslldq can only shift by an immediate, we use
- vpshufb and an array of shuffle masks */
- movq %r12, %r11
- salq $4, %r11
- movdqu aad_shift_arr(%r11), \T1
- vpshufb \T1, reg_i, reg_i
-_get_AAD_rest_final\@:
- vpshufb SHUF_MASK(%rip), reg_i, reg_i
- vpxor reg_j, reg_i, reg_i
- GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6
-
-_get_AAD_done\@:
- # initialize the data pointer offset as zero
- xor %r11d, %r11d
+ vmovdqu AadHash(arg2), reg_i
# start AES for num_initial_blocks blocks
- mov arg5, %rax # rax = *Y0
- vmovdqu (%rax), \CTR # CTR = Y0
- vpshufb SHUF_MASK(%rip), \CTR, \CTR
-
+ vmovdqu CurCount(arg2), \CTR
i = (9-\num_initial_blocks)
setreg
@@ -1799,7 +1980,7 @@ _get_AAD_done\@:
j = 1
setreg
-.rep 9
+.rep \REP
vmovdqa 16*j(arg1), \T_key
i = (9-\num_initial_blocks)
setreg
@@ -1814,7 +1995,7 @@ _get_AAD_done\@:
.endr
- vmovdqa 16*10(arg1), \T_key
+ vmovdqa 16*j(arg1), \T_key
i = (9-\num_initial_blocks)
setreg
.rep \num_initial_blocks
@@ -1826,9 +2007,9 @@ _get_AAD_done\@:
i = (9-\num_initial_blocks)
setreg
.rep \num_initial_blocks
- vmovdqu (arg3, %r11), \T1
+ vmovdqu (arg4, %r11), \T1
vpxor \T1, reg_i, reg_i
- vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for
+ vmovdqu reg_i, (arg3 , %r11) # write back ciphertext for
# num_initial_blocks blocks
add $16, %r11
.if \ENC_DEC == DEC
@@ -1905,7 +2086,7 @@ _get_AAD_done\@:
i = 1
setreg
-.rep 9 # do 9 rounds
+.rep \REP # do REP rounds
vmovdqa 16*i(arg1), \T_key
vaesenc \T_key, \XMM1, \XMM1
vaesenc \T_key, \XMM2, \XMM2
@@ -1930,58 +2111,58 @@ _get_AAD_done\@:
vaesenclast \T_key, \XMM7, \XMM7
vaesenclast \T_key, \XMM8, \XMM8
- vmovdqu (arg3, %r11), \T1
+ vmovdqu (arg4, %r11), \T1
vpxor \T1, \XMM1, \XMM1
- vmovdqu \XMM1, (arg2 , %r11)
+ vmovdqu \XMM1, (arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM1
.endif
- vmovdqu 16*1(arg3, %r11), \T1
+ vmovdqu 16*1(arg4, %r11), \T1
vpxor \T1, \XMM2, \XMM2
- vmovdqu \XMM2, 16*1(arg2 , %r11)
+ vmovdqu \XMM2, 16*1(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM2
.endif
- vmovdqu 16*2(arg3, %r11), \T1
+ vmovdqu 16*2(arg4, %r11), \T1
vpxor \T1, \XMM3, \XMM3
- vmovdqu \XMM3, 16*2(arg2 , %r11)
+ vmovdqu \XMM3, 16*2(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM3
.endif
- vmovdqu 16*3(arg3, %r11), \T1
+ vmovdqu 16*3(arg4, %r11), \T1
vpxor \T1, \XMM4, \XMM4
- vmovdqu \XMM4, 16*3(arg2 , %r11)
+ vmovdqu \XMM4, 16*3(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM4
.endif
- vmovdqu 16*4(arg3, %r11), \T1
+ vmovdqu 16*4(arg4, %r11), \T1
vpxor \T1, \XMM5, \XMM5
- vmovdqu \XMM5, 16*4(arg2 , %r11)
+ vmovdqu \XMM5, 16*4(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM5
.endif
- vmovdqu 16*5(arg3, %r11), \T1
+ vmovdqu 16*5(arg4, %r11), \T1
vpxor \T1, \XMM6, \XMM6
- vmovdqu \XMM6, 16*5(arg2 , %r11)
+ vmovdqu \XMM6, 16*5(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM6
.endif
- vmovdqu 16*6(arg3, %r11), \T1
+ vmovdqu 16*6(arg4, %r11), \T1
vpxor \T1, \XMM7, \XMM7
- vmovdqu \XMM7, 16*6(arg2 , %r11)
+ vmovdqu \XMM7, 16*6(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM7
.endif
- vmovdqu 16*7(arg3, %r11), \T1
+ vmovdqu 16*7(arg4, %r11), \T1
vpxor \T1, \XMM8, \XMM8
- vmovdqu \XMM8, 16*7(arg2 , %r11)
+ vmovdqu \XMM8, 16*7(arg3 , %r11)
.if \ENC_DEC == DEC
vmovdqa \T1, \XMM8
.endif
@@ -2010,9 +2191,9 @@ _initial_blocks_done\@:
# encrypt 8 blocks at a time
# ghash the 8 previously encrypted ciphertext blocks
-# arg1, arg2, arg3 are used as pointers only, not modified
+# arg1, arg3, arg4 are used as pointers only, not modified
# r11 is the data offset value
-.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
+.macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 REP T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC
vmovdqa \XMM1, \T2
vmovdqa \XMM2, TMP2(%rsp)
@@ -2096,7 +2277,7 @@ _initial_blocks_done\@:
#######################################################################
- vmovdqa HashKey_8(arg1), \T5
+ vmovdqu HashKey_8(arg2), \T5
vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1
vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0
vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0
@@ -2114,7 +2295,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP2(%rsp), \T1
- vmovdqa HashKey_7(arg1), \T5
+ vmovdqu HashKey_7(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2140,7 +2321,7 @@ _initial_blocks_done\@:
#######################################################################
vmovdqa TMP3(%rsp), \T1
- vmovdqa HashKey_6(arg1), \T5
+ vmovdqu HashKey_6(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2164,7 +2345,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP4(%rsp), \T1
- vmovdqa HashKey_5(arg1), \T5
+ vmovdqu HashKey_5(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2189,7 +2370,7 @@ _initial_blocks_done\@:
vmovdqa TMP5(%rsp), \T1
- vmovdqa HashKey_4(arg1), \T5
+ vmovdqu HashKey_4(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2213,7 +2394,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP6(%rsp), \T1
- vmovdqa HashKey_3(arg1), \T5
+ vmovdqu HashKey_3(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2237,7 +2418,7 @@ _initial_blocks_done\@:
vaesenc \T1, \XMM8, \XMM8
vmovdqa TMP7(%rsp), \T1
- vmovdqa HashKey_2(arg1), \T5
+ vmovdqu HashKey_2(arg2), \T5
vpclmulqdq $0x11, \T5, \T1, \T3
vpxor \T3, \T4, \T4
@@ -2264,7 +2445,7 @@ _initial_blocks_done\@:
vaesenc \T5, \XMM8, \XMM8
vmovdqa TMP8(%rsp), \T1
- vmovdqa HashKey(arg1), \T5
+ vmovdqu HashKey(arg2), \T5
vpclmulqdq $0x00, \T5, \T1, \T3
vpxor \T3, \T7, \T7
@@ -2281,17 +2462,34 @@ _initial_blocks_done\@:
vmovdqu 16*10(arg1), \T5
+ i = 11
+ setreg
+.rep (\REP-9)
+ vaesenc \T5, \XMM1, \XMM1
+ vaesenc \T5, \XMM2, \XMM2
+ vaesenc \T5, \XMM3, \XMM3
+ vaesenc \T5, \XMM4, \XMM4
+ vaesenc \T5, \XMM5, \XMM5
+ vaesenc \T5, \XMM6, \XMM6
+ vaesenc \T5, \XMM7, \XMM7
+ vaesenc \T5, \XMM8, \XMM8
+
+ vmovdqu 16*i(arg1), \T5
+ i = i + 1
+ setreg
+.endr
+
i = 0
j = 1
setreg
.rep 8
- vpxor 16*i(arg3, %r11), \T5, \T2
+ vpxor 16*i(arg4, %r11), \T5, \T2
.if \ENC_DEC == ENC
vaesenclast \T2, reg_j, reg_j
.else
vaesenclast \T2, reg_j, \T3
- vmovdqu 16*i(arg3, %r11), reg_j
- vmovdqu \T3, 16*i(arg2, %r11)
+ vmovdqu 16*i(arg4, %r11), reg_j
+ vmovdqu \T3, 16*i(arg3, %r11)
.endif
i = (i+1)
j = (j+1)
@@ -2317,14 +2515,14 @@ _initial_blocks_done\@:
vpxor \T2, \T7, \T7 # first phase of the reduction complete
#######################################################################
.if \ENC_DEC == ENC
- vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer
- vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM1, 16*0(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM2, 16*1(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM3, 16*2(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM4, 16*3(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM5, 16*4(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM6, 16*5(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM7, 16*6(arg3,%r11) # Write to the Ciphertext buffer
+ vmovdqu \XMM8, 16*7(arg3,%r11) # Write to the Ciphertext buffer
.endif
#######################################################################
@@ -2361,7 +2559,7 @@ _initial_blocks_done\@:
## Karatsuba Method
- vmovdqa HashKey_8(arg1), \T5
+ vmovdqu HashKey_8(arg2), \T5
vpshufd $0b01001110, \XMM1, \T2
vpshufd $0b01001110, \T5, \T3
@@ -2375,7 +2573,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_7(arg1), \T5
+ vmovdqu HashKey_7(arg2), \T5
vpshufd $0b01001110, \XMM2, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM2, \T2, \T2
@@ -2393,7 +2591,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_6(arg1), \T5
+ vmovdqu HashKey_6(arg2), \T5
vpshufd $0b01001110, \XMM3, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM3, \T2, \T2
@@ -2411,7 +2609,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_5(arg1), \T5
+ vmovdqu HashKey_5(arg2), \T5
vpshufd $0b01001110, \XMM4, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM4, \T2, \T2
@@ -2429,7 +2627,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_4(arg1), \T5
+ vmovdqu HashKey_4(arg2), \T5
vpshufd $0b01001110, \XMM5, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM5, \T2, \T2
@@ -2447,7 +2645,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_3(arg1), \T5
+ vmovdqu HashKey_3(arg2), \T5
vpshufd $0b01001110, \XMM6, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM6, \T2, \T2
@@ -2465,7 +2663,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey_2(arg1), \T5
+ vmovdqu HashKey_2(arg2), \T5
vpshufd $0b01001110, \XMM7, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM7, \T2, \T2
@@ -2483,7 +2681,7 @@ _initial_blocks_done\@:
######################
- vmovdqa HashKey(arg1), \T5
+ vmovdqu HashKey(arg2), \T5
vpshufd $0b01001110, \XMM8, \T2
vpshufd $0b01001110, \T5, \T3
vpxor \XMM8, \T2, \T2
@@ -2536,411 +2734,110 @@ _initial_blocks_done\@:
-# combined for GCM encrypt and decrypt functions
-# clobbering all xmm registers
-# clobbering r10, r11, r12, r13, r14, r15
-.macro GCM_ENC_DEC_AVX2 ENC_DEC
-
- #the number of pushes must equal STACK_OFFSET
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, %r14
-
-
-
-
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
-
-
- vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey
-
- mov arg4, %r13 # save the number of bytes of plaintext/ciphertext
- and $-16, %r13 # r13 = r13 - (r13 mod 16)
-
- mov %r13, %r12
- shr $4, %r12
- and $7, %r12
- jz _initial_num_blocks_is_0\@
-
- cmp $7, %r12
- je _initial_num_blocks_is_7\@
- cmp $6, %r12
- je _initial_num_blocks_is_6\@
- cmp $5, %r12
- je _initial_num_blocks_is_5\@
- cmp $4, %r12
- je _initial_num_blocks_is_4\@
- cmp $3, %r12
- je _initial_num_blocks_is_3\@
- cmp $2, %r12
- je _initial_num_blocks_is_2\@
-
- jmp _initial_num_blocks_is_1\@
-
-_initial_num_blocks_is_7\@:
- INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*7, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_6\@:
- INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*6, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_5\@:
- INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*5, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_4\@:
- INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*4, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_3\@:
- INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*3, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_2\@:
- INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*2, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_1\@:
- INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
- sub $16*1, %r13
- jmp _initial_blocks_encrypted\@
-
-_initial_num_blocks_is_0\@:
- INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC
-
-
-_initial_blocks_encrypted\@:
- cmp $0, %r13
- je _zero_cipher_left\@
-
- sub $128, %r13
- je _eight_cipher_left\@
-
-
-
-
- vmovd %xmm9, %r15d
- and $255, %r15d
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
-
-
-_encrypt_by_8_new\@:
- cmp $(255-8), %r15d
- jg _encrypt_by_8\@
-
-
-
- add $8, %r15b
- GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC
- add $128, %r11
- sub $128, %r13
- jne _encrypt_by_8_new\@
-
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- jmp _eight_cipher_left\@
-
-_encrypt_by_8\@:
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- add $8, %r15b
- GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- add $128, %r11
- sub $128, %r13
- jne _encrypt_by_8_new\@
-
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
-
-
-
-
-_eight_cipher_left\@:
- GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8
-
-
-_zero_cipher_left\@:
- cmp $16, arg4
- jl _only_less_than_16\@
-
- mov arg4, %r13
- and $15, %r13 # r13 = (arg4 mod 16)
-
- je _multiple_of_16_bytes\@
-
- # handle the last <16 Byte block seperately
-
-
- vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
-
- sub $16, %r11
- add %r13, %r11
- vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block
-
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12 # adjust the shuffle mask pointer
- # to be able to shift 16-r13 bytes
- # (r13 is the number of bytes in plaintext mod 16)
- vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask
- vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes
- jmp _final_ghash_mul\@
-
-_only_less_than_16\@:
- # check for 0 length
- mov arg4, %r13
- and $15, %r13 # r13 = (arg4 mod 16)
-
- je _multiple_of_16_bytes\@
-
- # handle the last <16 Byte block seperately
-
-
- vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn)
-
-
- lea SHIFT_MASK+16(%rip), %r12
- sub %r13, %r12 # adjust the shuffle mask pointer to be
- # able to shift 16-r13 bytes (r13 is the
- # number of bytes in plaintext mod 16)
-
-_get_last_16_byte_loop\@:
- movb (arg3, %r11), %al
- movb %al, TMP1 (%rsp , %r11)
- add $1, %r11
- cmp %r13, %r11
- jne _get_last_16_byte_loop\@
-
- vmovdqu TMP1(%rsp), %xmm1
-
- sub $16, %r11
-
-_final_ghash_mul\@:
- .if \ENC_DEC == DEC
- vmovdqa %xmm1, %xmm2
- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm2, %xmm2
- vpshufb SHUF_MASK(%rip), %xmm2, %xmm2
- vpxor %xmm2, %xmm14, %xmm14
- #GHASH computation for the last <16 Byte block
- GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- sub %r13, %r11
- add $16, %r11
- .else
- vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn)
- vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9
- vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
- vpxor %xmm9, %xmm14, %xmm14
- #GHASH computation for the last <16 Byte block
- GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
- sub %r13, %r11
- add $16, %r11
- vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext
- .endif
-
-
- #############################
- # output r13 Bytes
- vmovq %xmm9, %rax
- cmp $8, %r13
- jle _less_than_8_bytes_left\@
-
- mov %rax, (arg2 , %r11)
- add $8, %r11
- vpsrldq $8, %xmm9, %xmm9
- vmovq %xmm9, %rax
- sub $8, %r13
-
-_less_than_8_bytes_left\@:
- movb %al, (arg2 , %r11)
- add $1, %r11
- shr $8, %rax
- sub $1, %r13
- jne _less_than_8_bytes_left\@
- #############################
-
-_multiple_of_16_bytes\@:
- mov arg7, %r12 # r12 = aadLen (number of bytes)
- shl $3, %r12 # convert into number of bits
- vmovd %r12d, %xmm15 # len(A) in xmm15
-
- shl $3, arg4 # len(C) in bits (*128)
- vmovq arg4, %xmm1
- vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000
- vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C)
-
- vpxor %xmm15, %xmm14, %xmm14
- GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation
- vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap
-
- mov arg5, %rax # rax = *Y0
- vmovdqu (%rax), %xmm9 # xmm9 = Y0
-
- ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0)
-
- vpxor %xmm14, %xmm9, %xmm9
-
-
-
-_return_T\@:
- mov arg8, %r10 # r10 = authTag
- mov arg9, %r11 # r11 = auth_tag_len
-
- cmp $16, %r11
- je _T_16\@
-
- cmp $8, %r11
- jl _T_4\@
-
-_T_8\@:
- vmovq %xmm9, %rax
- mov %rax, (%r10)
- add $8, %r10
- sub $8, %r11
- vpsrldq $8, %xmm9, %xmm9
- cmp $0, %r11
- je _return_T_done\@
-_T_4\@:
- vmovd %xmm9, %eax
- mov %eax, (%r10)
- add $4, %r10
- sub $4, %r11
- vpsrldq $4, %xmm9, %xmm9
- cmp $0, %r11
- je _return_T_done\@
-_T_123\@:
- vmovd %xmm9, %eax
- cmp $2, %r11
- jl _T_1\@
- mov %ax, (%r10)
- cmp $2, %r11
- je _return_T_done\@
- add $2, %r10
- sar $16, %eax
-_T_1\@:
- mov %al, (%r10)
- jmp _return_T_done\@
-
-_T_16\@:
- vmovdqu %xmm9, (%r10)
-
-_return_T_done\@:
- mov %r14, %rsp
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
-.endm
-
-
#############################################################
-#void aesni_gcm_precomp_avx_gen4
+#void aesni_gcm_init_avx_gen4
# (gcm_data *my_ctx_data,
-# u8 *hash_subkey)# /* H, the Hash sub key input.
-# Data starts on a 16-byte boundary. */
+# gcm_context_data *data,
+# u8 *iv, /* Pre-counter block j0: 4 byte salt
+# (from Security Association) concatenated with 8 byte
+# Initialisation Vector (from IPSec ESP Payload)
+# concatenated with 0x00000001. 16-byte aligned pointer. */
+# u8 *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
+# const u8 *aad, /* Additional Authentication Data (AAD)*/
+# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
-ENTRY(aesni_gcm_precomp_avx_gen4)
- #the number of pushes must equal STACK_OFFSET
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov %rsp, %r14
-
-
-
- sub $VARIABLE_OFFSET, %rsp
- and $~63, %rsp # align rsp to 64 bytes
-
- vmovdqu (arg2), %xmm6 # xmm6 = HashKey
-
- vpshufb SHUF_MASK(%rip), %xmm6, %xmm6
- ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
- vmovdqa %xmm6, %xmm2
- vpsllq $1, %xmm6, %xmm6
- vpsrlq $63, %xmm2, %xmm2
- vmovdqa %xmm2, %xmm1
- vpslldq $8, %xmm2, %xmm2
- vpsrldq $8, %xmm1, %xmm1
- vpor %xmm2, %xmm6, %xmm6
- #reduction
- vpshufd $0b00100100, %xmm1, %xmm2
- vpcmpeqd TWOONE(%rip), %xmm2, %xmm2
- vpand POLY(%rip), %xmm2, %xmm2
- vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly
- #######################################################################
- vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly
-
-
- PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
-
- mov %r14, %rsp
-
- pop %r15
- pop %r14
- pop %r13
- pop %r12
+ENTRY(aesni_gcm_init_avx_gen4)
+ FUNC_SAVE
+ INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
+ FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_precomp_avx_gen4)
-
+ENDPROC(aesni_gcm_init_avx_gen4)
###############################################################################
#void aesni_gcm_enc_avx_gen4(
# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */
# const u8 *in, /* Plaintext input */
-# u64 plaintext_len, /* Length of data in Bytes for encryption. */
-# u8 *iv, /* Pre-counter block j0: 4 byte salt
-# (from Security Association) concatenated with 8 byte
-# Initialisation Vector (from IPSec ESP Payload)
-# concatenated with 0x00000001. 16-byte aligned pointer. */
-# const u8 *aad, /* Additional Authentication Data (AAD)*/
-# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-# u8 *auth_tag, /* Authenticated Tag output. */
-# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
-# Valid values are 16 (most likely), 12 or 8. */
+# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_enc_avx_gen4)
- GCM_ENC_DEC_AVX2 ENC
+ENTRY(aesni_gcm_enc_update_avx_gen4)
+ FUNC_SAVE
+ mov keysize,%eax
+ cmp $32, %eax
+ je key_256_enc_update4
+ cmp $16, %eax
+ je key_128_enc_update4
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
+ FUNC_RESTORE
+ ret
+key_128_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
+ FUNC_RESTORE
+ ret
+key_256_enc_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
+ FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc_avx_gen4)
+ENDPROC(aesni_gcm_enc_update_avx_gen4)
###############################################################################
-#void aesni_gcm_dec_avx_gen4(
+#void aesni_gcm_dec_update_avx_gen4(
# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *out, /* Plaintext output. Decrypt in-place is allowed. */
# const u8 *in, /* Ciphertext input */
-# u64 plaintext_len, /* Length of data in Bytes for encryption. */
-# u8 *iv, /* Pre-counter block j0: 4 byte salt
-# (from Security Association) concatenated with 8 byte
-# Initialisation Vector (from IPSec ESP Payload)
-# concatenated with 0x00000001. 16-byte aligned pointer. */
-# const u8 *aad, /* Additional Authentication Data (AAD)*/
-# u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
+# u64 plaintext_len) /* Length of data in Bytes for encryption. */
+###############################################################################
+ENTRY(aesni_gcm_dec_update_avx_gen4)
+ FUNC_SAVE
+ mov keysize,%eax
+ cmp $32, %eax
+ je key_256_dec_update4
+ cmp $16, %eax
+ je key_128_dec_update4
+ # must be 192
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
+ FUNC_RESTORE
+ ret
+key_128_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
+ FUNC_RESTORE
+ ret
+key_256_dec_update4:
+ GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_dec_update_avx_gen4)
+
+###############################################################################
+#void aesni_gcm_finalize_avx_gen4(
+# gcm_data *my_ctx_data, /* aligned to 16 Bytes */
+# gcm_context_data *data,
# u8 *auth_tag, /* Authenticated Tag output. */
# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
-# Valid values are 16 (most likely), 12 or 8. */
+# Valid values are 16 (most likely), 12 or 8. */
###############################################################################
-ENTRY(aesni_gcm_dec_avx_gen4)
- GCM_ENC_DEC_AVX2 DEC
- ret
-ENDPROC(aesni_gcm_dec_avx_gen4)
+ENTRY(aesni_gcm_finalize_avx_gen4)
+ FUNC_SAVE
+ mov keysize,%eax
+ cmp $32, %eax
+ je key_256_finalize4
+ cmp $16, %eax
+ je key_128_finalize4
+ # must be 192
+ GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
+ FUNC_RESTORE
+ ret
+key_128_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
+ FUNC_RESTORE
+ ret
+key_256_finalize4:
+ GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
+ FUNC_RESTORE
+ ret
+ENDPROC(aesni_gcm_finalize_avx_gen4)
#endif /* CONFIG_AS_AVX2 */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 661f7daf43da..1321700d6647 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -84,7 +84,7 @@ struct gcm_context_data {
u8 current_counter[GCM_BLOCK_LEN];
u64 partial_block_len;
u64 unused;
- u8 hash_keys[GCM_BLOCK_LEN * 8];
+ u8 hash_keys[GCM_BLOCK_LEN * 16];
};
asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
@@ -175,6 +175,32 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
+static struct aesni_gcm_tfm_s {
+void (*init)(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *iv,
+ u8 *hash_subkey, const u8 *aad,
+ unsigned long aad_len);
+void (*enc_update)(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in,
+ unsigned long plaintext_len);
+void (*dec_update)(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in,
+ unsigned long ciphertext_len);
+void (*finalize)(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *auth_tag, unsigned long auth_tag_len);
+} *aesni_gcm_tfm;
+
+struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
+ .init = &aesni_gcm_init,
+ .enc_update = &aesni_gcm_enc_update,
+ .dec_update = &aesni_gcm_dec_update,
+ .finalize = &aesni_gcm_finalize,
+};
+
#ifdef CONFIG_AS_AVX
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
@@ -183,136 +209,94 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
/*
- * asmlinkage void aesni_gcm_precomp_avx_gen2()
+ * asmlinkage void aesni_gcm_init_avx_gen2()
* gcm_data *my_ctx_data, context data
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
*/
-asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey);
+asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
+ struct gcm_context_data *gdata,
+ u8 *iv,
+ u8 *hash_subkey,
+ const u8 *aad,
+ unsigned long aad_len);
+
+asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in,
+ unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-static void aesni_gcm_enc_avx(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len)
-{
- struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
- if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
- aesni_gcm_enc(ctx, data, out, in,
- plaintext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
- } else {
- aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
- aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- }
-}
+struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
+ .init = &aesni_gcm_init_avx_gen2,
+ .enc_update = &aesni_gcm_enc_update_avx_gen2,
+ .dec_update = &aesni_gcm_dec_update_avx_gen2,
+ .finalize = &aesni_gcm_finalize_avx_gen2,
+};
-static void aesni_gcm_dec_avx(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len)
-{
- struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
- if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_dec(ctx, data, out, in,
- ciphertext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
- } else {
- aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
- aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- }
-}
#endif
#ifdef CONFIG_AS_AVX2
/*
- * asmlinkage void aesni_gcm_precomp_avx_gen4()
+ * asmlinkage void aesni_gcm_init_avx_gen4()
* gcm_data *my_ctx_data, context data
* u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
*/
-asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey);
+asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
+ struct gcm_context_data *gdata,
+ u8 *iv,
+ u8 *hash_subkey,
+ const u8 *aad,
+ unsigned long aad_len);
+
+asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
+ const u8 *in,
+ unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
+ struct gcm_context_data *gdata,
+ u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long plaintext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out,
+asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
+ struct gcm_context_data *gdata, u8 *out,
const u8 *in, unsigned long ciphertext_len, u8 *iv,
const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len);
-static void aesni_gcm_enc_avx2(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len)
-{
- struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
- if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_enc(ctx, data, out, in,
- plaintext_len, iv, hash_subkey, aad,
- aad_len, auth_tag, auth_tag_len);
- } else if (plaintext_len < AVX_GEN4_OPTSIZE) {
- aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
- aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- } else {
- aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
- aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- }
-}
+struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
+ .init = &aesni_gcm_init_avx_gen4,
+ .enc_update = &aesni_gcm_enc_update_avx_gen4,
+ .dec_update = &aesni_gcm_dec_update_avx_gen4,
+ .finalize = &aesni_gcm_finalize_avx_gen4,
+};
-static void aesni_gcm_dec_avx2(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len)
-{
- struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
- if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
- aesni_gcm_dec(ctx, data, out, in,
- ciphertext_len, iv, hash_subkey,
- aad, aad_len, auth_tag, auth_tag_len);
- } else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
- aesni_gcm_precomp_avx_gen2(ctx, hash_subkey);
- aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- } else {
- aesni_gcm_precomp_avx_gen4(ctx, hash_subkey);
- aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad,
- aad_len, auth_tag, auth_tag_len);
- }
-}
#endif
-static void (*aesni_gcm_enc_tfm)(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long plaintext_len,
- u8 *iv, u8 *hash_subkey, const u8 *aad,
- unsigned long aad_len, u8 *auth_tag,
- unsigned long auth_tag_len);
-
-static void (*aesni_gcm_dec_tfm)(void *ctx,
- struct gcm_context_data *data, u8 *out,
- const u8 *in, unsigned long ciphertext_len,
- u8 *iv, u8 *hash_subkey, const u8 *aad,
- unsigned long aad_len, u8 *auth_tag,
- unsigned long auth_tag_len);
-
static inline struct
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
{
@@ -794,6 +778,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
struct gcm_context_data data AESNI_ALIGN_ATTR;
struct scatter_walk dst_sg_walk = {};
unsigned long left = req->cryptlen;
@@ -811,6 +796,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
if (!enc)
left -= auth_tag_len;
+#ifdef CONFIG_AS_AVX2
+ if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
+ gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+#endif
+#ifdef CONFIG_AS_AVX
+ if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
+ gcm_tfm = &aesni_gcm_tfm_sse;
+#endif
+
/* Linearize assoc, if not already linear */
if (req->src->length >= assoclen && req->src->length &&
(!PageHighMem(sg_page(req->src)) ||
@@ -835,7 +829,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
}
kernel_fpu_begin();
- aesni_gcm_init(aes_ctx, &data, iv,
+ gcm_tfm->init(aes_ctx, &data, iv,
hash_subkey, assoc, assoclen);
if (req->src != req->dst) {
while (left) {
@@ -846,10 +840,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
len = min(srclen, dstlen);
if (len) {
if (enc)
- aesni_gcm_enc_update(aes_ctx, &data,
+ gcm_tfm->enc_update(aes_ctx, &data,
dst, src, len);
else
- aesni_gcm_dec_update(aes_ctx, &data,
+ gcm_tfm->dec_update(aes_ctx, &data,
dst, src, len);
}
left -= len;
@@ -867,10 +861,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
len = scatterwalk_clamp(&src_sg_walk, left);
if (len) {
if (enc)
- aesni_gcm_enc_update(aes_ctx, &data,
+ gcm_tfm->enc_update(aes_ctx, &data,
src, src, len);
else
- aesni_gcm_dec_update(aes_ctx, &data,
+ gcm_tfm->dec_update(aes_ctx, &data,
src, src, len);
}
left -= len;
@@ -879,7 +873,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
scatterwalk_done(&src_sg_walk, 1, left);
}
}
- aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+ gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len);
kernel_fpu_end();
if (!assocmem)
@@ -912,147 +906,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
- u8 one_entry_in_sg = 0;
- u8 *src, *dst, *assoc;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- struct scatter_walk src_sg_walk;
- struct scatter_walk dst_sg_walk = {};
- struct gcm_context_data data AESNI_ALIGN_ATTR;
-
- if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
- aesni_gcm_enc_tfm == aesni_gcm_enc ||
- req->cryptlen < AVX_GEN2_OPTSIZE) {
- return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
- aes_ctx);
- }
- if (sg_is_last(req->src) &&
- (!PageHighMem(sg_page(req->src)) ||
- req->src->offset + req->src->length <= PAGE_SIZE) &&
- sg_is_last(req->dst) &&
- (!PageHighMem(sg_page(req->dst)) ||
- req->dst->offset + req->dst->length <= PAGE_SIZE)) {
- one_entry_in_sg = 1;
- scatterwalk_start(&src_sg_walk, req->src);
- assoc = scatterwalk_map(&src_sg_walk);
- src = assoc + req->assoclen;
- dst = src;
- if (unlikely(req->src != req->dst)) {
- scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
- }
- } else {
- /* Allocate memory for src, dst, assoc */
- assoc = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
- GFP_ATOMIC);
- if (unlikely(!assoc))
- return -ENOMEM;
- scatterwalk_map_and_copy(assoc, req->src, 0,
- req->assoclen + req->cryptlen, 0);
- src = assoc + req->assoclen;
- dst = src;
- }
-
- kernel_fpu_begin();
- aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
- hash_subkey, assoc, assoclen,
- dst + req->cryptlen, auth_tag_len);
- kernel_fpu_end();
-
- /* The authTag (aka the Integrity Check Value) needs to be written
- * back to the packet. */
- if (one_entry_in_sg) {
- if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst - req->assoclen);
- scatterwalk_advance(&dst_sg_walk, req->dst->length);
- scatterwalk_done(&dst_sg_walk, 1, 0);
- }
- scatterwalk_unmap(assoc);
- scatterwalk_advance(&src_sg_walk, req->src->length);
- scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
- } else {
- scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
- req->cryptlen + auth_tag_len, 1);
- kfree(assoc);
- }
- return 0;
+ return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+ aes_ctx);
}
static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
- u8 one_entry_in_sg = 0;
- u8 *src, *dst, *assoc;
- unsigned long tempCipherLen = 0;
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- u8 authTag[16];
- struct scatter_walk src_sg_walk;
- struct scatter_walk dst_sg_walk = {};
- struct gcm_context_data data AESNI_ALIGN_ATTR;
- int retval = 0;
-
- if (((struct crypto_aes_ctx *)aes_ctx)->key_length != AES_KEYSIZE_128 ||
- aesni_gcm_enc_tfm == aesni_gcm_enc ||
- req->cryptlen < AVX_GEN2_OPTSIZE) {
- return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
- aes_ctx);
- }
- tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-
- if (sg_is_last(req->src) &&
- (!PageHighMem(sg_page(req->src)) ||
- req->src->offset + req->src->length <= PAGE_SIZE) &&
- sg_is_last(req->dst) && req->dst->length &&
- (!PageHighMem(sg_page(req->dst)) ||
- req->dst->offset + req->dst->length <= PAGE_SIZE)) {
- one_entry_in_sg = 1;
- scatterwalk_start(&src_sg_walk, req->src);
- assoc = scatterwalk_map(&src_sg_walk);
- src = assoc + req->assoclen;
- dst = src;
- if (unlikely(req->src != req->dst)) {
- scatterwalk_start(&dst_sg_walk, req->dst);
- dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
- }
- } else {
- /* Allocate memory for src, dst, assoc */
- assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
- if (!assoc)
- return -ENOMEM;
- scatterwalk_map_and_copy(assoc, req->src, 0,
- req->assoclen + req->cryptlen, 0);
- src = assoc + req->assoclen;
- dst = src;
- }
-
-
- kernel_fpu_begin();
- aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
- hash_subkey, assoc, assoclen,
- authTag, auth_tag_len);
- kernel_fpu_end();
-
- /* Compare generated tag with passed in tag. */
- retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ?
- -EBADMSG : 0;
-
- if (one_entry_in_sg) {
- if (unlikely(req->src != req->dst)) {
- scatterwalk_unmap(dst - req->assoclen);
- scatterwalk_advance(&dst_sg_walk, req->dst->length);
- scatterwalk_done(&dst_sg_walk, 1, 0);
- }
- scatterwalk_unmap(assoc);
- scatterwalk_advance(&src_sg_walk, req->src->length);
- scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
- } else {
- scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
- tempCipherLen, 1);
- kfree(assoc);
- }
- return retval;
-
+ return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+ aes_ctx);
}
static int helper_rfc4106_encrypt(struct aead_request *req)
@@ -1420,21 +1282,18 @@ static int __init aesni_init(void)
#ifdef CONFIG_AS_AVX2
if (boot_cpu_has(X86_FEATURE_AVX2)) {
pr_info("AVX2 version of gcm_enc/dec engaged.\n");
- aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
- aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
+ aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
} else
#endif
#ifdef CONFIG_AS_AVX
if (boot_cpu_has(X86_FEATURE_AVX)) {
pr_info("AVX version of gcm_enc/dec engaged.\n");
- aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
- aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
+ aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
} else
#endif
{
pr_info("SSE version of gcm_enc/dec engaged.\n");
- aesni_gcm_enc_tfm = aesni_gcm_enc;
- aesni_gcm_dec_tfm = aesni_gcm_dec;
+ aesni_gcm_tfm = &aesni_gcm_tfm_sse;
}
aesni_ctr_enc_tfm = aesni_ctr_enc;
#ifdef CONFIG_AS_AVX
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
new file mode 100644
index 000000000000..32903fd450af
--- /dev/null
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -0,0 +1,1025 @@
+/*
+ * ChaCha 256-bit cipher algorithm, x64 AVX2 functions
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.ROT8, "aM", @progbits, 32
+.align 32
+ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
+ .octa 0x0e0d0c0f0a09080b0605040702010003
+
+.section .rodata.cst32.ROT16, "aM", @progbits, 32
+.align 32
+ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
+ .octa 0x0d0c0f0e09080b0a0504070601000302
+
+.section .rodata.cst32.CTRINC, "aM", @progbits, 32
+.align 32
+CTRINC: .octa 0x00000003000000020000000100000000
+ .octa 0x00000007000000060000000500000004
+
+.section .rodata.cst32.CTR2BL, "aM", @progbits, 32
+.align 32
+CTR2BL: .octa 0x00000000000000000000000000000000
+ .octa 0x00000000000000000000000000000001
+
+.section .rodata.cst32.CTR4BL, "aM", @progbits, 32
+.align 32
+CTR4BL: .octa 0x00000000000000000000000000000002
+ .octa 0x00000000000000000000000000000003
+
+.text
+
+ENTRY(chacha_2block_xor_avx2)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 2 data blocks output, o
+ # %rdx: up to 2 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts two ChaCha blocks by loading the state
+ # matrix twice across four AVX registers. It performs matrix operations
+ # on four words in each matrix in parallel, but requires shuffling to
+ # rearrange the words after each round.
+
+ vzeroupper
+
+ # x0..3[0-2] = s0..3
+ vbroadcasti128 0x00(%rdi),%ymm0
+ vbroadcasti128 0x10(%rdi),%ymm1
+ vbroadcasti128 0x20(%rdi),%ymm2
+ vbroadcasti128 0x30(%rdi),%ymm3
+
+ vpaddd CTR2BL(%rip),%ymm3,%ymm3
+
+ vmovdqa %ymm0,%ymm8
+ vmovdqa %ymm1,%ymm9
+ vmovdqa %ymm2,%ymm10
+ vmovdqa %ymm3,%ymm11
+
+ vmovdqa ROT8(%rip),%ymm4
+ vmovdqa ROT16(%rip),%ymm5
+
+ mov %rcx,%rax
+
+.Ldoubleround:
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm5,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm6
+ vpslld $12,%ymm6,%ymm6
+ vpsrld $20,%ymm1,%ymm1
+ vpor %ymm6,%ymm1,%ymm1
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm4,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm7
+ vpslld $7,%ymm7,%ymm7
+ vpsrld $25,%ymm1,%ymm1
+ vpor %ymm7,%ymm1,%ymm1
+
+ # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm1,%ymm1
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm3,%ymm3
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm5,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm6
+ vpslld $12,%ymm6,%ymm6
+ vpsrld $20,%ymm1,%ymm1
+ vpor %ymm6,%ymm1,%ymm1
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm4,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm7
+ vpslld $7,%ymm7,%ymm7
+ vpsrld $25,%ymm1,%ymm1
+ vpor %ymm7,%ymm1,%ymm1
+
+ # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm1,%ymm1
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm3,%ymm3
+
+ sub $2,%r8d
+ jnz .Ldoubleround
+
+ # o0 = i0 ^ (x0 + s0)
+ vpaddd %ymm8,%ymm0,%ymm7
+ cmp $0x10,%rax
+ jl .Lxorpart2
+ vpxor 0x00(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x00(%rsi)
+ vextracti128 $1,%ymm7,%xmm0
+ # o1 = i1 ^ (x1 + s1)
+ vpaddd %ymm9,%ymm1,%ymm7
+ cmp $0x20,%rax
+ jl .Lxorpart2
+ vpxor 0x10(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x10(%rsi)
+ vextracti128 $1,%ymm7,%xmm1
+ # o2 = i2 ^ (x2 + s2)
+ vpaddd %ymm10,%ymm2,%ymm7
+ cmp $0x30,%rax
+ jl .Lxorpart2
+ vpxor 0x20(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x20(%rsi)
+ vextracti128 $1,%ymm7,%xmm2
+ # o3 = i3 ^ (x3 + s3)
+ vpaddd %ymm11,%ymm3,%ymm7
+ cmp $0x40,%rax
+ jl .Lxorpart2
+ vpxor 0x30(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x30(%rsi)
+ vextracti128 $1,%ymm7,%xmm3
+
+ # xor and write second block
+ vmovdqa %xmm0,%xmm7
+ cmp $0x50,%rax
+ jl .Lxorpart2
+ vpxor 0x40(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x40(%rsi)
+
+ vmovdqa %xmm1,%xmm7
+ cmp $0x60,%rax
+ jl .Lxorpart2
+ vpxor 0x50(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x50(%rsi)
+
+ vmovdqa %xmm2,%xmm7
+ cmp $0x70,%rax
+ jl .Lxorpart2
+ vpxor 0x60(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x60(%rsi)
+
+ vmovdqa %xmm3,%xmm7
+ cmp $0x80,%rax
+ jl .Lxorpart2
+ vpxor 0x70(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x70(%rsi)
+
+.Ldone2:
+ vzeroupper
+ ret
+
+.Lxorpart2:
+ # xor remaining bytes from partial register into output
+ mov %rax,%r9
+ and $0x0f,%r9
+ jz .Ldone2
+ and $~0x0f,%rax
+
+ mov %rsi,%r11
+
+ lea 8(%rsp),%r10
+ sub $0x10,%rsp
+ and $~31,%rsp
+
+ lea (%rdx,%rax),%rsi
+ mov %rsp,%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ vpxor 0x00(%rsp),%xmm7,%xmm7
+ vmovdqa %xmm7,0x00(%rsp)
+
+ mov %rsp,%rsi
+ lea (%r11,%rax),%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ lea -8(%r10),%rsp
+ jmp .Ldone2
+
+ENDPROC(chacha_2block_xor_avx2)
+
+ENTRY(chacha_4block_xor_avx2)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 4 data blocks output, o
+ # %rdx: up to 4 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts four ChaCha blocks by loading the state
+ # matrix four times across eight AVX registers. It performs matrix
+ # operations on four words in two matrices in parallel, sequentially
+ # to the operations on the four words of the other two matrices. The
+ # required word shuffling has a rather high latency, we can do the
+ # arithmetic on two matrix-pairs without much slowdown.
+
+ vzeroupper
+
+ # x0..3[0-4] = s0..3
+ vbroadcasti128 0x00(%rdi),%ymm0
+ vbroadcasti128 0x10(%rdi),%ymm1
+ vbroadcasti128 0x20(%rdi),%ymm2
+ vbroadcasti128 0x30(%rdi),%ymm3
+
+ vmovdqa %ymm0,%ymm4
+ vmovdqa %ymm1,%ymm5
+ vmovdqa %ymm2,%ymm6
+ vmovdqa %ymm3,%ymm7
+
+ vpaddd CTR2BL(%rip),%ymm3,%ymm3
+ vpaddd CTR4BL(%rip),%ymm7,%ymm7
+
+ vmovdqa %ymm0,%ymm11
+ vmovdqa %ymm1,%ymm12
+ vmovdqa %ymm2,%ymm13
+ vmovdqa %ymm3,%ymm14
+ vmovdqa %ymm7,%ymm15
+
+ vmovdqa ROT8(%rip),%ymm8
+ vmovdqa ROT16(%rip),%ymm9
+
+ mov %rcx,%rax
+
+.Ldoubleround4:
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm9,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxor %ymm4,%ymm7,%ymm7
+ vpshufb %ymm9,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm10
+ vpslld $12,%ymm10,%ymm10
+ vpsrld $20,%ymm1,%ymm1
+ vpor %ymm10,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovdqa %ymm5,%ymm10
+ vpslld $12,%ymm10,%ymm10
+ vpsrld $20,%ymm5,%ymm5
+ vpor %ymm10,%ymm5,%ymm5
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm8,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxor %ymm4,%ymm7,%ymm7
+ vpshufb %ymm8,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm10
+ vpslld $7,%ymm10,%ymm10
+ vpsrld $25,%ymm1,%ymm1
+ vpor %ymm10,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovdqa %ymm5,%ymm10
+ vpslld $7,%ymm10,%ymm10
+ vpsrld $25,%ymm5,%ymm5
+ vpor %ymm10,%ymm5,%ymm5
+
+ # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm1,%ymm1
+ vpshufd $0x39,%ymm5,%ymm5
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ vpshufd $0x4e,%ymm6,%ymm6
+ # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm3,%ymm3
+ vpshufd $0x93,%ymm7,%ymm7
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm9,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxor %ymm4,%ymm7,%ymm7
+ vpshufb %ymm9,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm10
+ vpslld $12,%ymm10,%ymm10
+ vpsrld $20,%ymm1,%ymm1
+ vpor %ymm10,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovdqa %ymm5,%ymm10
+ vpslld $12,%ymm10,%ymm10
+ vpsrld $20,%ymm5,%ymm5
+ vpor %ymm10,%ymm5,%ymm5
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxor %ymm0,%ymm3,%ymm3
+ vpshufb %ymm8,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxor %ymm4,%ymm7,%ymm7
+ vpshufb %ymm8,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxor %ymm2,%ymm1,%ymm1
+ vmovdqa %ymm1,%ymm10
+ vpslld $7,%ymm10,%ymm10
+ vpsrld $25,%ymm1,%ymm1
+ vpor %ymm10,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxor %ymm6,%ymm5,%ymm5
+ vmovdqa %ymm5,%ymm10
+ vpslld $7,%ymm10,%ymm10
+ vpsrld $25,%ymm5,%ymm5
+ vpor %ymm10,%ymm5,%ymm5
+
+ # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm1,%ymm1
+ vpshufd $0x93,%ymm5,%ymm5
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ vpshufd $0x4e,%ymm6,%ymm6
+ # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm3,%ymm3
+ vpshufd $0x39,%ymm7,%ymm7
+
+ sub $2,%r8d
+ jnz .Ldoubleround4
+
+ # o0 = i0 ^ (x0 + s0), first block
+ vpaddd %ymm11,%ymm0,%ymm10
+ cmp $0x10,%rax
+ jl .Lxorpart4
+ vpxor 0x00(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x00(%rsi)
+ vextracti128 $1,%ymm10,%xmm0
+ # o1 = i1 ^ (x1 + s1), first block
+ vpaddd %ymm12,%ymm1,%ymm10
+ cmp $0x20,%rax
+ jl .Lxorpart4
+ vpxor 0x10(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x10(%rsi)
+ vextracti128 $1,%ymm10,%xmm1
+ # o2 = i2 ^ (x2 + s2), first block
+ vpaddd %ymm13,%ymm2,%ymm10
+ cmp $0x30,%rax
+ jl .Lxorpart4
+ vpxor 0x20(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x20(%rsi)
+ vextracti128 $1,%ymm10,%xmm2
+ # o3 = i3 ^ (x3 + s3), first block
+ vpaddd %ymm14,%ymm3,%ymm10
+ cmp $0x40,%rax
+ jl .Lxorpart4
+ vpxor 0x30(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x30(%rsi)
+ vextracti128 $1,%ymm10,%xmm3
+
+ # xor and write second block
+ vmovdqa %xmm0,%xmm10
+ cmp $0x50,%rax
+ jl .Lxorpart4
+ vpxor 0x40(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x40(%rsi)
+
+ vmovdqa %xmm1,%xmm10
+ cmp $0x60,%rax
+ jl .Lxorpart4
+ vpxor 0x50(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x50(%rsi)
+
+ vmovdqa %xmm2,%xmm10
+ cmp $0x70,%rax
+ jl .Lxorpart4
+ vpxor 0x60(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x60(%rsi)
+
+ vmovdqa %xmm3,%xmm10
+ cmp $0x80,%rax
+ jl .Lxorpart4
+ vpxor 0x70(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x70(%rsi)
+
+ # o0 = i0 ^ (x0 + s0), third block
+ vpaddd %ymm11,%ymm4,%ymm10
+ cmp $0x90,%rax
+ jl .Lxorpart4
+ vpxor 0x80(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x80(%rsi)
+ vextracti128 $1,%ymm10,%xmm4
+ # o1 = i1 ^ (x1 + s1), third block
+ vpaddd %ymm12,%ymm5,%ymm10
+ cmp $0xa0,%rax
+ jl .Lxorpart4
+ vpxor 0x90(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x90(%rsi)
+ vextracti128 $1,%ymm10,%xmm5
+ # o2 = i2 ^ (x2 + s2), third block
+ vpaddd %ymm13,%ymm6,%ymm10
+ cmp $0xb0,%rax
+ jl .Lxorpart4
+ vpxor 0xa0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xa0(%rsi)
+ vextracti128 $1,%ymm10,%xmm6
+ # o3 = i3 ^ (x3 + s3), third block
+ vpaddd %ymm15,%ymm7,%ymm10
+ cmp $0xc0,%rax
+ jl .Lxorpart4
+ vpxor 0xb0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xb0(%rsi)
+ vextracti128 $1,%ymm10,%xmm7
+
+ # xor and write fourth block
+ vmovdqa %xmm4,%xmm10
+ cmp $0xd0,%rax
+ jl .Lxorpart4
+ vpxor 0xc0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xc0(%rsi)
+
+ vmovdqa %xmm5,%xmm10
+ cmp $0xe0,%rax
+ jl .Lxorpart4
+ vpxor 0xd0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xd0(%rsi)
+
+ vmovdqa %xmm6,%xmm10
+ cmp $0xf0,%rax
+ jl .Lxorpart4
+ vpxor 0xe0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xe0(%rsi)
+
+ vmovdqa %xmm7,%xmm10
+ cmp $0x100,%rax
+ jl .Lxorpart4
+ vpxor 0xf0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xf0(%rsi)
+
+.Ldone4:
+ vzeroupper
+ ret
+
+.Lxorpart4:
+ # xor remaining bytes from partial register into output
+ mov %rax,%r9
+ and $0x0f,%r9
+ jz .Ldone4
+ and $~0x0f,%rax
+
+ mov %rsi,%r11
+
+ lea 8(%rsp),%r10
+ sub $0x10,%rsp
+ and $~31,%rsp
+
+ lea (%rdx,%rax),%rsi
+ mov %rsp,%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ vpxor 0x00(%rsp),%xmm10,%xmm10
+ vmovdqa %xmm10,0x00(%rsp)
+
+ mov %rsp,%rsi
+ lea (%r11,%rax),%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ lea -8(%r10),%rsp
+ jmp .Ldone4
+
+ENDPROC(chacha_4block_xor_avx2)
+
+ENTRY(chacha_8block_xor_avx2)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 8 data blocks output, o
+ # %rdx: up to 8 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts eight consecutive ChaCha blocks by loading
+ # the state matrix in AVX registers eight times. As we need some
+ # scratch registers, we save the first four registers on the stack. The
+ # algorithm performs each operation on the corresponding word of each
+ # state matrix, hence requires no word shuffling. For final XORing step
+ # we transpose the matrix by interleaving 32-, 64- and then 128-bit
+ # words, which allows us to do XOR in AVX registers. 8/16-bit word
+ # rotation is done with the slightly better performing byte shuffling,
+ # 7/12-bit word rotation uses traditional shift+OR.
+
+ vzeroupper
+ # 4 * 32 byte stack, 32-byte aligned
+ lea 8(%rsp),%r10
+ and $~31, %rsp
+ sub $0x80, %rsp
+ mov %rcx,%rax
+
+ # x0..15[0-7] = s[0..15]
+ vpbroadcastd 0x00(%rdi),%ymm0
+ vpbroadcastd 0x04(%rdi),%ymm1
+ vpbroadcastd 0x08(%rdi),%ymm2
+ vpbroadcastd 0x0c(%rdi),%ymm3
+ vpbroadcastd 0x10(%rdi),%ymm4
+ vpbroadcastd 0x14(%rdi),%ymm5
+ vpbroadcastd 0x18(%rdi),%ymm6
+ vpbroadcastd 0x1c(%rdi),%ymm7
+ vpbroadcastd 0x20(%rdi),%ymm8
+ vpbroadcastd 0x24(%rdi),%ymm9
+ vpbroadcastd 0x28(%rdi),%ymm10
+ vpbroadcastd 0x2c(%rdi),%ymm11
+ vpbroadcastd 0x30(%rdi),%ymm12
+ vpbroadcastd 0x34(%rdi),%ymm13
+ vpbroadcastd 0x38(%rdi),%ymm14
+ vpbroadcastd 0x3c(%rdi),%ymm15
+ # x0..3 on stack
+ vmovdqa %ymm0,0x00(%rsp)
+ vmovdqa %ymm1,0x20(%rsp)
+ vmovdqa %ymm2,0x40(%rsp)
+ vmovdqa %ymm3,0x60(%rsp)
+
+ vmovdqa CTRINC(%rip),%ymm1
+ vmovdqa ROT8(%rip),%ymm2
+ vmovdqa ROT16(%rip),%ymm3
+
+ # x12 += counter values 0-3
+ vpaddd %ymm1,%ymm12,%ymm12
+
+.Ldoubleround8:
+ # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+ vpaddd 0x00(%rsp),%ymm4,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+ vpxor %ymm0,%ymm12,%ymm12
+ vpshufb %ymm3,%ymm12,%ymm12
+ # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+ vpaddd 0x20(%rsp),%ymm5,%ymm0
+ vmovdqa %ymm0,0x20(%rsp)
+ vpxor %ymm0,%ymm13,%ymm13
+ vpshufb %ymm3,%ymm13,%ymm13
+ # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+ vpaddd 0x40(%rsp),%ymm6,%ymm0
+ vmovdqa %ymm0,0x40(%rsp)
+ vpxor %ymm0,%ymm14,%ymm14
+ vpshufb %ymm3,%ymm14,%ymm14
+ # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+ vpaddd 0x60(%rsp),%ymm7,%ymm0
+ vmovdqa %ymm0,0x60(%rsp)
+ vpxor %ymm0,%ymm15,%ymm15
+ vpshufb %ymm3,%ymm15,%ymm15
+
+ # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpxor %ymm8,%ymm4,%ymm4
+ vpslld $12,%ymm4,%ymm0
+ vpsrld $20,%ymm4,%ymm4
+ vpor %ymm0,%ymm4,%ymm4
+ # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpxor %ymm9,%ymm5,%ymm5
+ vpslld $12,%ymm5,%ymm0
+ vpsrld $20,%ymm5,%ymm5
+ vpor %ymm0,%ymm5,%ymm5
+ # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpxor %ymm10,%ymm6,%ymm6
+ vpslld $12,%ymm6,%ymm0
+ vpsrld $20,%ymm6,%ymm6
+ vpor %ymm0,%ymm6,%ymm6
+ # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxor %ymm11,%ymm7,%ymm7
+ vpslld $12,%ymm7,%ymm0
+ vpsrld $20,%ymm7,%ymm7
+ vpor %ymm0,%ymm7,%ymm7
+
+ # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+ vpaddd 0x00(%rsp),%ymm4,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+ vpxor %ymm0,%ymm12,%ymm12
+ vpshufb %ymm2,%ymm12,%ymm12
+ # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+ vpaddd 0x20(%rsp),%ymm5,%ymm0
+ vmovdqa %ymm0,0x20(%rsp)
+ vpxor %ymm0,%ymm13,%ymm13
+ vpshufb %ymm2,%ymm13,%ymm13
+ # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+ vpaddd 0x40(%rsp),%ymm6,%ymm0
+ vmovdqa %ymm0,0x40(%rsp)
+ vpxor %ymm0,%ymm14,%ymm14
+ vpshufb %ymm2,%ymm14,%ymm14
+ # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ vpaddd 0x60(%rsp),%ymm7,%ymm0
+ vmovdqa %ymm0,0x60(%rsp)
+ vpxor %ymm0,%ymm15,%ymm15
+ vpshufb %ymm2,%ymm15,%ymm15
+
+ # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpxor %ymm8,%ymm4,%ymm4
+ vpslld $7,%ymm4,%ymm0
+ vpsrld $25,%ymm4,%ymm4
+ vpor %ymm0,%ymm4,%ymm4
+ # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpxor %ymm9,%ymm5,%ymm5
+ vpslld $7,%ymm5,%ymm0
+ vpsrld $25,%ymm5,%ymm5
+ vpor %ymm0,%ymm5,%ymm5
+ # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpxor %ymm10,%ymm6,%ymm6
+ vpslld $7,%ymm6,%ymm0
+ vpsrld $25,%ymm6,%ymm6
+ vpor %ymm0,%ymm6,%ymm6
+ # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxor %ymm11,%ymm7,%ymm7
+ vpslld $7,%ymm7,%ymm0
+ vpsrld $25,%ymm7,%ymm7
+ vpor %ymm0,%ymm7,%ymm7
+
+ # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+ vpaddd 0x00(%rsp),%ymm5,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+ vpxor %ymm0,%ymm15,%ymm15
+ vpshufb %ymm3,%ymm15,%ymm15
+ # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
+ vpaddd 0x20(%rsp),%ymm6,%ymm0
+ vmovdqa %ymm0,0x20(%rsp)
+ vpxor %ymm0,%ymm12,%ymm12
+ vpshufb %ymm3,%ymm12,%ymm12
+ # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+ vpaddd 0x40(%rsp),%ymm7,%ymm0
+ vmovdqa %ymm0,0x40(%rsp)
+ vpxor %ymm0,%ymm13,%ymm13
+ vpshufb %ymm3,%ymm13,%ymm13
+ # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+ vpaddd 0x60(%rsp),%ymm4,%ymm0
+ vmovdqa %ymm0,0x60(%rsp)
+ vpxor %ymm0,%ymm14,%ymm14
+ vpshufb %ymm3,%ymm14,%ymm14
+
+ # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpxor %ymm10,%ymm5,%ymm5
+ vpslld $12,%ymm5,%ymm0
+ vpsrld $20,%ymm5,%ymm5
+ vpor %ymm0,%ymm5,%ymm5
+ # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpxor %ymm11,%ymm6,%ymm6
+ vpslld $12,%ymm6,%ymm0
+ vpsrld $20,%ymm6,%ymm6
+ vpor %ymm0,%ymm6,%ymm6
+ # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpxor %ymm8,%ymm7,%ymm7
+ vpslld $12,%ymm7,%ymm0
+ vpsrld $20,%ymm7,%ymm7
+ vpor %ymm0,%ymm7,%ymm7
+ # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxor %ymm9,%ymm4,%ymm4
+ vpslld $12,%ymm4,%ymm0
+ vpsrld $20,%ymm4,%ymm4
+ vpor %ymm0,%ymm4,%ymm4
+
+ # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+ vpaddd 0x00(%rsp),%ymm5,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+ vpxor %ymm0,%ymm15,%ymm15
+ vpshufb %ymm2,%ymm15,%ymm15
+ # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+ vpaddd 0x20(%rsp),%ymm6,%ymm0
+ vmovdqa %ymm0,0x20(%rsp)
+ vpxor %ymm0,%ymm12,%ymm12
+ vpshufb %ymm2,%ymm12,%ymm12
+ # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+ vpaddd 0x40(%rsp),%ymm7,%ymm0
+ vmovdqa %ymm0,0x40(%rsp)
+ vpxor %ymm0,%ymm13,%ymm13
+ vpshufb %ymm2,%ymm13,%ymm13
+ # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ vpaddd 0x60(%rsp),%ymm4,%ymm0
+ vmovdqa %ymm0,0x60(%rsp)
+ vpxor %ymm0,%ymm14,%ymm14
+ vpshufb %ymm2,%ymm14,%ymm14
+
+ # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpxor %ymm10,%ymm5,%ymm5
+ vpslld $7,%ymm5,%ymm0
+ vpsrld $25,%ymm5,%ymm5
+ vpor %ymm0,%ymm5,%ymm5
+ # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpxor %ymm11,%ymm6,%ymm6
+ vpslld $7,%ymm6,%ymm0
+ vpsrld $25,%ymm6,%ymm6
+ vpor %ymm0,%ymm6,%ymm6
+ # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpxor %ymm8,%ymm7,%ymm7
+ vpslld $7,%ymm7,%ymm0
+ vpsrld $25,%ymm7,%ymm7
+ vpor %ymm0,%ymm7,%ymm7
+ # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxor %ymm9,%ymm4,%ymm4
+ vpslld $7,%ymm4,%ymm0
+ vpsrld $25,%ymm4,%ymm4
+ vpor %ymm0,%ymm4,%ymm4
+
+ sub $2,%r8d
+ jnz .Ldoubleround8
+
+ # x0..15[0-3] += s[0..15]
+ vpbroadcastd 0x00(%rdi),%ymm0
+ vpaddd 0x00(%rsp),%ymm0,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+ vpbroadcastd 0x04(%rdi),%ymm0
+ vpaddd 0x20(%rsp),%ymm0,%ymm0
+ vmovdqa %ymm0,0x20(%rsp)
+ vpbroadcastd 0x08(%rdi),%ymm0
+ vpaddd 0x40(%rsp),%ymm0,%ymm0
+ vmovdqa %ymm0,0x40(%rsp)
+ vpbroadcastd 0x0c(%rdi),%ymm0
+ vpaddd 0x60(%rsp),%ymm0,%ymm0
+ vmovdqa %ymm0,0x60(%rsp)
+ vpbroadcastd 0x10(%rdi),%ymm0
+ vpaddd %ymm0,%ymm4,%ymm4
+ vpbroadcastd 0x14(%rdi),%ymm0
+ vpaddd %ymm0,%ymm5,%ymm5
+ vpbroadcastd 0x18(%rdi),%ymm0
+ vpaddd %ymm0,%ymm6,%ymm6
+ vpbroadcastd 0x1c(%rdi),%ymm0
+ vpaddd %ymm0,%ymm7,%ymm7
+ vpbroadcastd 0x20(%rdi),%ymm0
+ vpaddd %ymm0,%ymm8,%ymm8
+ vpbroadcastd 0x24(%rdi),%ymm0
+ vpaddd %ymm0,%ymm9,%ymm9
+ vpbroadcastd 0x28(%rdi),%ymm0
+ vpaddd %ymm0,%ymm10,%ymm10
+ vpbroadcastd 0x2c(%rdi),%ymm0
+ vpaddd %ymm0,%ymm11,%ymm11
+ vpbroadcastd 0x30(%rdi),%ymm0
+ vpaddd %ymm0,%ymm12,%ymm12
+ vpbroadcastd 0x34(%rdi),%ymm0
+ vpaddd %ymm0,%ymm13,%ymm13
+ vpbroadcastd 0x38(%rdi),%ymm0
+ vpaddd %ymm0,%ymm14,%ymm14
+ vpbroadcastd 0x3c(%rdi),%ymm0
+ vpaddd %ymm0,%ymm15,%ymm15
+
+ # x12 += counter values 0-3
+ vpaddd %ymm1,%ymm12,%ymm12
+
+ # interleave 32-bit words in state n, n+1
+ vmovdqa 0x00(%rsp),%ymm0
+ vmovdqa 0x20(%rsp),%ymm1
+ vpunpckldq %ymm1,%ymm0,%ymm2
+ vpunpckhdq %ymm1,%ymm0,%ymm1
+ vmovdqa %ymm2,0x00(%rsp)
+ vmovdqa %ymm1,0x20(%rsp)
+ vmovdqa 0x40(%rsp),%ymm0
+ vmovdqa 0x60(%rsp),%ymm1
+ vpunpckldq %ymm1,%ymm0,%ymm2
+ vpunpckhdq %ymm1,%ymm0,%ymm1
+ vmovdqa %ymm2,0x40(%rsp)
+ vmovdqa %ymm1,0x60(%rsp)
+ vmovdqa %ymm4,%ymm0
+ vpunpckldq %ymm5,%ymm0,%ymm4
+ vpunpckhdq %ymm5,%ymm0,%ymm5
+ vmovdqa %ymm6,%ymm0
+ vpunpckldq %ymm7,%ymm0,%ymm6
+ vpunpckhdq %ymm7,%ymm0,%ymm7
+ vmovdqa %ymm8,%ymm0
+ vpunpckldq %ymm9,%ymm0,%ymm8
+ vpunpckhdq %ymm9,%ymm0,%ymm9
+ vmovdqa %ymm10,%ymm0
+ vpunpckldq %ymm11,%ymm0,%ymm10
+ vpunpckhdq %ymm11,%ymm0,%ymm11
+ vmovdqa %ymm12,%ymm0
+ vpunpckldq %ymm13,%ymm0,%ymm12
+ vpunpckhdq %ymm13,%ymm0,%ymm13
+ vmovdqa %ymm14,%ymm0
+ vpunpckldq %ymm15,%ymm0,%ymm14
+ vpunpckhdq %ymm15,%ymm0,%ymm15
+
+ # interleave 64-bit words in state n, n+2
+ vmovdqa 0x00(%rsp),%ymm0
+ vmovdqa 0x40(%rsp),%ymm2
+ vpunpcklqdq %ymm2,%ymm0,%ymm1
+ vpunpckhqdq %ymm2,%ymm0,%ymm2
+ vmovdqa %ymm1,0x00(%rsp)
+ vmovdqa %ymm2,0x40(%rsp)
+ vmovdqa 0x20(%rsp),%ymm0
+ vmovdqa 0x60(%rsp),%ymm2
+ vpunpcklqdq %ymm2,%ymm0,%ymm1
+ vpunpckhqdq %ymm2,%ymm0,%ymm2
+ vmovdqa %ymm1,0x20(%rsp)
+ vmovdqa %ymm2,0x60(%rsp)
+ vmovdqa %ymm4,%ymm0
+ vpunpcklqdq %ymm6,%ymm0,%ymm4
+ vpunpckhqdq %ymm6,%ymm0,%ymm6
+ vmovdqa %ymm5,%ymm0
+ vpunpcklqdq %ymm7,%ymm0,%ymm5
+ vpunpckhqdq %ymm7,%ymm0,%ymm7
+ vmovdqa %ymm8,%ymm0
+ vpunpcklqdq %ymm10,%ymm0,%ymm8
+ vpunpckhqdq %ymm10,%ymm0,%ymm10
+ vmovdqa %ymm9,%ymm0
+ vpunpcklqdq %ymm11,%ymm0,%ymm9
+ vpunpckhqdq %ymm11,%ymm0,%ymm11
+ vmovdqa %ymm12,%ymm0
+ vpunpcklqdq %ymm14,%ymm0,%ymm12
+ vpunpckhqdq %ymm14,%ymm0,%ymm14
+ vmovdqa %ymm13,%ymm0
+ vpunpcklqdq %ymm15,%ymm0,%ymm13
+ vpunpckhqdq %ymm15,%ymm0,%ymm15
+
+ # interleave 128-bit words in state n, n+4
+ # xor/write first four blocks
+ vmovdqa 0x00(%rsp),%ymm1
+ vperm2i128 $0x20,%ymm4,%ymm1,%ymm0
+ cmp $0x0020,%rax
+ jl .Lxorpart8
+ vpxor 0x0000(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0000(%rsi)
+ vperm2i128 $0x31,%ymm4,%ymm1,%ymm4
+
+ vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
+ cmp $0x0040,%rax
+ jl .Lxorpart8
+ vpxor 0x0020(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0020(%rsi)
+ vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
+
+ vmovdqa 0x40(%rsp),%ymm1
+ vperm2i128 $0x20,%ymm6,%ymm1,%ymm0
+ cmp $0x0060,%rax
+ jl .Lxorpart8
+ vpxor 0x0040(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0040(%rsi)
+ vperm2i128 $0x31,%ymm6,%ymm1,%ymm6
+
+ vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
+ cmp $0x0080,%rax
+ jl .Lxorpart8
+ vpxor 0x0060(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0060(%rsi)
+ vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
+
+ vmovdqa 0x20(%rsp),%ymm1
+ vperm2i128 $0x20,%ymm5,%ymm1,%ymm0
+ cmp $0x00a0,%rax
+ jl .Lxorpart8
+ vpxor 0x0080(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0080(%rsi)
+ vperm2i128 $0x31,%ymm5,%ymm1,%ymm5
+
+ vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
+ cmp $0x00c0,%rax
+ jl .Lxorpart8
+ vpxor 0x00a0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x00a0(%rsi)
+ vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
+
+ vmovdqa 0x60(%rsp),%ymm1
+ vperm2i128 $0x20,%ymm7,%ymm1,%ymm0
+ cmp $0x00e0,%rax
+ jl .Lxorpart8
+ vpxor 0x00c0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x00c0(%rsi)
+ vperm2i128 $0x31,%ymm7,%ymm1,%ymm7
+
+ vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
+ cmp $0x0100,%rax
+ jl .Lxorpart8
+ vpxor 0x00e0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x00e0(%rsi)
+ vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
+
+ # xor remaining blocks, write to output
+ vmovdqa %ymm4,%ymm0
+ cmp $0x0120,%rax
+ jl .Lxorpart8
+ vpxor 0x0100(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0100(%rsi)
+
+ vmovdqa %ymm12,%ymm0
+ cmp $0x0140,%rax
+ jl .Lxorpart8
+ vpxor 0x0120(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0120(%rsi)
+
+ vmovdqa %ymm6,%ymm0
+ cmp $0x0160,%rax
+ jl .Lxorpart8
+ vpxor 0x0140(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0140(%rsi)
+
+ vmovdqa %ymm14,%ymm0
+ cmp $0x0180,%rax
+ jl .Lxorpart8
+ vpxor 0x0160(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0160(%rsi)
+
+ vmovdqa %ymm5,%ymm0
+ cmp $0x01a0,%rax
+ jl .Lxorpart8
+ vpxor 0x0180(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x0180(%rsi)
+
+ vmovdqa %ymm13,%ymm0
+ cmp $0x01c0,%rax
+ jl .Lxorpart8
+ vpxor 0x01a0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x01a0(%rsi)
+
+ vmovdqa %ymm7,%ymm0
+ cmp $0x01e0,%rax
+ jl .Lxorpart8
+ vpxor 0x01c0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x01c0(%rsi)
+
+ vmovdqa %ymm15,%ymm0
+ cmp $0x0200,%rax
+ jl .Lxorpart8
+ vpxor 0x01e0(%rdx),%ymm0,%ymm0
+ vmovdqu %ymm0,0x01e0(%rsi)
+
+.Ldone8:
+ vzeroupper
+ lea -8(%r10),%rsp
+ ret
+
+.Lxorpart8:
+ # xor remaining bytes from partial register into output
+ mov %rax,%r9
+ and $0x1f,%r9
+ jz .Ldone8
+ and $~0x1f,%rax
+
+ mov %rsi,%r11
+
+ lea (%rdx,%rax),%rsi
+ mov %rsp,%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ vpxor 0x00(%rsp),%ymm0,%ymm0
+ vmovdqa %ymm0,0x00(%rsp)
+
+ mov %rsp,%rsi
+ lea (%r11,%rax),%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ jmp .Ldone8
+
+ENDPROC(chacha_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S
new file mode 100644
index 000000000000..848f9c75fd4f
--- /dev/null
+++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
@@ -0,0 +1,836 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * ChaCha 256-bit cipher algorithm, x64 AVX-512VL functions
+ *
+ * Copyright (C) 2018 Martin Willi
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.CTR2BL, "aM", @progbits, 32
+.align 32
+CTR2BL: .octa 0x00000000000000000000000000000000
+ .octa 0x00000000000000000000000000000001
+
+.section .rodata.cst32.CTR4BL, "aM", @progbits, 32
+.align 32
+CTR4BL: .octa 0x00000000000000000000000000000002
+ .octa 0x00000000000000000000000000000003
+
+.section .rodata.cst32.CTR8BL, "aM", @progbits, 32
+.align 32
+CTR8BL: .octa 0x00000003000000020000000100000000
+ .octa 0x00000007000000060000000500000004
+
+.text
+
+ENTRY(chacha_2block_xor_avx512vl)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 2 data blocks output, o
+ # %rdx: up to 2 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts two ChaCha blocks by loading the state
+ # matrix twice across four AVX registers. It performs matrix operations
+ # on four words in each matrix in parallel, but requires shuffling to
+ # rearrange the words after each round.
+
+ vzeroupper
+
+ # x0..3[0-2] = s0..3
+ vbroadcasti128 0x00(%rdi),%ymm0
+ vbroadcasti128 0x10(%rdi),%ymm1
+ vbroadcasti128 0x20(%rdi),%ymm2
+ vbroadcasti128 0x30(%rdi),%ymm3
+
+ vpaddd CTR2BL(%rip),%ymm3,%ymm3
+
+ vmovdqa %ymm0,%ymm8
+ vmovdqa %ymm1,%ymm9
+ vmovdqa %ymm2,%ymm10
+ vmovdqa %ymm3,%ymm11
+
+.Ldoubleround:
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+
+ # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm1,%ymm1
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm3,%ymm3
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+
+ # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm1,%ymm1
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm3,%ymm3
+
+ sub $2,%r8d
+ jnz .Ldoubleround
+
+ # o0 = i0 ^ (x0 + s0)
+ vpaddd %ymm8,%ymm0,%ymm7
+ cmp $0x10,%rcx
+ jl .Lxorpart2
+ vpxord 0x00(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x00(%rsi)
+ vextracti128 $1,%ymm7,%xmm0
+ # o1 = i1 ^ (x1 + s1)
+ vpaddd %ymm9,%ymm1,%ymm7
+ cmp $0x20,%rcx
+ jl .Lxorpart2
+ vpxord 0x10(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x10(%rsi)
+ vextracti128 $1,%ymm7,%xmm1
+ # o2 = i2 ^ (x2 + s2)
+ vpaddd %ymm10,%ymm2,%ymm7
+ cmp $0x30,%rcx
+ jl .Lxorpart2
+ vpxord 0x20(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x20(%rsi)
+ vextracti128 $1,%ymm7,%xmm2
+ # o3 = i3 ^ (x3 + s3)
+ vpaddd %ymm11,%ymm3,%ymm7
+ cmp $0x40,%rcx
+ jl .Lxorpart2
+ vpxord 0x30(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x30(%rsi)
+ vextracti128 $1,%ymm7,%xmm3
+
+ # xor and write second block
+ vmovdqa %xmm0,%xmm7
+ cmp $0x50,%rcx
+ jl .Lxorpart2
+ vpxord 0x40(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x40(%rsi)
+
+ vmovdqa %xmm1,%xmm7
+ cmp $0x60,%rcx
+ jl .Lxorpart2
+ vpxord 0x50(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x50(%rsi)
+
+ vmovdqa %xmm2,%xmm7
+ cmp $0x70,%rcx
+ jl .Lxorpart2
+ vpxord 0x60(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x60(%rsi)
+
+ vmovdqa %xmm3,%xmm7
+ cmp $0x80,%rcx
+ jl .Lxorpart2
+ vpxord 0x70(%rdx),%xmm7,%xmm6
+ vmovdqu %xmm6,0x70(%rsi)
+
+.Ldone2:
+ vzeroupper
+ ret
+
+.Lxorpart2:
+ # xor remaining bytes from partial register into output
+ mov %rcx,%rax
+ and $0xf,%rcx
+ jz .Ldone8
+ mov %rax,%r9
+ and $~0xf,%r9
+
+ mov $1,%rax
+ shld %cl,%rax,%rax
+ sub $1,%rax
+ kmovq %rax,%k1
+
+ vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z}
+ vpxord %xmm7,%xmm1,%xmm1
+ vmovdqu8 %xmm1,(%rsi,%r9){%k1}
+
+ jmp .Ldone2
+
+ENDPROC(chacha_2block_xor_avx512vl)
+
+ENTRY(chacha_4block_xor_avx512vl)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 4 data blocks output, o
+ # %rdx: up to 4 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts four ChaCha blocks by loading the state
+ # matrix four times across eight AVX registers. It performs matrix
+ # operations on four words in two matrices in parallel, sequentially
+ # to the operations on the four words of the other two matrices. The
+ # required word shuffling has a rather high latency, we can do the
+ # arithmetic on two matrix-pairs without much slowdown.
+
+ vzeroupper
+
+ # x0..3[0-4] = s0..3
+ vbroadcasti128 0x00(%rdi),%ymm0
+ vbroadcasti128 0x10(%rdi),%ymm1
+ vbroadcasti128 0x20(%rdi),%ymm2
+ vbroadcasti128 0x30(%rdi),%ymm3
+
+ vmovdqa %ymm0,%ymm4
+ vmovdqa %ymm1,%ymm5
+ vmovdqa %ymm2,%ymm6
+ vmovdqa %ymm3,%ymm7
+
+ vpaddd CTR2BL(%rip),%ymm3,%ymm3
+ vpaddd CTR4BL(%rip),%ymm7,%ymm7
+
+ vmovdqa %ymm0,%ymm11
+ vmovdqa %ymm1,%ymm12
+ vmovdqa %ymm2,%ymm13
+ vmovdqa %ymm3,%ymm14
+ vmovdqa %ymm7,%ymm15
+
+.Ldoubleround4:
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxord %ymm4,%ymm7,%ymm7
+ vprold $16,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxord %ymm6,%ymm5,%ymm5
+ vprold $12,%ymm5,%ymm5
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxord %ymm4,%ymm7,%ymm7
+ vprold $8,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxord %ymm6,%ymm5,%ymm5
+ vprold $7,%ymm5,%ymm5
+
+ # x1 = shuffle32(x1, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm1,%ymm1
+ vpshufd $0x39,%ymm5,%ymm5
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ vpshufd $0x4e,%ymm6,%ymm6
+ # x3 = shuffle32(x3, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm3,%ymm3
+ vpshufd $0x93,%ymm7,%ymm7
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 16)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $16,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxord %ymm4,%ymm7,%ymm7
+ vprold $16,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 12)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $12,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxord %ymm6,%ymm5,%ymm5
+ vprold $12,%ymm5,%ymm5
+
+ # x0 += x1, x3 = rotl32(x3 ^ x0, 8)
+ vpaddd %ymm1,%ymm0,%ymm0
+ vpxord %ymm0,%ymm3,%ymm3
+ vprold $8,%ymm3,%ymm3
+
+ vpaddd %ymm5,%ymm4,%ymm4
+ vpxord %ymm4,%ymm7,%ymm7
+ vprold $8,%ymm7,%ymm7
+
+ # x2 += x3, x1 = rotl32(x1 ^ x2, 7)
+ vpaddd %ymm3,%ymm2,%ymm2
+ vpxord %ymm2,%ymm1,%ymm1
+ vprold $7,%ymm1,%ymm1
+
+ vpaddd %ymm7,%ymm6,%ymm6
+ vpxord %ymm6,%ymm5,%ymm5
+ vprold $7,%ymm5,%ymm5
+
+ # x1 = shuffle32(x1, MASK(2, 1, 0, 3))
+ vpshufd $0x93,%ymm1,%ymm1
+ vpshufd $0x93,%ymm5,%ymm5
+ # x2 = shuffle32(x2, MASK(1, 0, 3, 2))
+ vpshufd $0x4e,%ymm2,%ymm2
+ vpshufd $0x4e,%ymm6,%ymm6
+ # x3 = shuffle32(x3, MASK(0, 3, 2, 1))
+ vpshufd $0x39,%ymm3,%ymm3
+ vpshufd $0x39,%ymm7,%ymm7
+
+ sub $2,%r8d
+ jnz .Ldoubleround4
+
+ # o0 = i0 ^ (x0 + s0), first block
+ vpaddd %ymm11,%ymm0,%ymm10
+ cmp $0x10,%rcx
+ jl .Lxorpart4
+ vpxord 0x00(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x00(%rsi)
+ vextracti128 $1,%ymm10,%xmm0
+ # o1 = i1 ^ (x1 + s1), first block
+ vpaddd %ymm12,%ymm1,%ymm10
+ cmp $0x20,%rcx
+ jl .Lxorpart4
+ vpxord 0x10(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x10(%rsi)
+ vextracti128 $1,%ymm10,%xmm1
+ # o2 = i2 ^ (x2 + s2), first block
+ vpaddd %ymm13,%ymm2,%ymm10
+ cmp $0x30,%rcx
+ jl .Lxorpart4
+ vpxord 0x20(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x20(%rsi)
+ vextracti128 $1,%ymm10,%xmm2
+ # o3 = i3 ^ (x3 + s3), first block
+ vpaddd %ymm14,%ymm3,%ymm10
+ cmp $0x40,%rcx
+ jl .Lxorpart4
+ vpxord 0x30(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x30(%rsi)
+ vextracti128 $1,%ymm10,%xmm3
+
+ # xor and write second block
+ vmovdqa %xmm0,%xmm10
+ cmp $0x50,%rcx
+ jl .Lxorpart4
+ vpxord 0x40(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x40(%rsi)
+
+ vmovdqa %xmm1,%xmm10
+ cmp $0x60,%rcx
+ jl .Lxorpart4
+ vpxord 0x50(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x50(%rsi)
+
+ vmovdqa %xmm2,%xmm10
+ cmp $0x70,%rcx
+ jl .Lxorpart4
+ vpxord 0x60(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x60(%rsi)
+
+ vmovdqa %xmm3,%xmm10
+ cmp $0x80,%rcx
+ jl .Lxorpart4
+ vpxord 0x70(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x70(%rsi)
+
+ # o0 = i0 ^ (x0 + s0), third block
+ vpaddd %ymm11,%ymm4,%ymm10
+ cmp $0x90,%rcx
+ jl .Lxorpart4
+ vpxord 0x80(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x80(%rsi)
+ vextracti128 $1,%ymm10,%xmm4
+ # o1 = i1 ^ (x1 + s1), third block
+ vpaddd %ymm12,%ymm5,%ymm10
+ cmp $0xa0,%rcx
+ jl .Lxorpart4
+ vpxord 0x90(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0x90(%rsi)
+ vextracti128 $1,%ymm10,%xmm5
+ # o2 = i2 ^ (x2 + s2), third block
+ vpaddd %ymm13,%ymm6,%ymm10
+ cmp $0xb0,%rcx
+ jl .Lxorpart4
+ vpxord 0xa0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xa0(%rsi)
+ vextracti128 $1,%ymm10,%xmm6
+ # o3 = i3 ^ (x3 + s3), third block
+ vpaddd %ymm15,%ymm7,%ymm10
+ cmp $0xc0,%rcx
+ jl .Lxorpart4
+ vpxord 0xb0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xb0(%rsi)
+ vextracti128 $1,%ymm10,%xmm7
+
+ # xor and write fourth block
+ vmovdqa %xmm4,%xmm10
+ cmp $0xd0,%rcx
+ jl .Lxorpart4
+ vpxord 0xc0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xc0(%rsi)
+
+ vmovdqa %xmm5,%xmm10
+ cmp $0xe0,%rcx
+ jl .Lxorpart4
+ vpxord 0xd0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xd0(%rsi)
+
+ vmovdqa %xmm6,%xmm10
+ cmp $0xf0,%rcx
+ jl .Lxorpart4
+ vpxord 0xe0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xe0(%rsi)
+
+ vmovdqa %xmm7,%xmm10
+ cmp $0x100,%rcx
+ jl .Lxorpart4
+ vpxord 0xf0(%rdx),%xmm10,%xmm9
+ vmovdqu %xmm9,0xf0(%rsi)
+
+.Ldone4:
+ vzeroupper
+ ret
+
+.Lxorpart4:
+ # xor remaining bytes from partial register into output
+ mov %rcx,%rax
+ and $0xf,%rcx
+ jz .Ldone8
+ mov %rax,%r9
+ and $~0xf,%r9
+
+ mov $1,%rax
+ shld %cl,%rax,%rax
+ sub $1,%rax
+ kmovq %rax,%k1
+
+ vmovdqu8 (%rdx,%r9),%xmm1{%k1}{z}
+ vpxord %xmm10,%xmm1,%xmm1
+ vmovdqu8 %xmm1,(%rsi,%r9){%k1}
+
+ jmp .Ldone4
+
+ENDPROC(chacha_4block_xor_avx512vl)
+
+ENTRY(chacha_8block_xor_avx512vl)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 8 data blocks output, o
+ # %rdx: up to 8 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+
+ # This function encrypts eight consecutive ChaCha blocks by loading
+ # the state matrix in AVX registers eight times. Compared to AVX2, this
+ # mostly benefits from the new rotate instructions in VL and the
+ # additional registers.
+
+ vzeroupper
+
+ # x0..15[0-7] = s[0..15]
+ vpbroadcastd 0x00(%rdi),%ymm0
+ vpbroadcastd 0x04(%rdi),%ymm1
+ vpbroadcastd 0x08(%rdi),%ymm2
+ vpbroadcastd 0x0c(%rdi),%ymm3
+ vpbroadcastd 0x10(%rdi),%ymm4
+ vpbroadcastd 0x14(%rdi),%ymm5
+ vpbroadcastd 0x18(%rdi),%ymm6
+ vpbroadcastd 0x1c(%rdi),%ymm7
+ vpbroadcastd 0x20(%rdi),%ymm8
+ vpbroadcastd 0x24(%rdi),%ymm9
+ vpbroadcastd 0x28(%rdi),%ymm10
+ vpbroadcastd 0x2c(%rdi),%ymm11
+ vpbroadcastd 0x30(%rdi),%ymm12
+ vpbroadcastd 0x34(%rdi),%ymm13
+ vpbroadcastd 0x38(%rdi),%ymm14
+ vpbroadcastd 0x3c(%rdi),%ymm15
+
+ # x12 += counter values 0-3
+ vpaddd CTR8BL(%rip),%ymm12,%ymm12
+
+ vmovdqa64 %ymm0,%ymm16
+ vmovdqa64 %ymm1,%ymm17
+ vmovdqa64 %ymm2,%ymm18
+ vmovdqa64 %ymm3,%ymm19
+ vmovdqa64 %ymm4,%ymm20
+ vmovdqa64 %ymm5,%ymm21
+ vmovdqa64 %ymm6,%ymm22
+ vmovdqa64 %ymm7,%ymm23
+ vmovdqa64 %ymm8,%ymm24
+ vmovdqa64 %ymm9,%ymm25
+ vmovdqa64 %ymm10,%ymm26
+ vmovdqa64 %ymm11,%ymm27
+ vmovdqa64 %ymm12,%ymm28
+ vmovdqa64 %ymm13,%ymm29
+ vmovdqa64 %ymm14,%ymm30
+ vmovdqa64 %ymm15,%ymm31
+
+.Ldoubleround8:
+ # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
+ vpaddd %ymm0,%ymm4,%ymm0
+ vpxord %ymm0,%ymm12,%ymm12
+ vprold $16,%ymm12,%ymm12
+ # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
+ vpaddd %ymm1,%ymm5,%ymm1
+ vpxord %ymm1,%ymm13,%ymm13
+ vprold $16,%ymm13,%ymm13
+ # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
+ vpaddd %ymm2,%ymm6,%ymm2
+ vpxord %ymm2,%ymm14,%ymm14
+ vprold $16,%ymm14,%ymm14
+ # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
+ vpaddd %ymm3,%ymm7,%ymm3
+ vpxord %ymm3,%ymm15,%ymm15
+ vprold $16,%ymm15,%ymm15
+
+ # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpxord %ymm8,%ymm4,%ymm4
+ vprold $12,%ymm4,%ymm4
+ # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpxord %ymm9,%ymm5,%ymm5
+ vprold $12,%ymm5,%ymm5
+ # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpxord %ymm10,%ymm6,%ymm6
+ vprold $12,%ymm6,%ymm6
+ # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxord %ymm11,%ymm7,%ymm7
+ vprold $12,%ymm7,%ymm7
+
+ # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
+ vpaddd %ymm0,%ymm4,%ymm0
+ vpxord %ymm0,%ymm12,%ymm12
+ vprold $8,%ymm12,%ymm12
+ # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
+ vpaddd %ymm1,%ymm5,%ymm1
+ vpxord %ymm1,%ymm13,%ymm13
+ vprold $8,%ymm13,%ymm13
+ # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
+ vpaddd %ymm2,%ymm6,%ymm2
+ vpxord %ymm2,%ymm14,%ymm14
+ vprold $8,%ymm14,%ymm14
+ # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
+ vpaddd %ymm3,%ymm7,%ymm3
+ vpxord %ymm3,%ymm15,%ymm15
+ vprold $8,%ymm15,%ymm15
+
+ # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
+ vpaddd %ymm12,%ymm8,%ymm8
+ vpxord %ymm8,%ymm4,%ymm4
+ vprold $7,%ymm4,%ymm4
+ # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
+ vpaddd %ymm13,%ymm9,%ymm9
+ vpxord %ymm9,%ymm5,%ymm5
+ vprold $7,%ymm5,%ymm5
+ # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
+ vpaddd %ymm14,%ymm10,%ymm10
+ vpxord %ymm10,%ymm6,%ymm6
+ vprold $7,%ymm6,%ymm6
+ # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
+ vpaddd %ymm15,%ymm11,%ymm11
+ vpxord %ymm11,%ymm7,%ymm7
+ vprold $7,%ymm7,%ymm7
+
+ # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
+ vpaddd %ymm0,%ymm5,%ymm0
+ vpxord %ymm0,%ymm15,%ymm15
+ vprold $16,%ymm15,%ymm15
+ # x1 += x6, x12 = rotl32(x12 ^ x1, 16)
+ vpaddd %ymm1,%ymm6,%ymm1
+ vpxord %ymm1,%ymm12,%ymm12
+ vprold $16,%ymm12,%ymm12
+ # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
+ vpaddd %ymm2,%ymm7,%ymm2
+ vpxord %ymm2,%ymm13,%ymm13
+ vprold $16,%ymm13,%ymm13
+ # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
+ vpaddd %ymm3,%ymm4,%ymm3
+ vpxord %ymm3,%ymm14,%ymm14
+ vprold $16,%ymm14,%ymm14
+
+ # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpxord %ymm10,%ymm5,%ymm5
+ vprold $12,%ymm5,%ymm5
+ # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpxord %ymm11,%ymm6,%ymm6
+ vprold $12,%ymm6,%ymm6
+ # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpxord %ymm8,%ymm7,%ymm7
+ vprold $12,%ymm7,%ymm7
+ # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxord %ymm9,%ymm4,%ymm4
+ vprold $12,%ymm4,%ymm4
+
+ # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
+ vpaddd %ymm0,%ymm5,%ymm0
+ vpxord %ymm0,%ymm15,%ymm15
+ vprold $8,%ymm15,%ymm15
+ # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
+ vpaddd %ymm1,%ymm6,%ymm1
+ vpxord %ymm1,%ymm12,%ymm12
+ vprold $8,%ymm12,%ymm12
+ # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
+ vpaddd %ymm2,%ymm7,%ymm2
+ vpxord %ymm2,%ymm13,%ymm13
+ vprold $8,%ymm13,%ymm13
+ # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
+ vpaddd %ymm3,%ymm4,%ymm3
+ vpxord %ymm3,%ymm14,%ymm14
+ vprold $8,%ymm14,%ymm14
+
+ # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
+ vpaddd %ymm15,%ymm10,%ymm10
+ vpxord %ymm10,%ymm5,%ymm5
+ vprold $7,%ymm5,%ymm5
+ # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
+ vpaddd %ymm12,%ymm11,%ymm11
+ vpxord %ymm11,%ymm6,%ymm6
+ vprold $7,%ymm6,%ymm6
+ # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
+ vpaddd %ymm13,%ymm8,%ymm8
+ vpxord %ymm8,%ymm7,%ymm7
+ vprold $7,%ymm7,%ymm7
+ # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
+ vpaddd %ymm14,%ymm9,%ymm9
+ vpxord %ymm9,%ymm4,%ymm4
+ vprold $7,%ymm4,%ymm4
+
+ sub $2,%r8d
+ jnz .Ldoubleround8
+
+ # x0..15[0-3] += s[0..15]
+ vpaddd %ymm16,%ymm0,%ymm0
+ vpaddd %ymm17,%ymm1,%ymm1
+ vpaddd %ymm18,%ymm2,%ymm2
+ vpaddd %ymm19,%ymm3,%ymm3
+ vpaddd %ymm20,%ymm4,%ymm4
+ vpaddd %ymm21,%ymm5,%ymm5
+ vpaddd %ymm22,%ymm6,%ymm6
+ vpaddd %ymm23,%ymm7,%ymm7
+ vpaddd %ymm24,%ymm8,%ymm8
+ vpaddd %ymm25,%ymm9,%ymm9
+ vpaddd %ymm26,%ymm10,%ymm10
+ vpaddd %ymm27,%ymm11,%ymm11
+ vpaddd %ymm28,%ymm12,%ymm12
+ vpaddd %ymm29,%ymm13,%ymm13
+ vpaddd %ymm30,%ymm14,%ymm14
+ vpaddd %ymm31,%ymm15,%ymm15
+
+ # interleave 32-bit words in state n, n+1
+ vpunpckldq %ymm1,%ymm0,%ymm16
+ vpunpckhdq %ymm1,%ymm0,%ymm17
+ vpunpckldq %ymm3,%ymm2,%ymm18
+ vpunpckhdq %ymm3,%ymm2,%ymm19
+ vpunpckldq %ymm5,%ymm4,%ymm20
+ vpunpckhdq %ymm5,%ymm4,%ymm21
+ vpunpckldq %ymm7,%ymm6,%ymm22
+ vpunpckhdq %ymm7,%ymm6,%ymm23
+ vpunpckldq %ymm9,%ymm8,%ymm24
+ vpunpckhdq %ymm9,%ymm8,%ymm25
+ vpunpckldq %ymm11,%ymm10,%ymm26
+ vpunpckhdq %ymm11,%ymm10,%ymm27
+ vpunpckldq %ymm13,%ymm12,%ymm28
+ vpunpckhdq %ymm13,%ymm12,%ymm29
+ vpunpckldq %ymm15,%ymm14,%ymm30
+ vpunpckhdq %ymm15,%ymm14,%ymm31
+
+ # interleave 64-bit words in state n, n+2
+ vpunpcklqdq %ymm18,%ymm16,%ymm0
+ vpunpcklqdq %ymm19,%ymm17,%ymm1
+ vpunpckhqdq %ymm18,%ymm16,%ymm2
+ vpunpckhqdq %ymm19,%ymm17,%ymm3
+ vpunpcklqdq %ymm22,%ymm20,%ymm4
+ vpunpcklqdq %ymm23,%ymm21,%ymm5
+ vpunpckhqdq %ymm22,%ymm20,%ymm6
+ vpunpckhqdq %ymm23,%ymm21,%ymm7
+ vpunpcklqdq %ymm26,%ymm24,%ymm8
+ vpunpcklqdq %ymm27,%ymm25,%ymm9
+ vpunpckhqdq %ymm26,%ymm24,%ymm10
+ vpunpckhqdq %ymm27,%ymm25,%ymm11
+ vpunpcklqdq %ymm30,%ymm28,%ymm12
+ vpunpcklqdq %ymm31,%ymm29,%ymm13
+ vpunpckhqdq %ymm30,%ymm28,%ymm14
+ vpunpckhqdq %ymm31,%ymm29,%ymm15
+
+ # interleave 128-bit words in state n, n+4
+ # xor/write first four blocks
+ vmovdqa64 %ymm0,%ymm16
+ vperm2i128 $0x20,%ymm4,%ymm0,%ymm0
+ cmp $0x0020,%rcx
+ jl .Lxorpart8
+ vpxord 0x0000(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0000(%rsi)
+ vmovdqa64 %ymm16,%ymm0
+ vperm2i128 $0x31,%ymm4,%ymm0,%ymm4
+
+ vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
+ cmp $0x0040,%rcx
+ jl .Lxorpart8
+ vpxord 0x0020(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0020(%rsi)
+ vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
+
+ vperm2i128 $0x20,%ymm6,%ymm2,%ymm0
+ cmp $0x0060,%rcx
+ jl .Lxorpart8
+ vpxord 0x0040(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0040(%rsi)
+ vperm2i128 $0x31,%ymm6,%ymm2,%ymm6
+
+ vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
+ cmp $0x0080,%rcx
+ jl .Lxorpart8
+ vpxord 0x0060(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0060(%rsi)
+ vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
+
+ vperm2i128 $0x20,%ymm5,%ymm1,%ymm0
+ cmp $0x00a0,%rcx
+ jl .Lxorpart8
+ vpxord 0x0080(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0080(%rsi)
+ vperm2i128 $0x31,%ymm5,%ymm1,%ymm5
+
+ vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
+ cmp $0x00c0,%rcx
+ jl .Lxorpart8
+ vpxord 0x00a0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x00a0(%rsi)
+ vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
+
+ vperm2i128 $0x20,%ymm7,%ymm3,%ymm0
+ cmp $0x00e0,%rcx
+ jl .Lxorpart8
+ vpxord 0x00c0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x00c0(%rsi)
+ vperm2i128 $0x31,%ymm7,%ymm3,%ymm7
+
+ vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
+ cmp $0x0100,%rcx
+ jl .Lxorpart8
+ vpxord 0x00e0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x00e0(%rsi)
+ vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
+
+ # xor remaining blocks, write to output
+ vmovdqa64 %ymm4,%ymm0
+ cmp $0x0120,%rcx
+ jl .Lxorpart8
+ vpxord 0x0100(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0100(%rsi)
+
+ vmovdqa64 %ymm12,%ymm0
+ cmp $0x0140,%rcx
+ jl .Lxorpart8
+ vpxord 0x0120(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0120(%rsi)
+
+ vmovdqa64 %ymm6,%ymm0
+ cmp $0x0160,%rcx
+ jl .Lxorpart8
+ vpxord 0x0140(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0140(%rsi)
+
+ vmovdqa64 %ymm14,%ymm0
+ cmp $0x0180,%rcx
+ jl .Lxorpart8
+ vpxord 0x0160(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0160(%rsi)
+
+ vmovdqa64 %ymm5,%ymm0
+ cmp $0x01a0,%rcx
+ jl .Lxorpart8
+ vpxord 0x0180(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x0180(%rsi)
+
+ vmovdqa64 %ymm13,%ymm0
+ cmp $0x01c0,%rcx
+ jl .Lxorpart8
+ vpxord 0x01a0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x01a0(%rsi)
+
+ vmovdqa64 %ymm7,%ymm0
+ cmp $0x01e0,%rcx
+ jl .Lxorpart8
+ vpxord 0x01c0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x01c0(%rsi)
+
+ vmovdqa64 %ymm15,%ymm0
+ cmp $0x0200,%rcx
+ jl .Lxorpart8
+ vpxord 0x01e0(%rdx),%ymm0,%ymm0
+ vmovdqu64 %ymm0,0x01e0(%rsi)
+
+.Ldone8:
+ vzeroupper
+ ret
+
+.Lxorpart8:
+ # xor remaining bytes from partial register into output
+ mov %rcx,%rax
+ and $0x1f,%rcx
+ jz .Ldone8
+ mov %rax,%r9
+ and $~0x1f,%r9
+
+ mov $1,%rax
+ shld %cl,%rax,%rax
+ sub $1,%rax
+ kmovq %rax,%k1
+
+ vmovdqu8 (%rdx,%r9),%ymm1{%k1}{z}
+ vpxord %ymm0,%ymm1,%ymm1
+ vmovdqu8 %ymm1,(%rsi,%r9){%k1}
+
+ jmp .Ldone8
+
+ENDPROC(chacha_8block_xor_avx512vl)
diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index 512a2b500fd1..c05a7a963dc3 100644
--- a/arch/x86/crypto/chacha20-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -1,5 +1,5 @@
/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
+ * ChaCha 256-bit cipher algorithm, x64 SSSE3 functions
*
* Copyright (C) 2015 Martin Willi
*
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/frame.h>
.section .rodata.cst16.ROT8, "aM", @progbits, 16
.align 16
@@ -23,35 +24,25 @@ CTRINC: .octa 0x00000003000000020000000100000000
.text
-ENTRY(chacha20_block_xor_ssse3)
- # %rdi: Input state matrix, s
- # %rsi: 1 data block output, o
- # %rdx: 1 data block input, i
-
- # This function encrypts one ChaCha20 block by loading the state matrix
- # in four SSE registers. It performs matrix operation on four words in
- # parallel, but requireds shuffling to rearrange the words after each
- # round. 8/16-bit word rotation is done with the slightly better
- # performing SSSE3 byte shuffling, 7/12-bit word rotation uses
- # traditional shift+OR.
-
- # x0..3 = s0..3
- movdqa 0x00(%rdi),%xmm0
- movdqa 0x10(%rdi),%xmm1
- movdqa 0x20(%rdi),%xmm2
- movdqa 0x30(%rdi),%xmm3
- movdqa %xmm0,%xmm8
- movdqa %xmm1,%xmm9
- movdqa %xmm2,%xmm10
- movdqa %xmm3,%xmm11
+/*
+ * chacha_permute - permute one block
+ *
+ * Permute one 64-byte block where the state matrix is in %xmm0-%xmm3. This
+ * function performs matrix operations on four words in parallel, but requires
+ * shuffling to rearrange the words after each round. 8/16-bit word rotation is
+ * done with the slightly better performing SSSE3 byte shuffling, 7/12-bit word
+ * rotation uses traditional shift+OR.
+ *
+ * The round count is given in %r8d.
+ *
+ * Clobbers: %r8d, %xmm4-%xmm7
+ */
+chacha_permute:
movdqa ROT8(%rip),%xmm4
movdqa ROT16(%rip),%xmm5
- mov $10,%ecx
-
.Ldoubleround:
-
# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
@@ -118,39 +109,129 @@ ENTRY(chacha20_block_xor_ssse3)
# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
pshufd $0x39,%xmm3,%xmm3
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround
+ ret
+ENDPROC(chacha_permute)
+
+ENTRY(chacha_block_xor_ssse3)
+ # %rdi: Input state matrix, s
+ # %rsi: up to 1 data block output, o
+ # %rdx: up to 1 data block input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
+ FRAME_BEGIN
+
+ # x0..3 = s0..3
+ movdqa 0x00(%rdi),%xmm0
+ movdqa 0x10(%rdi),%xmm1
+ movdqa 0x20(%rdi),%xmm2
+ movdqa 0x30(%rdi),%xmm3
+ movdqa %xmm0,%xmm8
+ movdqa %xmm1,%xmm9
+ movdqa %xmm2,%xmm10
+ movdqa %xmm3,%xmm11
+
+ mov %rcx,%rax
+ call chacha_permute
+
# o0 = i0 ^ (x0 + s0)
- movdqu 0x00(%rdx),%xmm4
paddd %xmm8,%xmm0
+ cmp $0x10,%rax
+ jl .Lxorpart
+ movdqu 0x00(%rdx),%xmm4
pxor %xmm4,%xmm0
movdqu %xmm0,0x00(%rsi)
# o1 = i1 ^ (x1 + s1)
- movdqu 0x10(%rdx),%xmm5
paddd %xmm9,%xmm1
- pxor %xmm5,%xmm1
- movdqu %xmm1,0x10(%rsi)
+ movdqa %xmm1,%xmm0
+ cmp $0x20,%rax
+ jl .Lxorpart
+ movdqu 0x10(%rdx),%xmm0
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x10(%rsi)
# o2 = i2 ^ (x2 + s2)
- movdqu 0x20(%rdx),%xmm6
paddd %xmm10,%xmm2
- pxor %xmm6,%xmm2
- movdqu %xmm2,0x20(%rsi)
+ movdqa %xmm2,%xmm0
+ cmp $0x30,%rax
+ jl .Lxorpart
+ movdqu 0x20(%rdx),%xmm0
+ pxor %xmm2,%xmm0
+ movdqu %xmm0,0x20(%rsi)
# o3 = i3 ^ (x3 + s3)
- movdqu 0x30(%rdx),%xmm7
paddd %xmm11,%xmm3
- pxor %xmm7,%xmm3
- movdqu %xmm3,0x30(%rsi)
+ movdqa %xmm3,%xmm0
+ cmp $0x40,%rax
+ jl .Lxorpart
+ movdqu 0x30(%rdx),%xmm0
+ pxor %xmm3,%xmm0
+ movdqu %xmm0,0x30(%rsi)
+
+.Ldone:
+ FRAME_END
+ ret
+
+.Lxorpart:
+ # xor remaining bytes from partial register into output
+ mov %rax,%r9
+ and $0x0f,%r9
+ jz .Ldone
+ and $~0x0f,%rax
+
+ mov %rsi,%r11
+
+ lea 8(%rsp),%r10
+ sub $0x10,%rsp
+ and $~31,%rsp
+
+ lea (%rdx,%rax),%rsi
+ mov %rsp,%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ pxor 0x00(%rsp),%xmm0
+ movdqa %xmm0,0x00(%rsp)
+ mov %rsp,%rsi
+ lea (%r11,%rax),%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ lea -8(%r10),%rsp
+ jmp .Ldone
+
+ENDPROC(chacha_block_xor_ssse3)
+
+ENTRY(hchacha_block_ssse3)
+ # %rdi: Input state matrix, s
+ # %rsi: output (8 32-bit words)
+ # %edx: nrounds
+ FRAME_BEGIN
+
+ movdqa 0x00(%rdi),%xmm0
+ movdqa 0x10(%rdi),%xmm1
+ movdqa 0x20(%rdi),%xmm2
+ movdqa 0x30(%rdi),%xmm3
+
+ mov %edx,%r8d
+ call chacha_permute
+
+ movdqu %xmm0,0x00(%rsi)
+ movdqu %xmm3,0x10(%rsi)
+
+ FRAME_END
ret
-ENDPROC(chacha20_block_xor_ssse3)
+ENDPROC(hchacha_block_ssse3)
-ENTRY(chacha20_4block_xor_ssse3)
+ENTRY(chacha_4block_xor_ssse3)
# %rdi: Input state matrix, s
- # %rsi: 4 data blocks output, o
- # %rdx: 4 data blocks input, i
+ # %rsi: up to 4 data blocks output, o
+ # %rdx: up to 4 data blocks input, i
+ # %rcx: input/output length in bytes
+ # %r8d: nrounds
- # This function encrypts four consecutive ChaCha20 blocks by loading the
+ # This function encrypts four consecutive ChaCha blocks by loading the
# the state matrix in SSE registers four times. As we need some scratch
# registers, we save the first four registers on the stack. The
# algorithm performs each operation on the corresponding word of each
@@ -163,6 +244,7 @@ ENTRY(chacha20_4block_xor_ssse3)
lea 8(%rsp),%r10
sub $0x80,%rsp
and $~63,%rsp
+ mov %rcx,%rax
# x0..15[0-3] = s0..3[0..3]
movq 0x00(%rdi),%xmm1
@@ -202,8 +284,6 @@ ENTRY(chacha20_4block_xor_ssse3)
# x12 += counter values 0-3
paddd %xmm1,%xmm12
- mov $10,%ecx
-
.Ldoubleround4:
# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
movdqa 0x00(%rsp),%xmm0
@@ -421,7 +501,7 @@ ENTRY(chacha20_4block_xor_ssse3)
psrld $25,%xmm4
por %xmm0,%xmm4
- dec %ecx
+ sub $2,%r8d
jnz .Ldoubleround4
# x0[0-3] += s0[0]
@@ -573,58 +653,143 @@ ENTRY(chacha20_4block_xor_ssse3)
# xor with corresponding input, write to output
movdqa 0x00(%rsp),%xmm0
+ cmp $0x10,%rax
+ jl .Lxorpart4
movdqu 0x00(%rdx),%xmm1
pxor %xmm1,%xmm0
movdqu %xmm0,0x00(%rsi)
- movdqa 0x10(%rsp),%xmm0
- movdqu 0x80(%rdx),%xmm1
+
+ movdqu %xmm4,%xmm0
+ cmp $0x20,%rax
+ jl .Lxorpart4
+ movdqu 0x10(%rdx),%xmm1
pxor %xmm1,%xmm0
- movdqu %xmm0,0x80(%rsi)
+ movdqu %xmm0,0x10(%rsi)
+
+ movdqu %xmm8,%xmm0
+ cmp $0x30,%rax
+ jl .Lxorpart4
+ movdqu 0x20(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x20(%rsi)
+
+ movdqu %xmm12,%xmm0
+ cmp $0x40,%rax
+ jl .Lxorpart4
+ movdqu 0x30(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x30(%rsi)
+
movdqa 0x20(%rsp),%xmm0
+ cmp $0x50,%rax
+ jl .Lxorpart4
movdqu 0x40(%rdx),%xmm1
pxor %xmm1,%xmm0
movdqu %xmm0,0x40(%rsi)
+
+ movdqu %xmm6,%xmm0
+ cmp $0x60,%rax
+ jl .Lxorpart4
+ movdqu 0x50(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x50(%rsi)
+
+ movdqu %xmm10,%xmm0
+ cmp $0x70,%rax
+ jl .Lxorpart4
+ movdqu 0x60(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x60(%rsi)
+
+ movdqu %xmm14,%xmm0
+ cmp $0x80,%rax
+ jl .Lxorpart4
+ movdqu 0x70(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x70(%rsi)
+
+ movdqa 0x10(%rsp),%xmm0
+ cmp $0x90,%rax
+ jl .Lxorpart4
+ movdqu 0x80(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x80(%rsi)
+
+ movdqu %xmm5,%xmm0
+ cmp $0xa0,%rax
+ jl .Lxorpart4
+ movdqu 0x90(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0x90(%rsi)
+
+ movdqu %xmm9,%xmm0
+ cmp $0xb0,%rax
+ jl .Lxorpart4
+ movdqu 0xa0(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0xa0(%rsi)
+
+ movdqu %xmm13,%xmm0
+ cmp $0xc0,%rax
+ jl .Lxorpart4
+ movdqu 0xb0(%rdx),%xmm1
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0xb0(%rsi)
+
movdqa 0x30(%rsp),%xmm0
+ cmp $0xd0,%rax
+ jl .Lxorpart4
movdqu 0xc0(%rdx),%xmm1
pxor %xmm1,%xmm0
movdqu %xmm0,0xc0(%rsi)
- movdqu 0x10(%rdx),%xmm1
- pxor %xmm1,%xmm4
- movdqu %xmm4,0x10(%rsi)
- movdqu 0x90(%rdx),%xmm1
- pxor %xmm1,%xmm5
- movdqu %xmm5,0x90(%rsi)
- movdqu 0x50(%rdx),%xmm1
- pxor %xmm1,%xmm6
- movdqu %xmm6,0x50(%rsi)
+
+ movdqu %xmm7,%xmm0
+ cmp $0xe0,%rax
+ jl .Lxorpart4
movdqu 0xd0(%rdx),%xmm1
- pxor %xmm1,%xmm7
- movdqu %xmm7,0xd0(%rsi)
- movdqu 0x20(%rdx),%xmm1
- pxor %xmm1,%xmm8
- movdqu %xmm8,0x20(%rsi)
- movdqu 0xa0(%rdx),%xmm1
- pxor %xmm1,%xmm9
- movdqu %xmm9,0xa0(%rsi)
- movdqu 0x60(%rdx),%xmm1
- pxor %xmm1,%xmm10
- movdqu %xmm10,0x60(%rsi)
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0xd0(%rsi)
+
+ movdqu %xmm11,%xmm0
+ cmp $0xf0,%rax
+ jl .Lxorpart4
movdqu 0xe0(%rdx),%xmm1
- pxor %xmm1,%xmm11
- movdqu %xmm11,0xe0(%rsi)
- movdqu 0x30(%rdx),%xmm1
- pxor %xmm1,%xmm12
- movdqu %xmm12,0x30(%rsi)
- movdqu 0xb0(%rdx),%xmm1
- pxor %xmm1,%xmm13
- movdqu %xmm13,0xb0(%rsi)
- movdqu 0x70(%rdx),%xmm1
- pxor %xmm1,%xmm14
- movdqu %xmm14,0x70(%rsi)
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0xe0(%rsi)
+
+ movdqu %xmm15,%xmm0
+ cmp $0x100,%rax
+ jl .Lxorpart4
movdqu 0xf0(%rdx),%xmm1
- pxor %xmm1,%xmm15
- movdqu %xmm15,0xf0(%rsi)
+ pxor %xmm1,%xmm0
+ movdqu %xmm0,0xf0(%rsi)
+.Ldone4:
lea -8(%r10),%rsp
ret
-ENDPROC(chacha20_4block_xor_ssse3)
+
+.Lxorpart4:
+ # xor remaining bytes from partial register into output
+ mov %rax,%r9
+ and $0x0f,%r9
+ jz .Ldone4
+ and $~0x0f,%rax
+
+ mov %rsi,%r11
+
+ lea (%rdx,%rax),%rsi
+ mov %rsp,%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ pxor 0x00(%rsp),%xmm0
+ movdqa %xmm0,0x00(%rsp)
+
+ mov %rsp,%rsi
+ lea (%r11,%rax),%rdi
+ mov %r9,%rcx
+ rep movsb
+
+ jmp .Ldone4
+
+ENDPROC(chacha_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S
deleted file mode 100644
index f3cd26f48332..000000000000
--- a/arch/x86/crypto/chacha20-avx2-x86_64.S
+++ /dev/null
@@ -1,448 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/linkage.h>
-
-.section .rodata.cst32.ROT8, "aM", @progbits, 32
-.align 32
-ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003
- .octa 0x0e0d0c0f0a09080b0605040702010003
-
-.section .rodata.cst32.ROT16, "aM", @progbits, 32
-.align 32
-ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302
- .octa 0x0d0c0f0e09080b0a0504070601000302
-
-.section .rodata.cst32.CTRINC, "aM", @progbits, 32
-.align 32
-CTRINC: .octa 0x00000003000000020000000100000000
- .octa 0x00000007000000060000000500000004
-
-.text
-
-ENTRY(chacha20_8block_xor_avx2)
- # %rdi: Input state matrix, s
- # %rsi: 8 data blocks output, o
- # %rdx: 8 data blocks input, i
-
- # This function encrypts eight consecutive ChaCha20 blocks by loading
- # the state matrix in AVX registers eight times. As we need some
- # scratch registers, we save the first four registers on the stack. The
- # algorithm performs each operation on the corresponding word of each
- # state matrix, hence requires no word shuffling. For final XORing step
- # we transpose the matrix by interleaving 32-, 64- and then 128-bit
- # words, which allows us to do XOR in AVX registers. 8/16-bit word
- # rotation is done with the slightly better performing byte shuffling,
- # 7/12-bit word rotation uses traditional shift+OR.
-
- vzeroupper
- # 4 * 32 byte stack, 32-byte aligned
- lea 8(%rsp),%r10
- and $~31, %rsp
- sub $0x80, %rsp
-
- # x0..15[0-7] = s[0..15]
- vpbroadcastd 0x00(%rdi),%ymm0
- vpbroadcastd 0x04(%rdi),%ymm1
- vpbroadcastd 0x08(%rdi),%ymm2
- vpbroadcastd 0x0c(%rdi),%ymm3
- vpbroadcastd 0x10(%rdi),%ymm4
- vpbroadcastd 0x14(%rdi),%ymm5
- vpbroadcastd 0x18(%rdi),%ymm6
- vpbroadcastd 0x1c(%rdi),%ymm7
- vpbroadcastd 0x20(%rdi),%ymm8
- vpbroadcastd 0x24(%rdi),%ymm9
- vpbroadcastd 0x28(%rdi),%ymm10
- vpbroadcastd 0x2c(%rdi),%ymm11
- vpbroadcastd 0x30(%rdi),%ymm12
- vpbroadcastd 0x34(%rdi),%ymm13
- vpbroadcastd 0x38(%rdi),%ymm14
- vpbroadcastd 0x3c(%rdi),%ymm15
- # x0..3 on stack
- vmovdqa %ymm0,0x00(%rsp)
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa %ymm3,0x60(%rsp)
-
- vmovdqa CTRINC(%rip),%ymm1
- vmovdqa ROT8(%rip),%ymm2
- vmovdqa ROT16(%rip),%ymm3
-
- # x12 += counter values 0-3
- vpaddd %ymm1,%ymm12,%ymm12
-
- mov $10,%ecx
-
-.Ldoubleround8:
- # x0 += x4, x12 = rotl32(x12 ^ x0, 16)
- vpaddd 0x00(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm3,%ymm12,%ymm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 16)
- vpaddd 0x20(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm3,%ymm13,%ymm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 16)
- vpaddd 0x40(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm3,%ymm14,%ymm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 16)
- vpaddd 0x60(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm3,%ymm15,%ymm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 12)
- vpaddd %ymm12,%ymm8,%ymm8
- vpxor %ymm8,%ymm4,%ymm4
- vpslld $12,%ymm4,%ymm0
- vpsrld $20,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 12)
- vpaddd %ymm13,%ymm9,%ymm9
- vpxor %ymm9,%ymm5,%ymm5
- vpslld $12,%ymm5,%ymm0
- vpsrld $20,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 12)
- vpaddd %ymm14,%ymm10,%ymm10
- vpxor %ymm10,%ymm6,%ymm6
- vpslld $12,%ymm6,%ymm0
- vpsrld $20,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 12)
- vpaddd %ymm15,%ymm11,%ymm11
- vpxor %ymm11,%ymm7,%ymm7
- vpslld $12,%ymm7,%ymm0
- vpsrld $20,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
-
- # x0 += x4, x12 = rotl32(x12 ^ x0, 8)
- vpaddd 0x00(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm2,%ymm12,%ymm12
- # x1 += x5, x13 = rotl32(x13 ^ x1, 8)
- vpaddd 0x20(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm2,%ymm13,%ymm13
- # x2 += x6, x14 = rotl32(x14 ^ x2, 8)
- vpaddd 0x40(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm2,%ymm14,%ymm14
- # x3 += x7, x15 = rotl32(x15 ^ x3, 8)
- vpaddd 0x60(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm2,%ymm15,%ymm15
-
- # x8 += x12, x4 = rotl32(x4 ^ x8, 7)
- vpaddd %ymm12,%ymm8,%ymm8
- vpxor %ymm8,%ymm4,%ymm4
- vpslld $7,%ymm4,%ymm0
- vpsrld $25,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
- # x9 += x13, x5 = rotl32(x5 ^ x9, 7)
- vpaddd %ymm13,%ymm9,%ymm9
- vpxor %ymm9,%ymm5,%ymm5
- vpslld $7,%ymm5,%ymm0
- vpsrld $25,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x10 += x14, x6 = rotl32(x6 ^ x10, 7)
- vpaddd %ymm14,%ymm10,%ymm10
- vpxor %ymm10,%ymm6,%ymm6
- vpslld $7,%ymm6,%ymm0
- vpsrld $25,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x11 += x15, x7 = rotl32(x7 ^ x11, 7)
- vpaddd %ymm15,%ymm11,%ymm11
- vpxor %ymm11,%ymm7,%ymm7
- vpslld $7,%ymm7,%ymm0
- vpsrld $25,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 16)
- vpaddd 0x00(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm3,%ymm15,%ymm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 16)%ymm0
- vpaddd 0x20(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm3,%ymm12,%ymm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 16)
- vpaddd 0x40(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm3,%ymm13,%ymm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 16)
- vpaddd 0x60(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm3,%ymm14,%ymm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 12)
- vpaddd %ymm15,%ymm10,%ymm10
- vpxor %ymm10,%ymm5,%ymm5
- vpslld $12,%ymm5,%ymm0
- vpsrld $20,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 12)
- vpaddd %ymm12,%ymm11,%ymm11
- vpxor %ymm11,%ymm6,%ymm6
- vpslld $12,%ymm6,%ymm0
- vpsrld $20,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 12)
- vpaddd %ymm13,%ymm8,%ymm8
- vpxor %ymm8,%ymm7,%ymm7
- vpslld $12,%ymm7,%ymm0
- vpsrld $20,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 12)
- vpaddd %ymm14,%ymm9,%ymm9
- vpxor %ymm9,%ymm4,%ymm4
- vpslld $12,%ymm4,%ymm0
- vpsrld $20,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
-
- # x0 += x5, x15 = rotl32(x15 ^ x0, 8)
- vpaddd 0x00(%rsp),%ymm5,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpxor %ymm0,%ymm15,%ymm15
- vpshufb %ymm2,%ymm15,%ymm15
- # x1 += x6, x12 = rotl32(x12 ^ x1, 8)
- vpaddd 0x20(%rsp),%ymm6,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpxor %ymm0,%ymm12,%ymm12
- vpshufb %ymm2,%ymm12,%ymm12
- # x2 += x7, x13 = rotl32(x13 ^ x2, 8)
- vpaddd 0x40(%rsp),%ymm7,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpxor %ymm0,%ymm13,%ymm13
- vpshufb %ymm2,%ymm13,%ymm13
- # x3 += x4, x14 = rotl32(x14 ^ x3, 8)
- vpaddd 0x60(%rsp),%ymm4,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpxor %ymm0,%ymm14,%ymm14
- vpshufb %ymm2,%ymm14,%ymm14
-
- # x10 += x15, x5 = rotl32(x5 ^ x10, 7)
- vpaddd %ymm15,%ymm10,%ymm10
- vpxor %ymm10,%ymm5,%ymm5
- vpslld $7,%ymm5,%ymm0
- vpsrld $25,%ymm5,%ymm5
- vpor %ymm0,%ymm5,%ymm5
- # x11 += x12, x6 = rotl32(x6 ^ x11, 7)
- vpaddd %ymm12,%ymm11,%ymm11
- vpxor %ymm11,%ymm6,%ymm6
- vpslld $7,%ymm6,%ymm0
- vpsrld $25,%ymm6,%ymm6
- vpor %ymm0,%ymm6,%ymm6
- # x8 += x13, x7 = rotl32(x7 ^ x8, 7)
- vpaddd %ymm13,%ymm8,%ymm8
- vpxor %ymm8,%ymm7,%ymm7
- vpslld $7,%ymm7,%ymm0
- vpsrld $25,%ymm7,%ymm7
- vpor %ymm0,%ymm7,%ymm7
- # x9 += x14, x4 = rotl32(x4 ^ x9, 7)
- vpaddd %ymm14,%ymm9,%ymm9
- vpxor %ymm9,%ymm4,%ymm4
- vpslld $7,%ymm4,%ymm0
- vpsrld $25,%ymm4,%ymm4
- vpor %ymm0,%ymm4,%ymm4
-
- dec %ecx
- jnz .Ldoubleround8
-
- # x0..15[0-3] += s[0..15]
- vpbroadcastd 0x00(%rdi),%ymm0
- vpaddd 0x00(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x00(%rsp)
- vpbroadcastd 0x04(%rdi),%ymm0
- vpaddd 0x20(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x20(%rsp)
- vpbroadcastd 0x08(%rdi),%ymm0
- vpaddd 0x40(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x40(%rsp)
- vpbroadcastd 0x0c(%rdi),%ymm0
- vpaddd 0x60(%rsp),%ymm0,%ymm0
- vmovdqa %ymm0,0x60(%rsp)
- vpbroadcastd 0x10(%rdi),%ymm0
- vpaddd %ymm0,%ymm4,%ymm4
- vpbroadcastd 0x14(%rdi),%ymm0
- vpaddd %ymm0,%ymm5,%ymm5
- vpbroadcastd 0x18(%rdi),%ymm0
- vpaddd %ymm0,%ymm6,%ymm6
- vpbroadcastd 0x1c(%rdi),%ymm0
- vpaddd %ymm0,%ymm7,%ymm7
- vpbroadcastd 0x20(%rdi),%ymm0
- vpaddd %ymm0,%ymm8,%ymm8
- vpbroadcastd 0x24(%rdi),%ymm0
- vpaddd %ymm0,%ymm9,%ymm9
- vpbroadcastd 0x28(%rdi),%ymm0
- vpaddd %ymm0,%ymm10,%ymm10
- vpbroadcastd 0x2c(%rdi),%ymm0
- vpaddd %ymm0,%ymm11,%ymm11
- vpbroadcastd 0x30(%rdi),%ymm0
- vpaddd %ymm0,%ymm12,%ymm12
- vpbroadcastd 0x34(%rdi),%ymm0
- vpaddd %ymm0,%ymm13,%ymm13
- vpbroadcastd 0x38(%rdi),%ymm0
- vpaddd %ymm0,%ymm14,%ymm14
- vpbroadcastd 0x3c(%rdi),%ymm0
- vpaddd %ymm0,%ymm15,%ymm15
-
- # x12 += counter values 0-3
- vpaddd %ymm1,%ymm12,%ymm12
-
- # interleave 32-bit words in state n, n+1
- vmovdqa 0x00(%rsp),%ymm0
- vmovdqa 0x20(%rsp),%ymm1
- vpunpckldq %ymm1,%ymm0,%ymm2
- vpunpckhdq %ymm1,%ymm0,%ymm1
- vmovdqa %ymm2,0x00(%rsp)
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa 0x40(%rsp),%ymm0
- vmovdqa 0x60(%rsp),%ymm1
- vpunpckldq %ymm1,%ymm0,%ymm2
- vpunpckhdq %ymm1,%ymm0,%ymm1
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa %ymm1,0x60(%rsp)
- vmovdqa %ymm4,%ymm0
- vpunpckldq %ymm5,%ymm0,%ymm4
- vpunpckhdq %ymm5,%ymm0,%ymm5
- vmovdqa %ymm6,%ymm0
- vpunpckldq %ymm7,%ymm0,%ymm6
- vpunpckhdq %ymm7,%ymm0,%ymm7
- vmovdqa %ymm8,%ymm0
- vpunpckldq %ymm9,%ymm0,%ymm8
- vpunpckhdq %ymm9,%ymm0,%ymm9
- vmovdqa %ymm10,%ymm0
- vpunpckldq %ymm11,%ymm0,%ymm10
- vpunpckhdq %ymm11,%ymm0,%ymm11
- vmovdqa %ymm12,%ymm0
- vpunpckldq %ymm13,%ymm0,%ymm12
- vpunpckhdq %ymm13,%ymm0,%ymm13
- vmovdqa %ymm14,%ymm0
- vpunpckldq %ymm15,%ymm0,%ymm14
- vpunpckhdq %ymm15,%ymm0,%ymm15
-
- # interleave 64-bit words in state n, n+2
- vmovdqa 0x00(%rsp),%ymm0
- vmovdqa 0x40(%rsp),%ymm2
- vpunpcklqdq %ymm2,%ymm0,%ymm1
- vpunpckhqdq %ymm2,%ymm0,%ymm2
- vmovdqa %ymm1,0x00(%rsp)
- vmovdqa %ymm2,0x40(%rsp)
- vmovdqa 0x20(%rsp),%ymm0
- vmovdqa 0x60(%rsp),%ymm2
- vpunpcklqdq %ymm2,%ymm0,%ymm1
- vpunpckhqdq %ymm2,%ymm0,%ymm2
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa %ymm2,0x60(%rsp)
- vmovdqa %ymm4,%ymm0
- vpunpcklqdq %ymm6,%ymm0,%ymm4
- vpunpckhqdq %ymm6,%ymm0,%ymm6
- vmovdqa %ymm5,%ymm0
- vpunpcklqdq %ymm7,%ymm0,%ymm5
- vpunpckhqdq %ymm7,%ymm0,%ymm7
- vmovdqa %ymm8,%ymm0
- vpunpcklqdq %ymm10,%ymm0,%ymm8
- vpunpckhqdq %ymm10,%ymm0,%ymm10
- vmovdqa %ymm9,%ymm0
- vpunpcklqdq %ymm11,%ymm0,%ymm9
- vpunpckhqdq %ymm11,%ymm0,%ymm11
- vmovdqa %ymm12,%ymm0
- vpunpcklqdq %ymm14,%ymm0,%ymm12
- vpunpckhqdq %ymm14,%ymm0,%ymm14
- vmovdqa %ymm13,%ymm0
- vpunpcklqdq %ymm15,%ymm0,%ymm13
- vpunpckhqdq %ymm15,%ymm0,%ymm15
-
- # interleave 128-bit words in state n, n+4
- vmovdqa 0x00(%rsp),%ymm0
- vperm2i128 $0x20,%ymm4,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm4,%ymm0,%ymm4
- vmovdqa %ymm1,0x00(%rsp)
- vmovdqa 0x20(%rsp),%ymm0
- vperm2i128 $0x20,%ymm5,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm5,%ymm0,%ymm5
- vmovdqa %ymm1,0x20(%rsp)
- vmovdqa 0x40(%rsp),%ymm0
- vperm2i128 $0x20,%ymm6,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm6,%ymm0,%ymm6
- vmovdqa %ymm1,0x40(%rsp)
- vmovdqa 0x60(%rsp),%ymm0
- vperm2i128 $0x20,%ymm7,%ymm0,%ymm1
- vperm2i128 $0x31,%ymm7,%ymm0,%ymm7
- vmovdqa %ymm1,0x60(%rsp)
- vperm2i128 $0x20,%ymm12,%ymm8,%ymm0
- vperm2i128 $0x31,%ymm12,%ymm8,%ymm12
- vmovdqa %ymm0,%ymm8
- vperm2i128 $0x20,%ymm13,%ymm9,%ymm0
- vperm2i128 $0x31,%ymm13,%ymm9,%ymm13
- vmovdqa %ymm0,%ymm9
- vperm2i128 $0x20,%ymm14,%ymm10,%ymm0
- vperm2i128 $0x31,%ymm14,%ymm10,%ymm14
- vmovdqa %ymm0,%ymm10
- vperm2i128 $0x20,%ymm15,%ymm11,%ymm0
- vperm2i128 $0x31,%ymm15,%ymm11,%ymm15
- vmovdqa %ymm0,%ymm11
-
- # xor with corresponding input, write to output
- vmovdqa 0x00(%rsp),%ymm0
- vpxor 0x0000(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0000(%rsi)
- vmovdqa 0x20(%rsp),%ymm0
- vpxor 0x0080(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0080(%rsi)
- vmovdqa 0x40(%rsp),%ymm0
- vpxor 0x0040(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x0040(%rsi)
- vmovdqa 0x60(%rsp),%ymm0
- vpxor 0x00c0(%rdx),%ymm0,%ymm0
- vmovdqu %ymm0,0x00c0(%rsi)
- vpxor 0x0100(%rdx),%ymm4,%ymm4
- vmovdqu %ymm4,0x0100(%rsi)
- vpxor 0x0180(%rdx),%ymm5,%ymm5
- vmovdqu %ymm5,0x00180(%rsi)
- vpxor 0x0140(%rdx),%ymm6,%ymm6
- vmovdqu %ymm6,0x0140(%rsi)
- vpxor 0x01c0(%rdx),%ymm7,%ymm7
- vmovdqu %ymm7,0x01c0(%rsi)
- vpxor 0x0020(%rdx),%ymm8,%ymm8
- vmovdqu %ymm8,0x0020(%rsi)
- vpxor 0x00a0(%rdx),%ymm9,%ymm9
- vmovdqu %ymm9,0x00a0(%rsi)
- vpxor 0x0060(%rdx),%ymm10,%ymm10
- vmovdqu %ymm10,0x0060(%rsi)
- vpxor 0x00e0(%rdx),%ymm11,%ymm11
- vmovdqu %ymm11,0x00e0(%rsi)
- vpxor 0x0120(%rdx),%ymm12,%ymm12
- vmovdqu %ymm12,0x0120(%rsi)
- vpxor 0x01a0(%rdx),%ymm13,%ymm13
- vmovdqu %ymm13,0x01a0(%rsi)
- vpxor 0x0160(%rdx),%ymm14,%ymm14
- vmovdqu %ymm14,0x0160(%rsi)
- vpxor 0x01e0(%rdx),%ymm15,%ymm15
- vmovdqu %ymm15,0x01e0(%rsi)
-
- vzeroupper
- lea -8(%r10),%rsp
- ret
-ENDPROC(chacha20_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
deleted file mode 100644
index dce7c5d39c2f..000000000000
--- a/arch/x86/crypto/chacha20_glue.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/simd.h>
-
-#define CHACHA20_STATE_ALIGN 16
-
-asmlinkage void chacha20_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-asmlinkage void chacha20_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
-#ifdef CONFIG_AS_AVX2
-asmlinkage void chacha20_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
-static bool chacha20_use_avx2;
-#endif
-
-static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- u8 buf[CHACHA20_BLOCK_SIZE];
-
-#ifdef CONFIG_AS_AVX2
- if (chacha20_use_avx2) {
- while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
- chacha20_8block_xor_avx2(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 8;
- src += CHACHA20_BLOCK_SIZE * 8;
- dst += CHACHA20_BLOCK_SIZE * 8;
- state[12] += 8;
- }
- }
-#endif
- while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
- chacha20_4block_xor_ssse3(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE * 4;
- src += CHACHA20_BLOCK_SIZE * 4;
- dst += CHACHA20_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block_xor_ssse3(state, dst, src);
- bytes -= CHACHA20_BLOCK_SIZE;
- src += CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha20_block_xor_ssse3(state, buf, buf);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha20_simd(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- u32 *state, state_buf[16 + 2] __aligned(8);
- struct skcipher_walk walk;
- int err;
-
- BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16);
- state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN);
-
- if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd())
- return crypto_chacha20_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- kernel_fpu_begin();
-
- while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
- err = skcipher_walk_done(&walk,
- walk.nbytes % CHACHA20_BLOCK_SIZE);
- }
-
- if (walk.nbytes) {
- chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
- walk.nbytes);
- err = skcipher_walk_done(&walk, 0);
- }
-
- kernel_fpu_end();
-
- return err;
-}
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-simd",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha20_simd,
- .decrypt = chacha20_simd,
-};
-
-static int __init chacha20_simd_mod_init(void)
-{
- if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- chacha20_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#endif
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_simd_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_simd_mod_init);
-module_exit(chacha20_simd_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm, SIMD accelerated");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-simd");
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
new file mode 100644
index 000000000000..45c1c4143176
--- /dev/null
+++ b/arch/x86/crypto/chacha_glue.c
@@ -0,0 +1,304 @@
+/*
+ * x64 SIMD accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+#define CHACHA_STATE_ALIGN 16
+
+asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
+#ifdef CONFIG_AS_AVX2
+asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+static bool chacha_use_avx2;
+#ifdef CONFIG_AS_AVX512
+asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
+ unsigned int len, int nrounds);
+static bool chacha_use_avx512vl;
+#endif
+#endif
+
+static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
+{
+ len = min(len, maxblocks * CHACHA_BLOCK_SIZE);
+ return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE;
+}
+
+static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+#ifdef CONFIG_AS_AVX2
+#ifdef CONFIG_AS_AVX512
+ if (chacha_use_avx512vl) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 8;
+ src += CHACHA_BLOCK_SIZE * 8;
+ dst += CHACHA_BLOCK_SIZE * 8;
+ state[12] += 8;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 4) {
+ chacha_8block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 8);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 2) {
+ chacha_4block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes) {
+ chacha_2block_xor_avx512vl(state, dst, src, bytes,
+ nrounds);
+ state[12] += chacha_advance(bytes, 2);
+ return;
+ }
+ }
+#endif
+ if (chacha_use_avx2) {
+ while (bytes >= CHACHA_BLOCK_SIZE * 8) {
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 8;
+ src += CHACHA_BLOCK_SIZE * 8;
+ dst += CHACHA_BLOCK_SIZE * 8;
+ state[12] += 8;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 4) {
+ chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 8);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE * 2) {
+ chacha_4block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE) {
+ chacha_2block_xor_avx2(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 2);
+ return;
+ }
+ }
+#endif
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ if (bytes > CHACHA_BLOCK_SIZE) {
+ chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state[12] += chacha_advance(bytes, 4);
+ return;
+ }
+ if (bytes) {
+ chacha_block_xor_ssse3(state, dst, src, bytes, nrounds);
+ state[12]++;
+ }
+}
+
+static int chacha_simd_stream_xor(struct skcipher_walk *walk,
+ struct chacha_ctx *ctx, u8 *iv)
+{
+ u32 *state, state_buf[16 + 2] __aligned(8);
+ int next_yield = 4096; /* bytes until next FPU yield */
+ int err = 0;
+
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
+
+ crypto_chacha_init(state, ctx, iv);
+
+ while (walk->nbytes > 0) {
+ unsigned int nbytes = walk->nbytes;
+
+ if (nbytes < walk->total) {
+ nbytes = round_down(nbytes, walk->stride);
+ next_yield -= nbytes;
+ }
+
+ chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
+ nbytes, ctx->nrounds);
+
+ if (next_yield <= 0) {
+ /* temporarily allow preemption */
+ kernel_fpu_end();
+ kernel_fpu_begin();
+ next_yield = 4096;
+ }
+
+ err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_simd(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ int err;
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
+ return crypto_chacha_crypt(req);
+
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ err = chacha_simd_stream_xor(&walk, ctx, req->iv);
+ kernel_fpu_end();
+ return err;
+}
+
+static int xchacha_simd(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ struct chacha_ctx subctx;
+ u32 *state, state_buf[16 + 2] __aligned(8);
+ u8 real_iv[16];
+ int err;
+
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
+ return crypto_xchacha_crypt(req);
+
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
+
+ BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
+ state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
+ crypto_chacha_init(state, ctx, req->iv);
+
+ kernel_fpu_begin();
+
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
+
+ kernel_fpu_end();
+
+ return err;
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-simd",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = chacha_simd,
+ .decrypt = chacha_simd,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-simd",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = xchacha_simd,
+ .decrypt = xchacha_simd,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-simd",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha12_setkey,
+ .encrypt = xchacha_simd,
+ .decrypt = xchacha_simd,
+ },
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+ return -ENODEV;
+
+#ifdef CONFIG_AS_AVX2
+ chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+#ifdef CONFIG_AS_AVX512
+ chacha_use_avx512vl = chacha_use_avx2 &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
+#endif
+#endif
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (x64 SIMD accelerated)");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-simd");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-simd");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-simd");
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
new file mode 100644
index 000000000000..f7946ea1b704
--- /dev/null
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, x86_64 AVX2 accelerated
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+#define PASS0_SUMS %ymm0
+#define PASS1_SUMS %ymm1
+#define PASS2_SUMS %ymm2
+#define PASS3_SUMS %ymm3
+#define K0 %ymm4
+#define K0_XMM %xmm4
+#define K1 %ymm5
+#define K1_XMM %xmm5
+#define K2 %ymm6
+#define K2_XMM %xmm6
+#define K3 %ymm7
+#define K3_XMM %xmm7
+#define T0 %ymm8
+#define T1 %ymm9
+#define T2 %ymm10
+#define T2_XMM %xmm10
+#define T3 %ymm11
+#define T3_XMM %xmm11
+#define T4 %ymm12
+#define T5 %ymm13
+#define T6 %ymm14
+#define T7 %ymm15
+#define KEY %rdi
+#define MESSAGE %rsi
+#define MESSAGE_LEN %rdx
+#define HASH %rcx
+
+.macro _nh_2xstride k0, k1, k2, k3
+
+ // Add message words to key words
+ vpaddd \k0, T3, T0
+ vpaddd \k1, T3, T1
+ vpaddd \k2, T3, T2
+ vpaddd \k3, T3, T3
+
+ // Multiply 32x32 => 64 and accumulate
+ vpshufd $0x10, T0, T4
+ vpshufd $0x32, T0, T0
+ vpshufd $0x10, T1, T5
+ vpshufd $0x32, T1, T1
+ vpshufd $0x10, T2, T6
+ vpshufd $0x32, T2, T2
+ vpshufd $0x10, T3, T7
+ vpshufd $0x32, T3, T3
+ vpmuludq T4, T0, T0
+ vpmuludq T5, T1, T1
+ vpmuludq T6, T2, T2
+ vpmuludq T7, T3, T3
+ vpaddq T0, PASS0_SUMS, PASS0_SUMS
+ vpaddq T1, PASS1_SUMS, PASS1_SUMS
+ vpaddq T2, PASS2_SUMS, PASS2_SUMS
+ vpaddq T3, PASS3_SUMS, PASS3_SUMS
+.endm
+
+/*
+ * void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
+ * u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_avx2)
+
+ vmovdqu 0x00(KEY), K0
+ vmovdqu 0x10(KEY), K1
+ add $0x20, KEY
+ vpxor PASS0_SUMS, PASS0_SUMS, PASS0_SUMS
+ vpxor PASS1_SUMS, PASS1_SUMS, PASS1_SUMS
+ vpxor PASS2_SUMS, PASS2_SUMS, PASS2_SUMS
+ vpxor PASS3_SUMS, PASS3_SUMS, PASS3_SUMS
+
+ sub $0x40, MESSAGE_LEN
+ jl .Lloop4_done
+.Lloop4:
+ vmovdqu (MESSAGE), T3
+ vmovdqu 0x00(KEY), K2
+ vmovdqu 0x10(KEY), K3
+ _nh_2xstride K0, K1, K2, K3
+
+ vmovdqu 0x20(MESSAGE), T3
+ vmovdqu 0x20(KEY), K0
+ vmovdqu 0x30(KEY), K1
+ _nh_2xstride K2, K3, K0, K1
+
+ add $0x40, MESSAGE
+ add $0x40, KEY
+ sub $0x40, MESSAGE_LEN
+ jge .Lloop4
+
+.Lloop4_done:
+ and $0x3f, MESSAGE_LEN
+ jz .Ldone
+
+ cmp $0x20, MESSAGE_LEN
+ jl .Llast
+
+ // 2 or 3 strides remain; do 2 more.
+ vmovdqu (MESSAGE), T3
+ vmovdqu 0x00(KEY), K2
+ vmovdqu 0x10(KEY), K3
+ _nh_2xstride K0, K1, K2, K3
+ add $0x20, MESSAGE
+ add $0x20, KEY
+ sub $0x20, MESSAGE_LEN
+ jz .Ldone
+ vmovdqa K2, K0
+ vmovdqa K3, K1
+.Llast:
+ // Last stride. Zero the high 128 bits of the message and keys so they
+ // don't affect the result when processing them like 2 strides.
+ vmovdqu (MESSAGE), T3_XMM
+ vmovdqa K0_XMM, K0_XMM
+ vmovdqa K1_XMM, K1_XMM
+ vmovdqu 0x00(KEY), K2_XMM
+ vmovdqu 0x10(KEY), K3_XMM
+ _nh_2xstride K0, K1, K2, K3
+
+.Ldone:
+ // Sum the accumulators for each pass, then store the sums to 'hash'
+
+ // PASS0_SUMS is (0A 0B 0C 0D)
+ // PASS1_SUMS is (1A 1B 1C 1D)
+ // PASS2_SUMS is (2A 2B 2C 2D)
+ // PASS3_SUMS is (3A 3B 3C 3D)
+ // We need the horizontal sums:
+ // (0A + 0B + 0C + 0D,
+ // 1A + 1B + 1C + 1D,
+ // 2A + 2B + 2C + 2D,
+ // 3A + 3B + 3C + 3D)
+ //
+
+ vpunpcklqdq PASS1_SUMS, PASS0_SUMS, T0 // T0 = (0A 1A 0C 1C)
+ vpunpckhqdq PASS1_SUMS, PASS0_SUMS, T1 // T1 = (0B 1B 0D 1D)
+ vpunpcklqdq PASS3_SUMS, PASS2_SUMS, T2 // T2 = (2A 3A 2C 3C)
+ vpunpckhqdq PASS3_SUMS, PASS2_SUMS, T3 // T3 = (2B 3B 2D 3D)
+
+ vinserti128 $0x1, T2_XMM, T0, T4 // T4 = (0A 1A 2A 3A)
+ vinserti128 $0x1, T3_XMM, T1, T5 // T5 = (0B 1B 2B 3B)
+ vperm2i128 $0x31, T2, T0, T0 // T0 = (0C 1C 2C 3C)
+ vperm2i128 $0x31, T3, T1, T1 // T1 = (0D 1D 2D 3D)
+
+ vpaddq T5, T4, T4
+ vpaddq T1, T0, T0
+ vpaddq T4, T0, T0
+ vmovdqu T0, (HASH)
+ ret
+ENDPROC(nh_avx2)
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
new file mode 100644
index 000000000000..51f52d4ab4bb
--- /dev/null
+++ b/arch/x86/crypto/nh-sse2-x86_64.S
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
+ *
+ * Copyright 2018 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+
+#include <linux/linkage.h>
+
+#define PASS0_SUMS %xmm0
+#define PASS1_SUMS %xmm1
+#define PASS2_SUMS %xmm2
+#define PASS3_SUMS %xmm3
+#define K0 %xmm4
+#define K1 %xmm5
+#define K2 %xmm6
+#define K3 %xmm7
+#define T0 %xmm8
+#define T1 %xmm9
+#define T2 %xmm10
+#define T3 %xmm11
+#define T4 %xmm12
+#define T5 %xmm13
+#define T6 %xmm14
+#define T7 %xmm15
+#define KEY %rdi
+#define MESSAGE %rsi
+#define MESSAGE_LEN %rdx
+#define HASH %rcx
+
+.macro _nh_stride k0, k1, k2, k3, offset
+
+ // Load next message stride
+ movdqu \offset(MESSAGE), T1
+
+ // Load next key stride
+ movdqu \offset(KEY), \k3
+
+ // Add message words to key words
+ movdqa T1, T2
+ movdqa T1, T3
+ paddd T1, \k0 // reuse k0 to avoid a move
+ paddd \k1, T1
+ paddd \k2, T2
+ paddd \k3, T3
+
+ // Multiply 32x32 => 64 and accumulate
+ pshufd $0x10, \k0, T4
+ pshufd $0x32, \k0, \k0
+ pshufd $0x10, T1, T5
+ pshufd $0x32, T1, T1
+ pshufd $0x10, T2, T6
+ pshufd $0x32, T2, T2
+ pshufd $0x10, T3, T7
+ pshufd $0x32, T3, T3
+ pmuludq T4, \k0
+ pmuludq T5, T1
+ pmuludq T6, T2
+ pmuludq T7, T3
+ paddq \k0, PASS0_SUMS
+ paddq T1, PASS1_SUMS
+ paddq T2, PASS2_SUMS
+ paddq T3, PASS3_SUMS
+.endm
+
+/*
+ * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
+ * u8 hash[NH_HASH_BYTES])
+ *
+ * It's guaranteed that message_len % 16 == 0.
+ */
+ENTRY(nh_sse2)
+
+ movdqu 0x00(KEY), K0
+ movdqu 0x10(KEY), K1
+ movdqu 0x20(KEY), K2
+ add $0x30, KEY
+ pxor PASS0_SUMS, PASS0_SUMS
+ pxor PASS1_SUMS, PASS1_SUMS
+ pxor PASS2_SUMS, PASS2_SUMS
+ pxor PASS3_SUMS, PASS3_SUMS
+
+ sub $0x40, MESSAGE_LEN
+ jl .Lloop4_done
+.Lloop4:
+ _nh_stride K0, K1, K2, K3, 0x00
+ _nh_stride K1, K2, K3, K0, 0x10
+ _nh_stride K2, K3, K0, K1, 0x20
+ _nh_stride K3, K0, K1, K2, 0x30
+ add $0x40, KEY
+ add $0x40, MESSAGE
+ sub $0x40, MESSAGE_LEN
+ jge .Lloop4
+
+.Lloop4_done:
+ and $0x3f, MESSAGE_LEN
+ jz .Ldone
+ _nh_stride K0, K1, K2, K3, 0x00
+
+ sub $0x10, MESSAGE_LEN
+ jz .Ldone
+ _nh_stride K1, K2, K3, K0, 0x10
+
+ sub $0x10, MESSAGE_LEN
+ jz .Ldone
+ _nh_stride K2, K3, K0, K1, 0x20
+
+.Ldone:
+ // Sum the accumulators for each pass, then store the sums to 'hash'
+ movdqa PASS0_SUMS, T0
+ movdqa PASS2_SUMS, T1
+ punpcklqdq PASS1_SUMS, T0 // => (PASS0_SUM_A PASS1_SUM_A)
+ punpcklqdq PASS3_SUMS, T1 // => (PASS2_SUM_A PASS3_SUM_A)
+ punpckhqdq PASS1_SUMS, PASS0_SUMS // => (PASS0_SUM_B PASS1_SUM_B)
+ punpckhqdq PASS3_SUMS, PASS2_SUMS // => (PASS2_SUM_B PASS3_SUM_B)
+ paddq PASS0_SUMS, T0
+ paddq PASS2_SUMS, T1
+ movdqu T0, 0x00(HASH)
+ movdqu T1, 0x10(HASH)
+ ret
+ENDPROC(nh_sse2)
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
new file mode 100644
index 000000000000..20d815ea4b6a
--- /dev/null
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (AVX2 accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+
+asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
+ u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ nh_avx2(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_avx2_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ if (srclen < 64 || !irq_fpu_usable())
+ return crypto_nhpoly1305_update(desc, src, srclen);
+
+ do {
+ unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+ kernel_fpu_begin();
+ crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
+ kernel_fpu_end();
+ src += n;
+ srclen -= n;
+ } while (srclen);
+ return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+ .base.cra_name = "nhpoly1305",
+ .base.cra_driver_name = "nhpoly1305-avx2",
+ .base.cra_priority = 300,
+ .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
+ .base.cra_module = THIS_MODULE,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_nhpoly1305_init,
+ .update = nhpoly1305_avx2_update,
+ .final = crypto_nhpoly1305_final,
+ .setkey = crypto_nhpoly1305_setkey,
+ .descsize = sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_AVX2) ||
+ !boot_cpu_has(X86_FEATURE_OSXSAVE))
+ return -ENODEV;
+
+ return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (AVX2-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-avx2");
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
new file mode 100644
index 000000000000..ed68d164ce14
--- /dev/null
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ * (SSE2 accelerated version)
+ *
+ * Copyright 2018 Google LLC
+ */
+
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/module.h>
+#include <asm/fpu/api.h>
+
+asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
+ u8 hash[NH_HASH_BYTES]);
+
+/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
+static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ nh_sse2(key, message, message_len, (u8 *)hash);
+}
+
+static int nhpoly1305_sse2_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ if (srclen < 64 || !irq_fpu_usable())
+ return crypto_nhpoly1305_update(desc, src, srclen);
+
+ do {
+ unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
+
+ kernel_fpu_begin();
+ crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
+ kernel_fpu_end();
+ src += n;
+ srclen -= n;
+ } while (srclen);
+ return 0;
+}
+
+static struct shash_alg nhpoly1305_alg = {
+ .base.cra_name = "nhpoly1305",
+ .base.cra_driver_name = "nhpoly1305-sse2",
+ .base.cra_priority = 200,
+ .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
+ .base.cra_module = THIS_MODULE,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_nhpoly1305_init,
+ .update = nhpoly1305_sse2_update,
+ .final = crypto_nhpoly1305_final,
+ .setkey = crypto_nhpoly1305_setkey,
+ .descsize = sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_XMM2))
+ return -ENODEV;
+
+ return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (SSE2-accelerated)");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-sse2");
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index f012b7e28ad1..88cc01506c84 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -83,35 +83,37 @@ static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
if (unlikely(!sctx->wset)) {
if (!sctx->uset) {
- memcpy(sctx->u, dctx->r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r);
+ memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+ poly1305_simd_mult(sctx->u, dctx->r.r);
sctx->uset = true;
}
memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 5, dctx->r);
+ poly1305_simd_mult(sctx->u + 5, dctx->r.r);
memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 10, dctx->r);
+ poly1305_simd_mult(sctx->u + 10, dctx->r.r);
sctx->wset = true;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h, src, dctx->r, blocks, sctx->u);
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
+ sctx->u);
src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
}
#endif
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
if (unlikely(!sctx->uset)) {
- memcpy(sctx->u, dctx->r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r);
+ memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
+ poly1305_simd_mult(sctx->u, dctx->r.r);
sctx->uset = true;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h, src, dctx->r, blocks, sctx->u);
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
+ sctx->u);
src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h, src, dctx->r, 1);
+ poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
srclen -= POLY1305_BLOCK_SIZE;
}
return srclen;
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 05c91eb10ca1..045af6eeb7e2 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -430,11 +430,14 @@ config CRYPTO_CTS
help
CTS: Cipher Text Stealing
This is the Cipher Text Stealing mode as described by
- Section 8 of rfc2040 and referenced by rfc3962.
- (rfc3962 includes errata information in its Appendix A)
+ Section 8 of rfc2040 and referenced by rfc3962
+ (rfc3962 includes errata information in its Appendix A) or
+ CBC-CS3 as defined by NIST in Sp800-38A addendum from Oct 2010.
This mode is required for Kerberos gss mechanism support
for AES encryption.
+ See: https://csrc.nist.gov/publications/detail/sp/800-38a/addendum/final
+
config CRYPTO_ECB
tristate "ECB support"
select CRYPTO_BLKCIPHER
@@ -493,6 +496,50 @@ config CRYPTO_KEYWRAP
Support for key wrapping (NIST SP800-38F / RFC3394) without
padding.
+config CRYPTO_NHPOLY1305
+ tristate
+ select CRYPTO_HASH
+ select CRYPTO_POLY1305
+
+config CRYPTO_NHPOLY1305_SSE2
+ tristate "NHPoly1305 hash function (x86_64 SSE2 implementation)"
+ depends on X86 && 64BIT
+ select CRYPTO_NHPOLY1305
+ help
+ SSE2 optimized implementation of the hash function used by the
+ Adiantum encryption mode.
+
+config CRYPTO_NHPOLY1305_AVX2
+ tristate "NHPoly1305 hash function (x86_64 AVX2 implementation)"
+ depends on X86 && 64BIT
+ select CRYPTO_NHPOLY1305
+ help
+ AVX2 optimized implementation of the hash function used by the
+ Adiantum encryption mode.
+
+config CRYPTO_ADIANTUM
+ tristate "Adiantum support"
+ select CRYPTO_CHACHA20
+ select CRYPTO_POLY1305
+ select CRYPTO_NHPOLY1305
+ help
+ Adiantum is a tweakable, length-preserving encryption mode
+ designed for fast and secure disk encryption, especially on
+ CPUs without dedicated crypto instructions. It encrypts
+ each sector using the XChaCha12 stream cipher, two passes of
+ an ε-almost-∆-universal hash function, and an invocation of
+ the AES-256 block cipher on a single 16-byte block. On CPUs
+ without AES instructions, Adiantum is much faster than
+ AES-XTS.
+
+ Adiantum's security is provably reducible to that of its
+ underlying stream and block ciphers, subject to a security
+ bound. Unlike XTS, Adiantum is a true wide-block encryption
+ mode, so it actually provides an even stronger notion of
+ security than XTS, subject to the security bound.
+
+ If unsure, say N.
+
comment "Hash modes"
config CRYPTO_CMAC
@@ -936,6 +983,18 @@ config CRYPTO_SM3
http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
+config CRYPTO_STREEBOG
+ tristate "Streebog Hash Function"
+ select CRYPTO_HASH
+ help
+ Streebog Hash Function (GOST R 34.11-2012, RFC 6986) is one of the Russian
+ cryptographic standard algorithms (called GOST algorithms).
+ This setting enables two hash algorithms with 256 and 512 bits output.
+
+ References:
+ https://tc26.ru/upload/iblock/fed/feddbb4d26b685903faa2ba11aea43f6.pdf
+ https://tools.ietf.org/html/rfc6986
+
config CRYPTO_TGR192
tristate "Tiger digest algorithms"
select CRYPTO_HASH
@@ -1006,7 +1065,8 @@ config CRYPTO_AES_TI
8 for decryption), this implementation only uses just two S-boxes of
256 bytes each, and attempts to eliminate data dependent latencies by
prefetching the entire table into the cache at the start of each
- block.
+ block. Interrupts are also disabled to avoid races where cachelines
+ are evicted when the CPU is interrupted to do something else.
config CRYPTO_AES_586
tristate "AES cipher algorithms (i586)"
@@ -1387,32 +1447,34 @@ config CRYPTO_SALSA20
Bernstein <djb@cr.yp.to>. See <http://cr.yp.to/snuffle.html>
config CRYPTO_CHACHA20
- tristate "ChaCha20 cipher algorithm"
+ tristate "ChaCha stream cipher algorithms"
select CRYPTO_BLKCIPHER
help
- ChaCha20 cipher algorithm, RFC7539.
+ The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
Bernstein and further specified in RFC7539 for use in IETF protocols.
- This is the portable C implementation of ChaCha20.
-
- See also:
+ This is the portable C implementation of ChaCha20. See also:
<http://cr.yp.to/chacha/chacha-20080128.pdf>
+ XChaCha20 is the application of the XSalsa20 construction to ChaCha20
+ rather than to Salsa20. XChaCha20 extends ChaCha20's nonce length
+ from 64 bits (or 96 bits using the RFC7539 convention) to 192 bits,
+ while provably retaining ChaCha20's security. See also:
+ <https://cr.yp.to/snuffle/xsalsa-20081128.pdf>
+
+ XChaCha12 is XChaCha20 reduced to 12 rounds, with correspondingly
+ reduced security margin but increased performance. It can be needed
+ in some performance-sensitive scenarios.
+
config CRYPTO_CHACHA20_X86_64
- tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
+ tristate "ChaCha stream cipher algorithms (x86_64/SSSE3/AVX2/AVX-512VL)"
depends on X86 && 64BIT
select CRYPTO_BLKCIPHER
select CRYPTO_CHACHA20
help
- ChaCha20 cipher algorithm, RFC7539.
-
- ChaCha20 is a 256-bit high-speed stream cipher designed by Daniel J.
- Bernstein and further specified in RFC7539 for use in IETF protocols.
- This is the x86_64 assembler implementation using SIMD instructions.
-
- See also:
- <http://cr.yp.to/chacha/chacha-20080128.pdf>
+ SSSE3, AVX2, and AVX-512VL optimized implementations of the ChaCha20,
+ XChaCha20, and XChaCha12 stream ciphers.
config CRYPTO_SEED
tristate "SEED cipher algorithm"
@@ -1812,7 +1874,8 @@ config CRYPTO_USER_API_AEAD
cipher algorithms.
config CRYPTO_STATS
- bool
+ bool "Crypto usage statistics for User-space"
+ depends on CRYPTO_USER
help
This option enables the gathering of crypto stats.
This will collect:
diff --git a/crypto/Makefile b/crypto/Makefile
index 5c207c76abf7..799ed5e94606 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -54,7 +54,8 @@ cryptomgr-y := algboss.o testmgr.o
obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
-crypto_user-y := crypto_user_base.o crypto_user_stat.o
+crypto_user-y := crypto_user_base.o
+crypto_user-$(CONFIG_CRYPTO_STATS) += crypto_user_stat.o
obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
@@ -71,6 +72,7 @@ obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o
+obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
obj-$(CONFIG_CRYPTO_WP512) += wp512.o
CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
@@ -84,6 +86,8 @@ obj-$(CONFIG_CRYPTO_LRW) += lrw.o
obj-$(CONFIG_CRYPTO_XTS) += xts.o
obj-$(CONFIG_CRYPTO_CTR) += ctr.o
obj-$(CONFIG_CRYPTO_KEYWRAP) += keywrap.o
+obj-$(CONFIG_CRYPTO_ADIANTUM) += adiantum.o
+obj-$(CONFIG_CRYPTO_NHPOLY1305) += nhpoly1305.o
obj-$(CONFIG_CRYPTO_GCM) += gcm.o
obj-$(CONFIG_CRYPTO_CCM) += ccm.o
obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
@@ -116,7 +120,7 @@ obj-$(CONFIG_CRYPTO_KHAZAD) += khazad.o
obj-$(CONFIG_CRYPTO_ANUBIS) += anubis.o
obj-$(CONFIG_CRYPTO_SEED) += seed.o
obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
-obj-$(CONFIG_CRYPTO_CHACHA20) += chacha20_generic.o
+obj-$(CONFIG_CRYPTO_CHACHA20) += chacha_generic.o
obj-$(CONFIG_CRYPTO_POLY1305) += poly1305_generic.o
obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index 8882e90e868e..b339587073c3 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -365,23 +365,18 @@ static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_blkcipher rblkcipher;
- strncpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type));
- strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<default>",
- sizeof(rblkcipher.geniv));
- rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0';
+ memset(&rblkcipher, 0, sizeof(rblkcipher));
+
+ strscpy(rblkcipher.type, "ablkcipher", sizeof(rblkcipher.type));
+ strscpy(rblkcipher.geniv, "<default>", sizeof(rblkcipher.geniv));
rblkcipher.blocksize = alg->cra_blocksize;
rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
rblkcipher.max_keysize = alg->cra_ablkcipher.max_keysize;
rblkcipher.ivsize = alg->cra_ablkcipher.ivsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
- sizeof(struct crypto_report_blkcipher), &rblkcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
+ sizeof(rblkcipher), &rblkcipher);
}
#else
static int crypto_ablkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
@@ -403,7 +398,7 @@ static void crypto_ablkcipher_show(struct seq_file *m, struct crypto_alg *alg)
seq_printf(m, "min keysize : %u\n", ablkcipher->min_keysize);
seq_printf(m, "max keysize : %u\n", ablkcipher->max_keysize);
seq_printf(m, "ivsize : %u\n", ablkcipher->ivsize);
- seq_printf(m, "geniv : %s\n", ablkcipher->geniv ?: "<default>");
+ seq_printf(m, "geniv : <default>\n");
}
const struct crypto_type crypto_ablkcipher_type = {
@@ -415,78 +410,3 @@ const struct crypto_type crypto_ablkcipher_type = {
.report = crypto_ablkcipher_report,
};
EXPORT_SYMBOL_GPL(crypto_ablkcipher_type);
-
-static int crypto_init_givcipher_ops(struct crypto_tfm *tfm, u32 type,
- u32 mask)
-{
- struct ablkcipher_alg *alg = &tfm->__crt_alg->cra_ablkcipher;
- struct ablkcipher_tfm *crt = &tfm->crt_ablkcipher;
-
- if (alg->ivsize > PAGE_SIZE / 8)
- return -EINVAL;
-
- crt->setkey = tfm->__crt_alg->cra_flags & CRYPTO_ALG_GENIV ?
- alg->setkey : setkey;
- crt->encrypt = alg->encrypt;
- crt->decrypt = alg->decrypt;
- crt->base = __crypto_ablkcipher_cast(tfm);
- crt->ivsize = alg->ivsize;
-
- return 0;
-}
-
-#ifdef CONFIG_NET
-static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
- struct crypto_report_blkcipher rblkcipher;
-
- strncpy(rblkcipher.type, "givcipher", sizeof(rblkcipher.type));
- strncpy(rblkcipher.geniv, alg->cra_ablkcipher.geniv ?: "<built-in>",
- sizeof(rblkcipher.geniv));
- rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0';
-
- rblkcipher.blocksize = alg->cra_blocksize;
- rblkcipher.min_keysize = alg->cra_ablkcipher.min_keysize;
- rblkcipher.max_keysize = alg->cra_ablkcipher.max_keysize;
- rblkcipher.ivsize = alg->cra_ablkcipher.ivsize;
-
- if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
- sizeof(struct crypto_report_blkcipher), &rblkcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-#else
-static int crypto_givcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
-{
- return -ENOSYS;
-}
-#endif
-
-static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
- __maybe_unused;
-static void crypto_givcipher_show(struct seq_file *m, struct crypto_alg *alg)
-{
- struct ablkcipher_alg *ablkcipher = &alg->cra_ablkcipher;
-
- seq_printf(m, "type : givcipher\n");
- seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ?
- "yes" : "no");
- seq_printf(m, "blocksize : %u\n", alg->cra_blocksize);
- seq_printf(m, "min keysize : %u\n", ablkcipher->min_keysize);
- seq_printf(m, "max keysize : %u\n", ablkcipher->max_keysize);
- seq_printf(m, "ivsize : %u\n", ablkcipher->ivsize);
- seq_printf(m, "geniv : %s\n", ablkcipher->geniv ?: "<built-in>");
-}
-
-const struct crypto_type crypto_givcipher_type = {
- .ctxsize = crypto_ablkcipher_ctxsize,
- .init = crypto_init_givcipher_ops,
-#ifdef CONFIG_PROC_FS
- .show = crypto_givcipher_show,
-#endif
- .report = crypto_givcipher_report,
-};
-EXPORT_SYMBOL_GPL(crypto_givcipher_type);
diff --git a/crypto/acompress.c b/crypto/acompress.c
index 1544b7c057fb..0c5bedd06e70 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -33,15 +33,11 @@ static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_acomp racomp;
- strncpy(racomp.type, "acomp", sizeof(racomp.type));
+ memset(&racomp, 0, sizeof(racomp));
- if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
- sizeof(struct crypto_report_acomp), &racomp))
- goto nla_put_failure;
- return 0;
+ strscpy(racomp.type, "acomp", sizeof(racomp.type));
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_ACOMP, sizeof(racomp), &racomp);
}
#else
static int crypto_acomp_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
new file mode 100644
index 000000000000..6651e713c45d
--- /dev/null
+++ b/crypto/adiantum.c
@@ -0,0 +1,664 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Adiantum length-preserving encryption mode
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * Adiantum is a tweakable, length-preserving encryption mode designed for fast
+ * and secure disk encryption, especially on CPUs without dedicated crypto
+ * instructions. Adiantum encrypts each sector using the XChaCha12 stream
+ * cipher, two passes of an ε-almost-∆-universal (ε-∆U) hash function based on
+ * NH and Poly1305, and an invocation of the AES-256 block cipher on a single
+ * 16-byte block. See the paper for details:
+ *
+ * Adiantum: length-preserving encryption for entry-level processors
+ * (https://eprint.iacr.org/2018/720.pdf)
+ *
+ * For flexibility, this implementation also allows other ciphers:
+ *
+ * - Stream cipher: XChaCha12 or XChaCha20
+ * - Block cipher: any with a 128-bit block size and 256-bit key
+ *
+ * This implementation doesn't currently allow other ε-∆U hash functions, i.e.
+ * HPolyC is not supported. This is because Adiantum is ~20% faster than HPolyC
+ * but still provably as secure, and also the ε-∆U hash function of HBSH is
+ * formally defined to take two inputs (tweak, message) which makes it difficult
+ * to wrap with the crypto_shash API. Rather, some details need to be handled
+ * here. Nevertheless, if needed in the future, support for other ε-∆U hash
+ * functions could be added here.
+ */
+
+#include <crypto/b128ops.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/nhpoly1305.h>
+#include <crypto/scatterwalk.h>
+#include <linux/module.h>
+
+#include "internal.h"
+
+/*
+ * Size of right-hand part of input data, in bytes; also the size of the block
+ * cipher's block size and the hash function's output.
+ */
+#define BLOCKCIPHER_BLOCK_SIZE 16
+
+/* Size of the block cipher key (K_E) in bytes */
+#define BLOCKCIPHER_KEY_SIZE 32
+
+/* Size of the hash key (K_H) in bytes */
+#define HASH_KEY_SIZE (POLY1305_BLOCK_SIZE + NHPOLY1305_KEY_SIZE)
+
+/*
+ * The specification allows variable-length tweaks, but Linux's crypto API
+ * currently only allows algorithms to support a single length. The "natural"
+ * tweak length for Adiantum is 16, since that fits into one Poly1305 block for
+ * the best performance. But longer tweaks are useful for fscrypt, to avoid
+ * needing to derive per-file keys. So instead we use two blocks, or 32 bytes.
+ */
+#define TWEAK_SIZE 32
+
+struct adiantum_instance_ctx {
+ struct crypto_skcipher_spawn streamcipher_spawn;
+ struct crypto_spawn blockcipher_spawn;
+ struct crypto_shash_spawn hash_spawn;
+};
+
+struct adiantum_tfm_ctx {
+ struct crypto_skcipher *streamcipher;
+ struct crypto_cipher *blockcipher;
+ struct crypto_shash *hash;
+ struct poly1305_key header_hash_key;
+};
+
+struct adiantum_request_ctx {
+
+ /*
+ * Buffer for right-hand part of data, i.e.
+ *
+ * P_L => P_M => C_M => C_R when encrypting, or
+ * C_R => C_M => P_M => P_L when decrypting.
+ *
+ * Also used to build the IV for the stream cipher.
+ */
+ union {
+ u8 bytes[XCHACHA_IV_SIZE];
+ __le32 words[XCHACHA_IV_SIZE / sizeof(__le32)];
+ le128 bignum; /* interpret as element of Z/(2^{128}Z) */
+ } rbuf;
+
+ bool enc; /* true if encrypting, false if decrypting */
+
+ /*
+ * The result of the Poly1305 ε-∆U hash function applied to
+ * (bulk length, tweak)
+ */
+ le128 header_hash;
+
+ /* Sub-requests, must be last */
+ union {
+ struct shash_desc hash_desc;
+ struct skcipher_request streamcipher_req;
+ } u;
+};
+
+/*
+ * Given the XChaCha stream key K_S, derive the block cipher key K_E and the
+ * hash key K_H as follows:
+ *
+ * K_E || K_H || ... = XChaCha(key=K_S, nonce=1||0^191)
+ *
+ * Note that this denotes using bits from the XChaCha keystream, which here we
+ * get indirectly by encrypting a buffer containing all 0's.
+ */
+static int adiantum_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct {
+ u8 iv[XCHACHA_IV_SIZE];
+ u8 derived_keys[BLOCKCIPHER_KEY_SIZE + HASH_KEY_SIZE];
+ struct scatterlist sg;
+ struct crypto_wait wait;
+ struct skcipher_request req; /* must be last */
+ } *data;
+ u8 *keyp;
+ int err;
+
+ /* Set the stream cipher key (K_S) */
+ crypto_skcipher_clear_flags(tctx->streamcipher, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(tctx->streamcipher,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_skcipher_setkey(tctx->streamcipher, key, keylen);
+ crypto_skcipher_set_flags(tfm,
+ crypto_skcipher_get_flags(tctx->streamcipher) &
+ CRYPTO_TFM_RES_MASK);
+ if (err)
+ return err;
+
+ /* Derive the subkeys */
+ data = kzalloc(sizeof(*data) +
+ crypto_skcipher_reqsize(tctx->streamcipher), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ data->iv[0] = 1;
+ sg_init_one(&data->sg, data->derived_keys, sizeof(data->derived_keys));
+ crypto_init_wait(&data->wait);
+ skcipher_request_set_tfm(&data->req, tctx->streamcipher);
+ skcipher_request_set_callback(&data->req, CRYPTO_TFM_REQ_MAY_SLEEP |
+ CRYPTO_TFM_REQ_MAY_BACKLOG,
+ crypto_req_done, &data->wait);
+ skcipher_request_set_crypt(&data->req, &data->sg, &data->sg,
+ sizeof(data->derived_keys), data->iv);
+ err = crypto_wait_req(crypto_skcipher_encrypt(&data->req), &data->wait);
+ if (err)
+ goto out;
+ keyp = data->derived_keys;
+
+ /* Set the block cipher key (K_E) */
+ crypto_cipher_clear_flags(tctx->blockcipher, CRYPTO_TFM_REQ_MASK);
+ crypto_cipher_set_flags(tctx->blockcipher,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_cipher_setkey(tctx->blockcipher, keyp,
+ BLOCKCIPHER_KEY_SIZE);
+ crypto_skcipher_set_flags(tfm,
+ crypto_cipher_get_flags(tctx->blockcipher) &
+ CRYPTO_TFM_RES_MASK);
+ if (err)
+ goto out;
+ keyp += BLOCKCIPHER_KEY_SIZE;
+
+ /* Set the hash key (K_H) */
+ poly1305_core_setkey(&tctx->header_hash_key, keyp);
+ keyp += POLY1305_BLOCK_SIZE;
+
+ crypto_shash_clear_flags(tctx->hash, CRYPTO_TFM_REQ_MASK);
+ crypto_shash_set_flags(tctx->hash, crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ err = crypto_shash_setkey(tctx->hash, keyp, NHPOLY1305_KEY_SIZE);
+ crypto_skcipher_set_flags(tfm, crypto_shash_get_flags(tctx->hash) &
+ CRYPTO_TFM_RES_MASK);
+ keyp += NHPOLY1305_KEY_SIZE;
+ WARN_ON(keyp != &data->derived_keys[ARRAY_SIZE(data->derived_keys)]);
+out:
+ kzfree(data);
+ return err;
+}
+
+/* Addition in Z/(2^{128}Z) */
+static inline void le128_add(le128 *r, const le128 *v1, const le128 *v2)
+{
+ u64 x = le64_to_cpu(v1->b);
+ u64 y = le64_to_cpu(v2->b);
+
+ r->b = cpu_to_le64(x + y);
+ r->a = cpu_to_le64(le64_to_cpu(v1->a) + le64_to_cpu(v2->a) +
+ (x + y < x));
+}
+
+/* Subtraction in Z/(2^{128}Z) */
+static inline void le128_sub(le128 *r, const le128 *v1, const le128 *v2)
+{
+ u64 x = le64_to_cpu(v1->b);
+ u64 y = le64_to_cpu(v2->b);
+
+ r->b = cpu_to_le64(x - y);
+ r->a = cpu_to_le64(le64_to_cpu(v1->a) - le64_to_cpu(v2->a) -
+ (x - y > x));
+}
+
+/*
+ * Apply the Poly1305 ε-∆U hash function to (bulk length, tweak) and save the
+ * result to rctx->header_hash. This is the calculation
+ *
+ * H_T ← Poly1305_{K_T}(bin_{128}(|L|) || T)
+ *
+ * from the procedure in section 6.4 of the Adiantum paper. The resulting value
+ * is reused in both the first and second hash steps. Specifically, it's added
+ * to the result of an independently keyed ε-∆U hash function (for equal length
+ * inputs only) taken over the left-hand part (the "bulk") of the message, to
+ * give the overall Adiantum hash of the (tweak, left-hand part) pair.
+ */
+static void adiantum_hash_header(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+ const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+ struct {
+ __le64 message_bits;
+ __le64 padding;
+ } header = {
+ .message_bits = cpu_to_le64((u64)bulk_len * 8)
+ };
+ struct poly1305_state state;
+
+ poly1305_core_init(&state);
+
+ BUILD_BUG_ON(sizeof(header) % POLY1305_BLOCK_SIZE != 0);
+ poly1305_core_blocks(&state, &tctx->header_hash_key,
+ &header, sizeof(header) / POLY1305_BLOCK_SIZE);
+
+ BUILD_BUG_ON(TWEAK_SIZE % POLY1305_BLOCK_SIZE != 0);
+ poly1305_core_blocks(&state, &tctx->header_hash_key, req->iv,
+ TWEAK_SIZE / POLY1305_BLOCK_SIZE);
+
+ poly1305_core_emit(&state, &rctx->header_hash);
+}
+
+/* Hash the left-hand part (the "bulk") of the message using NHPoly1305 */
+static int adiantum_hash_message(struct skcipher_request *req,
+ struct scatterlist *sgl, le128 *digest)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+ const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+ struct shash_desc *hash_desc = &rctx->u.hash_desc;
+ struct sg_mapping_iter miter;
+ unsigned int i, n;
+ int err;
+
+ hash_desc->tfm = tctx->hash;
+ hash_desc->flags = 0;
+
+ err = crypto_shash_init(hash_desc);
+ if (err)
+ return err;
+
+ sg_miter_start(&miter, sgl, sg_nents(sgl),
+ SG_MITER_FROM_SG | SG_MITER_ATOMIC);
+ for (i = 0; i < bulk_len; i += n) {
+ sg_miter_next(&miter);
+ n = min_t(unsigned int, miter.length, bulk_len - i);
+ err = crypto_shash_update(hash_desc, miter.addr, n);
+ if (err)
+ break;
+ }
+ sg_miter_stop(&miter);
+ if (err)
+ return err;
+
+ return crypto_shash_final(hash_desc, (u8 *)digest);
+}
+
+/* Continue Adiantum encryption/decryption after the stream cipher step */
+static int adiantum_finish(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+ const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+ le128 digest;
+ int err;
+
+ /* If decrypting, decrypt C_M with the block cipher to get P_M */
+ if (!rctx->enc)
+ crypto_cipher_decrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+ rctx->rbuf.bytes);
+
+ /*
+ * Second hash step
+ * enc: C_R = C_M - H_{K_H}(T, C_L)
+ * dec: P_R = P_M - H_{K_H}(T, P_L)
+ */
+ err = adiantum_hash_message(req, req->dst, &digest);
+ if (err)
+ return err;
+ le128_add(&digest, &digest, &rctx->header_hash);
+ le128_sub(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+ scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->dst,
+ bulk_len, BLOCKCIPHER_BLOCK_SIZE, 1);
+ return 0;
+}
+
+static void adiantum_streamcipher_done(struct crypto_async_request *areq,
+ int err)
+{
+ struct skcipher_request *req = areq->data;
+
+ if (!err)
+ err = adiantum_finish(req);
+
+ skcipher_request_complete(req, err);
+}
+
+static int adiantum_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct adiantum_request_ctx *rctx = skcipher_request_ctx(req);
+ const unsigned int bulk_len = req->cryptlen - BLOCKCIPHER_BLOCK_SIZE;
+ unsigned int stream_len;
+ le128 digest;
+ int err;
+
+ if (req->cryptlen < BLOCKCIPHER_BLOCK_SIZE)
+ return -EINVAL;
+
+ rctx->enc = enc;
+
+ /*
+ * First hash step
+ * enc: P_M = P_R + H_{K_H}(T, P_L)
+ * dec: C_M = C_R + H_{K_H}(T, C_L)
+ */
+ adiantum_hash_header(req);
+ err = adiantum_hash_message(req, req->src, &digest);
+ if (err)
+ return err;
+ le128_add(&digest, &digest, &rctx->header_hash);
+ scatterwalk_map_and_copy(&rctx->rbuf.bignum, req->src,
+ bulk_len, BLOCKCIPHER_BLOCK_SIZE, 0);
+ le128_add(&rctx->rbuf.bignum, &rctx->rbuf.bignum, &digest);
+
+ /* If encrypting, encrypt P_M with the block cipher to get C_M */
+ if (enc)
+ crypto_cipher_encrypt_one(tctx->blockcipher, rctx->rbuf.bytes,
+ rctx->rbuf.bytes);
+
+ /* Initialize the rest of the XChaCha IV (first part is C_M) */
+ BUILD_BUG_ON(BLOCKCIPHER_BLOCK_SIZE != 16);
+ BUILD_BUG_ON(XCHACHA_IV_SIZE != 32); /* nonce || stream position */
+ rctx->rbuf.words[4] = cpu_to_le32(1);
+ rctx->rbuf.words[5] = 0;
+ rctx->rbuf.words[6] = 0;
+ rctx->rbuf.words[7] = 0;
+
+ /*
+ * XChaCha needs to be done on all the data except the last 16 bytes;
+ * for disk encryption that usually means 4080 or 496 bytes. But ChaCha
+ * implementations tend to be most efficient when passed a whole number
+ * of 64-byte ChaCha blocks, or sometimes even a multiple of 256 bytes.
+ * And here it doesn't matter whether the last 16 bytes are written to,
+ * as the second hash step will overwrite them. Thus, round the XChaCha
+ * length up to the next 64-byte boundary if possible.
+ */
+ stream_len = bulk_len;
+ if (round_up(stream_len, CHACHA_BLOCK_SIZE) <= req->cryptlen)
+ stream_len = round_up(stream_len, CHACHA_BLOCK_SIZE);
+
+ skcipher_request_set_tfm(&rctx->u.streamcipher_req, tctx->streamcipher);
+ skcipher_request_set_crypt(&rctx->u.streamcipher_req, req->src,
+ req->dst, stream_len, &rctx->rbuf);
+ skcipher_request_set_callback(&rctx->u.streamcipher_req,
+ req->base.flags,
+ adiantum_streamcipher_done, req);
+ return crypto_skcipher_encrypt(&rctx->u.streamcipher_req) ?:
+ adiantum_finish(req);
+}
+
+static int adiantum_encrypt(struct skcipher_request *req)
+{
+ return adiantum_crypt(req, true);
+}
+
+static int adiantum_decrypt(struct skcipher_request *req)
+{
+ return adiantum_crypt(req, false);
+}
+
+static int adiantum_init_tfm(struct crypto_skcipher *tfm)
+{
+ struct skcipher_instance *inst = skcipher_alg_instance(tfm);
+ struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+ struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *streamcipher;
+ struct crypto_cipher *blockcipher;
+ struct crypto_shash *hash;
+ unsigned int subreq_size;
+ int err;
+
+ streamcipher = crypto_spawn_skcipher(&ictx->streamcipher_spawn);
+ if (IS_ERR(streamcipher))
+ return PTR_ERR(streamcipher);
+
+ blockcipher = crypto_spawn_cipher(&ictx->blockcipher_spawn);
+ if (IS_ERR(blockcipher)) {
+ err = PTR_ERR(blockcipher);
+ goto err_free_streamcipher;
+ }
+
+ hash = crypto_spawn_shash(&ictx->hash_spawn);
+ if (IS_ERR(hash)) {
+ err = PTR_ERR(hash);
+ goto err_free_blockcipher;
+ }
+
+ tctx->streamcipher = streamcipher;
+ tctx->blockcipher = blockcipher;
+ tctx->hash = hash;
+
+ BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
+ sizeof(struct adiantum_request_ctx));
+ subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+ u.hash_desc) +
+ crypto_shash_descsize(hash),
+ FIELD_SIZEOF(struct adiantum_request_ctx,
+ u.streamcipher_req) +
+ crypto_skcipher_reqsize(streamcipher));
+
+ crypto_skcipher_set_reqsize(tfm,
+ offsetof(struct adiantum_request_ctx, u) +
+ subreq_size);
+ return 0;
+
+err_free_blockcipher:
+ crypto_free_cipher(blockcipher);
+err_free_streamcipher:
+ crypto_free_skcipher(streamcipher);
+ return err;
+}
+
+static void adiantum_exit_tfm(struct crypto_skcipher *tfm)
+{
+ struct adiantum_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(tctx->streamcipher);
+ crypto_free_cipher(tctx->blockcipher);
+ crypto_free_shash(tctx->hash);
+}
+
+static void adiantum_free_instance(struct skcipher_instance *inst)
+{
+ struct adiantum_instance_ctx *ictx = skcipher_instance_ctx(inst);
+
+ crypto_drop_skcipher(&ictx->streamcipher_spawn);
+ crypto_drop_spawn(&ictx->blockcipher_spawn);
+ crypto_drop_shash(&ictx->hash_spawn);
+ kfree(inst);
+}
+
+/*
+ * Check for a supported set of inner algorithms.
+ * See the comment at the beginning of this file.
+ */
+static bool adiantum_supported_algorithms(struct skcipher_alg *streamcipher_alg,
+ struct crypto_alg *blockcipher_alg,
+ struct shash_alg *hash_alg)
+{
+ if (strcmp(streamcipher_alg->base.cra_name, "xchacha12") != 0 &&
+ strcmp(streamcipher_alg->base.cra_name, "xchacha20") != 0)
+ return false;
+
+ if (blockcipher_alg->cra_cipher.cia_min_keysize > BLOCKCIPHER_KEY_SIZE ||
+ blockcipher_alg->cra_cipher.cia_max_keysize < BLOCKCIPHER_KEY_SIZE)
+ return false;
+ if (blockcipher_alg->cra_blocksize != BLOCKCIPHER_BLOCK_SIZE)
+ return false;
+
+ if (strcmp(hash_alg->base.cra_name, "nhpoly1305") != 0)
+ return false;
+
+ return true;
+}
+
+static int adiantum_create(struct crypto_template *tmpl, struct rtattr **tb)
+{
+ struct crypto_attr_type *algt;
+ const char *streamcipher_name;
+ const char *blockcipher_name;
+ const char *nhpoly1305_name;
+ struct skcipher_instance *inst;
+ struct adiantum_instance_ctx *ictx;
+ struct skcipher_alg *streamcipher_alg;
+ struct crypto_alg *blockcipher_alg;
+ struct crypto_alg *_hash_alg;
+ struct shash_alg *hash_alg;
+ int err;
+
+ algt = crypto_get_attr_type(tb);
+ if (IS_ERR(algt))
+ return PTR_ERR(algt);
+
+ if ((algt->type ^ CRYPTO_ALG_TYPE_SKCIPHER) & algt->mask)
+ return -EINVAL;
+
+ streamcipher_name = crypto_attr_alg_name(tb[1]);
+ if (IS_ERR(streamcipher_name))
+ return PTR_ERR(streamcipher_name);
+
+ blockcipher_name = crypto_attr_alg_name(tb[2]);
+ if (IS_ERR(blockcipher_name))
+ return PTR_ERR(blockcipher_name);
+
+ nhpoly1305_name = crypto_attr_alg_name(tb[3]);
+ if (nhpoly1305_name == ERR_PTR(-ENOENT))
+ nhpoly1305_name = "nhpoly1305";
+ if (IS_ERR(nhpoly1305_name))
+ return PTR_ERR(nhpoly1305_name);
+
+ inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
+ if (!inst)
+ return -ENOMEM;
+ ictx = skcipher_instance_ctx(inst);
+
+ /* Stream cipher, e.g. "xchacha12" */
+ err = crypto_grab_skcipher(&ictx->streamcipher_spawn, streamcipher_name,
+ 0, crypto_requires_sync(algt->type,
+ algt->mask));
+ if (err)
+ goto out_free_inst;
+ streamcipher_alg = crypto_spawn_skcipher_alg(&ictx->streamcipher_spawn);
+
+ /* Block cipher, e.g. "aes" */
+ err = crypto_grab_spawn(&ictx->blockcipher_spawn, blockcipher_name,
+ CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK);
+ if (err)
+ goto out_drop_streamcipher;
+ blockcipher_alg = ictx->blockcipher_spawn.alg;
+
+ /* NHPoly1305 ε-∆U hash function */
+ _hash_alg = crypto_alg_mod_lookup(nhpoly1305_name,
+ CRYPTO_ALG_TYPE_SHASH,
+ CRYPTO_ALG_TYPE_MASK);
+ if (IS_ERR(_hash_alg)) {
+ err = PTR_ERR(_hash_alg);
+ goto out_drop_blockcipher;
+ }
+ hash_alg = __crypto_shash_alg(_hash_alg);
+ err = crypto_init_shash_spawn(&ictx->hash_spawn, hash_alg,
+ skcipher_crypto_instance(inst));
+ if (err)
+ goto out_put_hash;
+
+ /* Check the set of algorithms */
+ if (!adiantum_supported_algorithms(streamcipher_alg, blockcipher_alg,
+ hash_alg)) {
+ pr_warn("Unsupported Adiantum instantiation: (%s,%s,%s)\n",
+ streamcipher_alg->base.cra_name,
+ blockcipher_alg->cra_name, hash_alg->base.cra_name);
+ err = -EINVAL;
+ goto out_drop_hash;
+ }
+
+ /* Instance fields */
+
+ err = -ENAMETOOLONG;
+ if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
+ "adiantum(%s,%s)", streamcipher_alg->base.cra_name,
+ blockcipher_alg->cra_name) >= CRYPTO_MAX_ALG_NAME)
+ goto out_drop_hash;
+ if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
+ "adiantum(%s,%s,%s)",
+ streamcipher_alg->base.cra_driver_name,
+ blockcipher_alg->cra_driver_name,
+ hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
+ goto out_drop_hash;
+
+ inst->alg.base.cra_flags = streamcipher_alg->base.cra_flags &
+ CRYPTO_ALG_ASYNC;
+ inst->alg.base.cra_blocksize = BLOCKCIPHER_BLOCK_SIZE;
+ inst->alg.base.cra_ctxsize = sizeof(struct adiantum_tfm_ctx);
+ inst->alg.base.cra_alignmask = streamcipher_alg->base.cra_alignmask |
+ hash_alg->base.cra_alignmask;
+ /*
+ * The block cipher is only invoked once per message, so for long
+ * messages (e.g. sectors for disk encryption) its performance doesn't
+ * matter as much as that of the stream cipher and hash function. Thus,
+ * weigh the block cipher's ->cra_priority less.
+ */
+ inst->alg.base.cra_priority = (4 * streamcipher_alg->base.cra_priority +
+ 2 * hash_alg->base.cra_priority +
+ blockcipher_alg->cra_priority) / 7;
+
+ inst->alg.setkey = adiantum_setkey;
+ inst->alg.encrypt = adiantum_encrypt;
+ inst->alg.decrypt = adiantum_decrypt;
+ inst->alg.init = adiantum_init_tfm;
+ inst->alg.exit = adiantum_exit_tfm;
+ inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(streamcipher_alg);
+ inst->alg.max_keysize = crypto_skcipher_alg_max_keysize(streamcipher_alg);
+ inst->alg.ivsize = TWEAK_SIZE;
+
+ inst->free = adiantum_free_instance;
+
+ err = skcipher_register_instance(tmpl, inst);
+ if (err)
+ goto out_drop_hash;
+
+ crypto_mod_put(_hash_alg);
+ return 0;
+
+out_drop_hash:
+ crypto_drop_shash(&ictx->hash_spawn);
+out_put_hash:
+ crypto_mod_put(_hash_alg);
+out_drop_blockcipher:
+ crypto_drop_spawn(&ictx->blockcipher_spawn);
+out_drop_streamcipher:
+ crypto_drop_skcipher(&ictx->streamcipher_spawn);
+out_free_inst:
+ kfree(inst);
+ return err;
+}
+
+/* adiantum(streamcipher_name, blockcipher_name [, nhpoly1305_name]) */
+static struct crypto_template adiantum_tmpl = {
+ .name = "adiantum",
+ .create = adiantum_create,
+ .module = THIS_MODULE,
+};
+
+static int __init adiantum_module_init(void)
+{
+ return crypto_register_template(&adiantum_tmpl);
+}
+
+static void __exit adiantum_module_exit(void)
+{
+ crypto_unregister_template(&adiantum_tmpl);
+}
+
+module_init(adiantum_module_init);
+module_exit(adiantum_module_exit);
+
+MODULE_DESCRIPTION("Adiantum length-preserving encryption mode");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("adiantum");
diff --git a/crypto/aead.c b/crypto/aead.c
index 60b3bbe973e7..189c52d1f63a 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -119,20 +119,16 @@ static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
struct crypto_report_aead raead;
struct aead_alg *aead = container_of(alg, struct aead_alg, base);
- strncpy(raead.type, "aead", sizeof(raead.type));
- strncpy(raead.geniv, "<none>", sizeof(raead.geniv));
+ memset(&raead, 0, sizeof(raead));
+
+ strscpy(raead.type, "aead", sizeof(raead.type));
+ strscpy(raead.geniv, "<none>", sizeof(raead.geniv));
raead.blocksize = alg->cra_blocksize;
raead.maxauthsize = aead->maxauthsize;
raead.ivsize = aead->ivsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_AEAD,
- sizeof(struct crypto_report_aead), &raead))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_AEAD, sizeof(raead), &raead);
}
#else
static int crypto_aead_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index ca554d57d01e..13df33aca463 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -63,7 +63,8 @@ static inline u8 byte(const u32 x, const unsigned n)
static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
-__visible const u32 crypto_ft_tab[4][256] = {
+/* cacheline-aligned to facilitate prefetching into cache */
+__visible const u32 crypto_ft_tab[4][256] __cacheline_aligned = {
{
0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
@@ -327,7 +328,7 @@ __visible const u32 crypto_ft_tab[4][256] = {
}
};
-__visible const u32 crypto_fl_tab[4][256] = {
+__visible const u32 crypto_fl_tab[4][256] __cacheline_aligned = {
{
0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -591,7 +592,7 @@ __visible const u32 crypto_fl_tab[4][256] = {
}
};
-__visible const u32 crypto_it_tab[4][256] = {
+__visible const u32 crypto_it_tab[4][256] __cacheline_aligned = {
{
0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
@@ -855,7 +856,7 @@ __visible const u32 crypto_it_tab[4][256] = {
}
};
-__visible const u32 crypto_il_tab[4][256] = {
+__visible const u32 crypto_il_tab[4][256] __cacheline_aligned = {
{
0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
0x00000030, 0x00000036, 0x000000a5, 0x00000038,
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 03023b2290e8..1ff9785b30f5 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -269,6 +269,7 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_enc + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+ unsigned long flags;
int round;
st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
@@ -276,6 +277,12 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
+ /*
+ * Temporarily disable interrupts to avoid races where cachelines are
+ * evicted when the CPU is interrupted to do something else.
+ */
+ local_irq_save(flags);
+
st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
@@ -300,6 +307,8 @@ static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+
+ local_irq_restore(flags);
}
static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
@@ -308,6 +317,7 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
const u32 *rkp = ctx->key_dec + 4;
int rounds = 6 + ctx->key_length / 4;
u32 st0[4], st1[4];
+ unsigned long flags;
int round;
st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
@@ -315,6 +325,12 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
+ /*
+ * Temporarily disable interrupts to avoid races where cachelines are
+ * evicted when the CPU is interrupted to do something else.
+ */
+ local_irq_save(flags);
+
st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
@@ -339,6 +355,8 @@ static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+
+ local_irq_restore(flags);
}
static struct crypto_alg aes_alg = {
diff --git a/crypto/ahash.c b/crypto/ahash.c
index e21667b4e10a..5d320a811f75 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -364,20 +364,28 @@ static int crypto_ahash_op(struct ahash_request *req,
int crypto_ahash_final(struct ahash_request *req)
{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct crypto_alg *alg = tfm->base.__crt_alg;
+ unsigned int nbytes = req->nbytes;
int ret;
+ crypto_stats_get(alg);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
- crypto_stat_ahash_final(req, ret);
+ crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_final);
int crypto_ahash_finup(struct ahash_request *req)
{
+ struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct crypto_alg *alg = tfm->base.__crt_alg;
+ unsigned int nbytes = req->nbytes;
int ret;
+ crypto_stats_get(alg);
ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
- crypto_stat_ahash_final(req, ret);
+ crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_finup);
@@ -385,13 +393,16 @@ EXPORT_SYMBOL_GPL(crypto_ahash_finup);
int crypto_ahash_digest(struct ahash_request *req)
{
struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+ struct crypto_alg *alg = tfm->base.__crt_alg;
+ unsigned int nbytes = req->nbytes;
int ret;
+ crypto_stats_get(alg);
if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
ret = -ENOKEY;
else
ret = crypto_ahash_op(req, tfm->digest);
- crypto_stat_ahash_final(req, ret);
+ crypto_stats_ahash_final(nbytes, ret, alg);
return ret;
}
EXPORT_SYMBOL_GPL(crypto_ahash_digest);
@@ -498,18 +509,14 @@ static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_hash rhash;
- strncpy(rhash.type, "ahash", sizeof(rhash.type));
+ memset(&rhash, 0, sizeof(rhash));
+
+ strscpy(rhash.type, "ahash", sizeof(rhash.type));
rhash.blocksize = alg->cra_blocksize;
rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_HASH,
- sizeof(struct crypto_report_hash), &rhash))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash);
}
#else
static int crypto_ahash_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/akcipher.c b/crypto/akcipher.c
index cfbdb06d8ca8..0cbeae137e0a 100644
--- a/crypto/akcipher.c
+++ b/crypto/akcipher.c
@@ -30,15 +30,12 @@ static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_akcipher rakcipher;
- strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+ memset(&rakcipher, 0, sizeof(rakcipher));
- if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
- sizeof(struct crypto_report_akcipher), &rakcipher))
- goto nla_put_failure;
- return 0;
+ strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
+ sizeof(rakcipher), &rakcipher);
}
#else
static int crypto_akcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 2545c5f89c4c..8b65ada33e5d 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -258,13 +258,7 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
list_add(&alg->cra_list, &crypto_alg_list);
list_add(&larval->alg.cra_list, &crypto_alg_list);
- atomic_set(&alg->encrypt_cnt, 0);
- atomic_set(&alg->decrypt_cnt, 0);
- atomic64_set(&alg->encrypt_tlen, 0);
- atomic64_set(&alg->decrypt_tlen, 0);
- atomic_set(&alg->verify_cnt, 0);
- atomic_set(&alg->cipher_err_cnt, 0);
- atomic_set(&alg->sign_cnt, 0);
+ crypto_stats_init(alg);
out:
return larval;
@@ -1076,6 +1070,245 @@ int crypto_type_has_alg(const char *name, const struct crypto_type *frontend,
}
EXPORT_SYMBOL_GPL(crypto_type_has_alg);
+#ifdef CONFIG_CRYPTO_STATS
+void crypto_stats_init(struct crypto_alg *alg)
+{
+ memset(&alg->stats, 0, sizeof(alg->stats));
+}
+EXPORT_SYMBOL_GPL(crypto_stats_init);
+
+void crypto_stats_get(struct crypto_alg *alg)
+{
+ crypto_alg_get(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_get);
+
+void crypto_stats_ablkcipher_encrypt(unsigned int nbytes, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.cipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.cipher.encrypt_cnt);
+ atomic64_add(nbytes, &alg->stats.cipher.encrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_encrypt);
+
+void crypto_stats_ablkcipher_decrypt(unsigned int nbytes, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.cipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.cipher.decrypt_cnt);
+ atomic64_add(nbytes, &alg->stats.cipher.decrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_ablkcipher_decrypt);
+
+void crypto_stats_aead_encrypt(unsigned int cryptlen, struct crypto_alg *alg,
+ int ret)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.aead.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.aead.encrypt_cnt);
+ atomic64_add(cryptlen, &alg->stats.aead.encrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_aead_encrypt);
+
+void crypto_stats_aead_decrypt(unsigned int cryptlen, struct crypto_alg *alg,
+ int ret)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.aead.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.aead.decrypt_cnt);
+ atomic64_add(cryptlen, &alg->stats.aead.decrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_aead_decrypt);
+
+void crypto_stats_akcipher_encrypt(unsigned int src_len, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.akcipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.akcipher.encrypt_cnt);
+ atomic64_add(src_len, &alg->stats.akcipher.encrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_akcipher_encrypt);
+
+void crypto_stats_akcipher_decrypt(unsigned int src_len, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.akcipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.akcipher.decrypt_cnt);
+ atomic64_add(src_len, &alg->stats.akcipher.decrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_akcipher_decrypt);
+
+void crypto_stats_akcipher_sign(int ret, struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+ atomic64_inc(&alg->stats.akcipher.err_cnt);
+ else
+ atomic64_inc(&alg->stats.akcipher.sign_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_akcipher_sign);
+
+void crypto_stats_akcipher_verify(int ret, struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+ atomic64_inc(&alg->stats.akcipher.err_cnt);
+ else
+ atomic64_inc(&alg->stats.akcipher.verify_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_akcipher_verify);
+
+void crypto_stats_compress(unsigned int slen, int ret, struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.compress.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.compress.compress_cnt);
+ atomic64_add(slen, &alg->stats.compress.compress_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_compress);
+
+void crypto_stats_decompress(unsigned int slen, int ret, struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.compress.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.compress.decompress_cnt);
+ atomic64_add(slen, &alg->stats.compress.decompress_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_decompress);
+
+void crypto_stats_ahash_update(unsigned int nbytes, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+ atomic64_inc(&alg->stats.hash.err_cnt);
+ else
+ atomic64_add(nbytes, &alg->stats.hash.hash_tlen);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_ahash_update);
+
+void crypto_stats_ahash_final(unsigned int nbytes, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.hash.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.hash.hash_cnt);
+ atomic64_add(nbytes, &alg->stats.hash.hash_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_ahash_final);
+
+void crypto_stats_kpp_set_secret(struct crypto_alg *alg, int ret)
+{
+ if (ret)
+ atomic64_inc(&alg->stats.kpp.err_cnt);
+ else
+ atomic64_inc(&alg->stats.kpp.setsecret_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_kpp_set_secret);
+
+void crypto_stats_kpp_generate_public_key(struct crypto_alg *alg, int ret)
+{
+ if (ret)
+ atomic64_inc(&alg->stats.kpp.err_cnt);
+ else
+ atomic64_inc(&alg->stats.kpp.generate_public_key_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_kpp_generate_public_key);
+
+void crypto_stats_kpp_compute_shared_secret(struct crypto_alg *alg, int ret)
+{
+ if (ret)
+ atomic64_inc(&alg->stats.kpp.err_cnt);
+ else
+ atomic64_inc(&alg->stats.kpp.compute_shared_secret_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_kpp_compute_shared_secret);
+
+void crypto_stats_rng_seed(struct crypto_alg *alg, int ret)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+ atomic64_inc(&alg->stats.rng.err_cnt);
+ else
+ atomic64_inc(&alg->stats.rng.seed_cnt);
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_rng_seed);
+
+void crypto_stats_rng_generate(struct crypto_alg *alg, unsigned int dlen,
+ int ret)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.rng.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.rng.generate_cnt);
+ atomic64_add(dlen, &alg->stats.rng.generate_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_rng_generate);
+
+void crypto_stats_skcipher_encrypt(unsigned int cryptlen, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.cipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.cipher.encrypt_cnt);
+ atomic64_add(cryptlen, &alg->stats.cipher.encrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_skcipher_encrypt);
+
+void crypto_stats_skcipher_decrypt(unsigned int cryptlen, int ret,
+ struct crypto_alg *alg)
+{
+ if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+ atomic64_inc(&alg->stats.cipher.err_cnt);
+ } else {
+ atomic64_inc(&alg->stats.cipher.decrypt_cnt);
+ atomic64_add(cryptlen, &alg->stats.cipher.decrypt_tlen);
+ }
+ crypto_alg_put(alg);
+}
+EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt);
+#endif
+
static int __init crypto_algapi_init(void)
{
crypto_init_proc();
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index f93abf13b5d4..c5398bd54942 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -507,23 +507,18 @@ static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_blkcipher rblkcipher;
- strncpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type));
- strncpy(rblkcipher.geniv, alg->cra_blkcipher.geniv ?: "<default>",
- sizeof(rblkcipher.geniv));
- rblkcipher.geniv[sizeof(rblkcipher.geniv) - 1] = '\0';
+ memset(&rblkcipher, 0, sizeof(rblkcipher));
+
+ strscpy(rblkcipher.type, "blkcipher", sizeof(rblkcipher.type));
+ strscpy(rblkcipher.geniv, "<default>", sizeof(rblkcipher.geniv));
rblkcipher.blocksize = alg->cra_blocksize;
rblkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
rblkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
rblkcipher.ivsize = alg->cra_blkcipher.ivsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
- sizeof(struct crypto_report_blkcipher), &rblkcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
+ sizeof(rblkcipher), &rblkcipher);
}
#else
static int crypto_blkcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
@@ -541,8 +536,7 @@ static void crypto_blkcipher_show(struct seq_file *m, struct crypto_alg *alg)
seq_printf(m, "min keysize : %u\n", alg->cra_blkcipher.min_keysize);
seq_printf(m, "max keysize : %u\n", alg->cra_blkcipher.max_keysize);
seq_printf(m, "ivsize : %u\n", alg->cra_blkcipher.ivsize);
- seq_printf(m, "geniv : %s\n", alg->cra_blkcipher.geniv ?:
- "<default>");
+ seq_printf(m, "geniv : <default>\n");
}
const struct crypto_type crypto_blkcipher_type = {
diff --git a/crypto/cfb.c b/crypto/cfb.c
index 20987d0e09d8..e81e45673498 100644
--- a/crypto/cfb.c
+++ b/crypto/cfb.c
@@ -144,7 +144,7 @@ static int crypto_cfb_decrypt_segment(struct skcipher_walk *walk,
do {
crypto_cfb_encrypt_one(tfm, iv, dst);
- crypto_xor(dst, iv, bsize);
+ crypto_xor(dst, src, bsize);
iv = src;
src += bsize;
diff --git a/crypto/chacha20_generic.c b/crypto/chacha20_generic.c
deleted file mode 100644
index 3ae96587caf9..000000000000
--- a/crypto/chacha20_generic.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * ChaCha20 256-bit cipher algorithm, RFC7539
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/chacha20.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/module.h>
-
-static void chacha20_docrypt(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes)
-{
- /* aligned to potentially speed up crypto_xor() */
- u8 stream[CHACHA20_BLOCK_SIZE] __aligned(sizeof(long));
-
- if (dst != src)
- memcpy(dst, src, bytes);
-
- while (bytes >= CHACHA20_BLOCK_SIZE) {
- chacha20_block(state, stream);
- crypto_xor(dst, stream, CHACHA20_BLOCK_SIZE);
- bytes -= CHACHA20_BLOCK_SIZE;
- dst += CHACHA20_BLOCK_SIZE;
- }
- if (bytes) {
- chacha20_block(state, stream);
- crypto_xor(dst, stream, bytes);
- }
-}
-
-void crypto_chacha20_init(u32 *state, struct chacha20_ctx *ctx, u8 *iv)
-{
- state[0] = 0x61707865; /* "expa" */
- state[1] = 0x3320646e; /* "nd 3" */
- state[2] = 0x79622d32; /* "2-by" */
- state[3] = 0x6b206574; /* "te k" */
- state[4] = ctx->key[0];
- state[5] = ctx->key[1];
- state[6] = ctx->key[2];
- state[7] = ctx->key[3];
- state[8] = ctx->key[4];
- state[9] = ctx->key[5];
- state[10] = ctx->key[6];
- state[11] = ctx->key[7];
- state[12] = get_unaligned_le32(iv + 0);
- state[13] = get_unaligned_le32(iv + 4);
- state[14] = get_unaligned_le32(iv + 8);
- state[15] = get_unaligned_le32(iv + 12);
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_init);
-
-int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keysize)
-{
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- int i;
-
- if (keysize != CHACHA20_KEY_SIZE)
- return -EINVAL;
-
- for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
- ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
-
-int crypto_chacha20_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, true);
-
- crypto_chacha20_init(state, ctx, walk.iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- chacha20_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes);
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(crypto_chacha20_crypt);
-
-static struct skcipher_alg alg = {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-generic",
- .base.cra_priority = 100,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha20_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA20_KEY_SIZE,
- .max_keysize = CHACHA20_KEY_SIZE,
- .ivsize = CHACHA20_IV_SIZE,
- .chunksize = CHACHA20_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = crypto_chacha20_crypt,
- .decrypt = crypto_chacha20_crypt,
-};
-
-static int __init chacha20_generic_mod_init(void)
-{
- return crypto_register_skcipher(&alg);
-}
-
-static void __exit chacha20_generic_mod_fini(void)
-{
- crypto_unregister_skcipher(&alg);
-}
-
-module_init(chacha20_generic_mod_init);
-module_exit(chacha20_generic_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
-MODULE_DESCRIPTION("chacha20 cipher algorithm");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-generic");
diff --git a/crypto/chacha20poly1305.c b/crypto/chacha20poly1305.c
index 600afa99941f..fef11446ab1b 100644
--- a/crypto/chacha20poly1305.c
+++ b/crypto/chacha20poly1305.c
@@ -13,7 +13,7 @@
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
-#include <crypto/chacha20.h>
+#include <crypto/chacha.h>
#include <crypto/poly1305.h>
#include <linux/err.h>
#include <linux/init.h>
@@ -22,8 +22,6 @@
#include "internal.h"
-#define CHACHAPOLY_IV_SIZE 12
-
struct chachapoly_instance_ctx {
struct crypto_skcipher_spawn chacha;
struct crypto_ahash_spawn poly;
@@ -51,7 +49,7 @@ struct poly_req {
};
struct chacha_req {
- u8 iv[CHACHA20_IV_SIZE];
+ u8 iv[CHACHA_IV_SIZE];
struct scatterlist src[1];
struct skcipher_request req; /* must be last member */
};
@@ -91,7 +89,7 @@ static void chacha_iv(u8 *iv, struct aead_request *req, u32 icb)
memcpy(iv, &leicb, sizeof(leicb));
memcpy(iv + sizeof(leicb), ctx->salt, ctx->saltlen);
memcpy(iv + sizeof(leicb) + ctx->saltlen, req->iv,
- CHACHA20_IV_SIZE - sizeof(leicb) - ctx->saltlen);
+ CHACHA_IV_SIZE - sizeof(leicb) - ctx->saltlen);
}
static int poly_verify_tag(struct aead_request *req)
@@ -494,7 +492,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
struct chachapoly_ctx *ctx = crypto_aead_ctx(aead);
int err;
- if (keylen != ctx->saltlen + CHACHA20_KEY_SIZE)
+ if (keylen != ctx->saltlen + CHACHA_KEY_SIZE)
return -EINVAL;
keylen -= ctx->saltlen;
@@ -639,7 +637,7 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
err = -EINVAL;
/* Need 16-byte IV size, including Initial Block Counter value */
- if (crypto_skcipher_alg_ivsize(chacha) != CHACHA20_IV_SIZE)
+ if (crypto_skcipher_alg_ivsize(chacha) != CHACHA_IV_SIZE)
goto out_drop_chacha;
/* Not a stream cipher? */
if (chacha->base.cra_blocksize != 1)
diff --git a/crypto/chacha_generic.c b/crypto/chacha_generic.c
new file mode 100644
index 000000000000..35b583101f4f
--- /dev/null
+++ b/crypto/chacha_generic.c
@@ -0,0 +1,217 @@
+/*
+ * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2015 Martin Willi
+ * Copyright (C) 2018 Google LLC
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/module.h>
+
+static void chacha_docrypt(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ /* aligned to potentially speed up crypto_xor() */
+ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long));
+
+ if (dst != src)
+ memcpy(dst, src, bytes);
+
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block(state, stream, nrounds);
+ crypto_xor(dst, stream, CHACHA_BLOCK_SIZE);
+ bytes -= CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ }
+ if (bytes) {
+ chacha_block(state, stream, nrounds);
+ crypto_xor(dst, stream, bytes);
+ }
+}
+
+static int chacha_stream_xor(struct skcipher_request *req,
+ struct chacha_ctx *ctx, u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ crypto_chacha_init(state, ctx, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha_docrypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+void crypto_chacha_init(u32 *state, struct chacha_ctx *ctx, u8 *iv)
+{
+ state[0] = 0x61707865; /* "expa" */
+ state[1] = 0x3320646e; /* "nd 3" */
+ state[2] = 0x79622d32; /* "2-by" */
+ state[3] = 0x6b206574; /* "te k" */
+ state[4] = ctx->key[0];
+ state[5] = ctx->key[1];
+ state[6] = ctx->key[2];
+ state[7] = ctx->key[3];
+ state[8] = ctx->key[4];
+ state[9] = ctx->key[5];
+ state[10] = ctx->key[6];
+ state[11] = ctx->key[7];
+ state[12] = get_unaligned_le32(iv + 0);
+ state[13] = get_unaligned_le32(iv + 4);
+ state[14] = get_unaligned_le32(iv + 8);
+ state[15] = get_unaligned_le32(iv + 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_init);
+
+static int chacha_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize, int nrounds)
+{
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int i;
+
+ if (keysize != CHACHA_KEY_SIZE)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->key); i++)
+ ctx->key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ ctx->nrounds = nrounds;
+ return 0;
+}
+
+int crypto_chacha20_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 20);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha20_setkey);
+
+int crypto_chacha12_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keysize)
+{
+ return chacha_setkey(tfm, key, keysize, 12);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha12_setkey);
+
+int crypto_chacha_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv);
+}
+EXPORT_SYMBOL_GPL(crypto_chacha_crypt);
+
+int crypto_xchacha_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ /* Compute the subkey given the original key and first 128 nonce bits */
+ crypto_chacha_init(state, ctx, req->iv);
+ hchacha_block(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ /* Build the real IV */
+ memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
+ memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */
+
+ /* Generate the stream and XOR it with the data */
+ return chacha_stream_xor(req, &subctx, real_iv);
+}
+EXPORT_SYMBOL_GPL(crypto_xchacha_crypt);
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = crypto_chacha_crypt,
+ .decrypt = crypto_chacha_crypt,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha20_setkey,
+ .encrypt = crypto_xchacha_crypt,
+ .decrypt = crypto_xchacha_crypt,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-generic",
+ .base.cra_priority = 100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = crypto_chacha12_setkey,
+ .encrypt = crypto_xchacha_crypt,
+ .decrypt = crypto_xchacha_crypt,
+ }
+};
+
+static int __init chacha_generic_mod_init(void)
+{
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_generic_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_generic_mod_init);
+module_exit(chacha_generic_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-generic");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-generic");
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 7118fb5efbaa..5640e5db7bdb 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -422,8 +422,6 @@ static int cryptd_create_blkcipher(struct crypto_template *tmpl,
inst->alg.cra_ablkcipher.min_keysize = alg->cra_blkcipher.min_keysize;
inst->alg.cra_ablkcipher.max_keysize = alg->cra_blkcipher.max_keysize;
- inst->alg.cra_ablkcipher.geniv = alg->cra_blkcipher.geniv;
-
inst->alg.cra_ctxsize = sizeof(struct cryptd_blkcipher_ctx);
inst->alg.cra_init = cryptd_blkcipher_init_tfm;
@@ -1174,7 +1172,7 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name,
return ERR_PTR(-EINVAL);
type = crypto_skcipher_type(type);
mask &= ~CRYPTO_ALG_TYPE_MASK;
- mask |= (CRYPTO_ALG_GENIV | CRYPTO_ALG_TYPE_BLKCIPHER_MASK);
+ mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK;
tfm = crypto_alloc_base(cryptd_alg_name, type, mask);
if (IS_ERR(tfm))
return ERR_CAST(tfm);
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index 784748dbb19f..f25d3f32c9c2 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -84,87 +84,38 @@ static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_cipher rcipher;
- strncpy(rcipher.type, "cipher", sizeof(rcipher.type));
+ memset(&rcipher, 0, sizeof(rcipher));
+
+ strscpy(rcipher.type, "cipher", sizeof(rcipher.type));
rcipher.blocksize = alg->cra_blocksize;
rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
rcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_CIPHER,
- sizeof(struct crypto_report_cipher), &rcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_CIPHER,
+ sizeof(rcipher), &rcipher);
}
static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_comp rcomp;
- strncpy(rcomp.type, "compression", sizeof(rcomp.type));
- if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
- sizeof(struct crypto_report_comp), &rcomp))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
-{
- struct crypto_report_acomp racomp;
+ memset(&rcomp, 0, sizeof(rcomp));
- strncpy(racomp.type, "acomp", sizeof(racomp.type));
+ strscpy(rcomp.type, "compression", sizeof(rcomp.type));
- if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
- sizeof(struct crypto_report_acomp), &racomp))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
-{
- struct crypto_report_akcipher rakcipher;
-
- strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
-
- if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
- sizeof(struct crypto_report_akcipher), &rakcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
-}
-
-static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
-{
- struct crypto_report_kpp rkpp;
-
- strncpy(rkpp.type, "kpp", sizeof(rkpp.type));
-
- if (nla_put(skb, CRYPTOCFGA_REPORT_KPP,
- sizeof(struct crypto_report_kpp), &rkpp))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(rcomp), &rcomp);
}
static int crypto_report_one(struct crypto_alg *alg,
struct crypto_user_alg *ualg, struct sk_buff *skb)
{
- strncpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
- strncpy(ualg->cru_driver_name, alg->cra_driver_name,
+ memset(ualg, 0, sizeof(*ualg));
+
+ strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+ strscpy(ualg->cru_driver_name, alg->cra_driver_name,
sizeof(ualg->cru_driver_name));
- strncpy(ualg->cru_module_name, module_name(alg->cra_module),
+ strscpy(ualg->cru_module_name, module_name(alg->cra_module),
sizeof(ualg->cru_module_name));
ualg->cru_type = 0;
@@ -177,9 +128,9 @@ static int crypto_report_one(struct crypto_alg *alg,
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
struct crypto_report_larval rl;
- strncpy(rl.type, "larval", sizeof(rl.type));
- if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
- sizeof(struct crypto_report_larval), &rl))
+ memset(&rl, 0, sizeof(rl));
+ strscpy(rl.type, "larval", sizeof(rl.type));
+ if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL, sizeof(rl), &rl))
goto nla_put_failure;
goto out;
}
@@ -202,20 +153,6 @@ static int crypto_report_one(struct crypto_alg *alg,
goto nla_put_failure;
break;
- case CRYPTO_ALG_TYPE_ACOMPRESS:
- if (crypto_report_acomp(skb, alg))
- goto nla_put_failure;
-
- break;
- case CRYPTO_ALG_TYPE_AKCIPHER:
- if (crypto_report_akcipher(skb, alg))
- goto nla_put_failure;
-
- break;
- case CRYPTO_ALG_TYPE_KPP:
- if (crypto_report_kpp(skb, alg))
- goto nla_put_failure;
- break;
}
out:
@@ -294,30 +231,33 @@ drop_alg:
static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct crypto_alg *alg;
+ const size_t start_pos = cb->args[0];
+ size_t pos = 0;
struct crypto_dump_info info;
- int err;
-
- if (cb->args[0])
- goto out;
-
- cb->args[0] = 1;
+ struct crypto_alg *alg;
+ int res;
info.in_skb = cb->skb;
info.out_skb = skb;
info.nlmsg_seq = cb->nlh->nlmsg_seq;
info.nlmsg_flags = NLM_F_MULTI;
+ down_read(&crypto_alg_sem);
list_for_each_entry(alg, &crypto_alg_list, cra_list) {
- err = crypto_report_alg(alg, &info);
- if (err)
- goto out_err;
+ if (pos >= start_pos) {
+ res = crypto_report_alg(alg, &info);
+ if (res == -EMSGSIZE)
+ break;
+ if (res)
+ goto out;
+ }
+ pos++;
}
-
+ cb->args[0] = pos;
+ res = skb->len;
out:
- return skb->len;
-out_err:
- return err;
+ up_read(&crypto_alg_sem);
+ return res;
}
static int crypto_dump_report_done(struct netlink_callback *cb)
@@ -483,9 +423,7 @@ static const struct crypto_link {
.dump = crypto_dump_report,
.done = crypto_dump_report_done},
[CRYPTO_MSG_DELRNG - CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
- [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
- .dump = crypto_dump_reportstat,
- .done = crypto_dump_reportstat_done},
+ [CRYPTO_MSG_GETSTAT - CRYPTO_MSG_BASE] = { .doit = crypto_reportstat},
};
static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -505,7 +443,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) &&
(nlh->nlmsg_flags & NLM_F_DUMP))) {
struct crypto_alg *alg;
- u16 dump_alloc = 0;
+ unsigned long dump_alloc = 0;
if (link->dump == NULL)
return -EINVAL;
@@ -513,16 +451,16 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
down_read(&crypto_alg_sem);
list_for_each_entry(alg, &crypto_alg_list, cra_list)
dump_alloc += CRYPTO_REPORT_MAXSIZE;
+ up_read(&crypto_alg_sem);
{
struct netlink_dump_control c = {
.dump = link->dump,
.done = link->done,
- .min_dump_alloc = dump_alloc,
+ .min_dump_alloc = min(dump_alloc, 65535UL),
};
err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
}
- up_read(&crypto_alg_sem);
return err;
}
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index 1dfaa0ccd555..3e9a53233d80 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -33,260 +33,149 @@ struct crypto_dump_info {
static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat raead;
- u64 v64;
- u32 v32;
+ struct crypto_stat_aead raead;
memset(&raead, 0, sizeof(raead));
- strncpy(raead.type, "aead", sizeof(raead.type));
-
- v32 = atomic_read(&alg->encrypt_cnt);
- raead.stat_encrypt_cnt = v32;
- v64 = atomic64_read(&alg->encrypt_tlen);
- raead.stat_encrypt_tlen = v64;
- v32 = atomic_read(&alg->decrypt_cnt);
- raead.stat_decrypt_cnt = v32;
- v64 = atomic64_read(&alg->decrypt_tlen);
- raead.stat_decrypt_tlen = v64;
- v32 = atomic_read(&alg->aead_err_cnt);
- raead.stat_aead_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
- sizeof(struct crypto_stat), &raead))
- goto nla_put_failure;
- return 0;
+ strscpy(raead.type, "aead", sizeof(raead.type));
-nla_put_failure:
- return -EMSGSIZE;
+ raead.stat_encrypt_cnt = atomic64_read(&alg->stats.aead.encrypt_cnt);
+ raead.stat_encrypt_tlen = atomic64_read(&alg->stats.aead.encrypt_tlen);
+ raead.stat_decrypt_cnt = atomic64_read(&alg->stats.aead.decrypt_cnt);
+ raead.stat_decrypt_tlen = atomic64_read(&alg->stats.aead.decrypt_tlen);
+ raead.stat_err_cnt = atomic64_read(&alg->stats.aead.err_cnt);
+
+ return nla_put(skb, CRYPTOCFGA_STAT_AEAD, sizeof(raead), &raead);
}
static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rcipher;
- u64 v64;
- u32 v32;
+ struct crypto_stat_cipher rcipher;
memset(&rcipher, 0, sizeof(rcipher));
- strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
-
- v32 = atomic_read(&alg->encrypt_cnt);
- rcipher.stat_encrypt_cnt = v32;
- v64 = atomic64_read(&alg->encrypt_tlen);
- rcipher.stat_encrypt_tlen = v64;
- v32 = atomic_read(&alg->decrypt_cnt);
- rcipher.stat_decrypt_cnt = v32;
- v64 = atomic64_read(&alg->decrypt_tlen);
- rcipher.stat_decrypt_tlen = v64;
- v32 = atomic_read(&alg->cipher_err_cnt);
- rcipher.stat_cipher_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
- sizeof(struct crypto_stat), &rcipher))
- goto nla_put_failure;
- return 0;
+ strscpy(rcipher.type, "cipher", sizeof(rcipher.type));
-nla_put_failure:
- return -EMSGSIZE;
+ rcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.cipher.encrypt_cnt);
+ rcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.cipher.encrypt_tlen);
+ rcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.cipher.decrypt_cnt);
+ rcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.cipher.decrypt_tlen);
+ rcipher.stat_err_cnt = atomic64_read(&alg->stats.cipher.err_cnt);
+
+ return nla_put(skb, CRYPTOCFGA_STAT_CIPHER, sizeof(rcipher), &rcipher);
}
static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rcomp;
- u64 v64;
- u32 v32;
+ struct crypto_stat_compress rcomp;
memset(&rcomp, 0, sizeof(rcomp));
- strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
- v32 = atomic_read(&alg->compress_cnt);
- rcomp.stat_compress_cnt = v32;
- v64 = atomic64_read(&alg->compress_tlen);
- rcomp.stat_compress_tlen = v64;
- v32 = atomic_read(&alg->decompress_cnt);
- rcomp.stat_decompress_cnt = v32;
- v64 = atomic64_read(&alg->decompress_tlen);
- rcomp.stat_decompress_tlen = v64;
- v32 = atomic_read(&alg->cipher_err_cnt);
- rcomp.stat_compress_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
- sizeof(struct crypto_stat), &rcomp))
- goto nla_put_failure;
- return 0;
+ strscpy(rcomp.type, "compression", sizeof(rcomp.type));
+ rcomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt);
+ rcomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen);
+ rcomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt);
+ rcomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen);
+ rcomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_COMPRESS, sizeof(rcomp), &rcomp);
}
static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat racomp;
- u64 v64;
- u32 v32;
+ struct crypto_stat_compress racomp;
memset(&racomp, 0, sizeof(racomp));
- strlcpy(racomp.type, "acomp", sizeof(racomp.type));
- v32 = atomic_read(&alg->compress_cnt);
- racomp.stat_compress_cnt = v32;
- v64 = atomic64_read(&alg->compress_tlen);
- racomp.stat_compress_tlen = v64;
- v32 = atomic_read(&alg->decompress_cnt);
- racomp.stat_decompress_cnt = v32;
- v64 = atomic64_read(&alg->decompress_tlen);
- racomp.stat_decompress_tlen = v64;
- v32 = atomic_read(&alg->cipher_err_cnt);
- racomp.stat_compress_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
- sizeof(struct crypto_stat), &racomp))
- goto nla_put_failure;
- return 0;
+ strscpy(racomp.type, "acomp", sizeof(racomp.type));
+ racomp.stat_compress_cnt = atomic64_read(&alg->stats.compress.compress_cnt);
+ racomp.stat_compress_tlen = atomic64_read(&alg->stats.compress.compress_tlen);
+ racomp.stat_decompress_cnt = atomic64_read(&alg->stats.compress.decompress_cnt);
+ racomp.stat_decompress_tlen = atomic64_read(&alg->stats.compress.decompress_tlen);
+ racomp.stat_err_cnt = atomic64_read(&alg->stats.compress.err_cnt);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_ACOMP, sizeof(racomp), &racomp);
}
static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rakcipher;
- u64 v64;
- u32 v32;
+ struct crypto_stat_akcipher rakcipher;
memset(&rakcipher, 0, sizeof(rakcipher));
- strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
- v32 = atomic_read(&alg->encrypt_cnt);
- rakcipher.stat_encrypt_cnt = v32;
- v64 = atomic64_read(&alg->encrypt_tlen);
- rakcipher.stat_encrypt_tlen = v64;
- v32 = atomic_read(&alg->decrypt_cnt);
- rakcipher.stat_decrypt_cnt = v32;
- v64 = atomic64_read(&alg->decrypt_tlen);
- rakcipher.stat_decrypt_tlen = v64;
- v32 = atomic_read(&alg->sign_cnt);
- rakcipher.stat_sign_cnt = v32;
- v32 = atomic_read(&alg->verify_cnt);
- rakcipher.stat_verify_cnt = v32;
- v32 = atomic_read(&alg->akcipher_err_cnt);
- rakcipher.stat_akcipher_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
- sizeof(struct crypto_stat), &rakcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ strscpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+ rakcipher.stat_encrypt_cnt = atomic64_read(&alg->stats.akcipher.encrypt_cnt);
+ rakcipher.stat_encrypt_tlen = atomic64_read(&alg->stats.akcipher.encrypt_tlen);
+ rakcipher.stat_decrypt_cnt = atomic64_read(&alg->stats.akcipher.decrypt_cnt);
+ rakcipher.stat_decrypt_tlen = atomic64_read(&alg->stats.akcipher.decrypt_tlen);
+ rakcipher.stat_sign_cnt = atomic64_read(&alg->stats.akcipher.sign_cnt);
+ rakcipher.stat_verify_cnt = atomic64_read(&alg->stats.akcipher.verify_cnt);
+ rakcipher.stat_err_cnt = atomic64_read(&alg->stats.akcipher.err_cnt);
+
+ return nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
+ sizeof(rakcipher), &rakcipher);
}
static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rkpp;
- u32 v;
+ struct crypto_stat_kpp rkpp;
memset(&rkpp, 0, sizeof(rkpp));
- strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
-
- v = atomic_read(&alg->setsecret_cnt);
- rkpp.stat_setsecret_cnt = v;
- v = atomic_read(&alg->generate_public_key_cnt);
- rkpp.stat_generate_public_key_cnt = v;
- v = atomic_read(&alg->compute_shared_secret_cnt);
- rkpp.stat_compute_shared_secret_cnt = v;
- v = atomic_read(&alg->kpp_err_cnt);
- rkpp.stat_kpp_err_cnt = v;
+ strscpy(rkpp.type, "kpp", sizeof(rkpp.type));
- if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
- sizeof(struct crypto_stat), &rkpp))
- goto nla_put_failure;
- return 0;
+ rkpp.stat_setsecret_cnt = atomic64_read(&alg->stats.kpp.setsecret_cnt);
+ rkpp.stat_generate_public_key_cnt = atomic64_read(&alg->stats.kpp.generate_public_key_cnt);
+ rkpp.stat_compute_shared_secret_cnt = atomic64_read(&alg->stats.kpp.compute_shared_secret_cnt);
+ rkpp.stat_err_cnt = atomic64_read(&alg->stats.kpp.err_cnt);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_KPP, sizeof(rkpp), &rkpp);
}
static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rhash;
- u64 v64;
- u32 v32;
+ struct crypto_stat_hash rhash;
memset(&rhash, 0, sizeof(rhash));
- strncpy(rhash.type, "ahash", sizeof(rhash.type));
+ strscpy(rhash.type, "ahash", sizeof(rhash.type));
- v32 = atomic_read(&alg->hash_cnt);
- rhash.stat_hash_cnt = v32;
- v64 = atomic64_read(&alg->hash_tlen);
- rhash.stat_hash_tlen = v64;
- v32 = atomic_read(&alg->hash_err_cnt);
- rhash.stat_hash_err_cnt = v32;
+ rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt);
+ rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen);
+ rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt);
- if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
- sizeof(struct crypto_stat), &rhash))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash);
}
static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rhash;
- u64 v64;
- u32 v32;
+ struct crypto_stat_hash rhash;
memset(&rhash, 0, sizeof(rhash));
- strncpy(rhash.type, "shash", sizeof(rhash.type));
-
- v32 = atomic_read(&alg->hash_cnt);
- rhash.stat_hash_cnt = v32;
- v64 = atomic64_read(&alg->hash_tlen);
- rhash.stat_hash_tlen = v64;
- v32 = atomic_read(&alg->hash_err_cnt);
- rhash.stat_hash_err_cnt = v32;
+ strscpy(rhash.type, "shash", sizeof(rhash.type));
- if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
- sizeof(struct crypto_stat), &rhash))
- goto nla_put_failure;
- return 0;
+ rhash.stat_hash_cnt = atomic64_read(&alg->stats.hash.hash_cnt);
+ rhash.stat_hash_tlen = atomic64_read(&alg->stats.hash.hash_tlen);
+ rhash.stat_err_cnt = atomic64_read(&alg->stats.hash.err_cnt);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_HASH, sizeof(rhash), &rhash);
}
static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
{
- struct crypto_stat rrng;
- u64 v64;
- u32 v32;
+ struct crypto_stat_rng rrng;
memset(&rrng, 0, sizeof(rrng));
- strncpy(rrng.type, "rng", sizeof(rrng.type));
+ strscpy(rrng.type, "rng", sizeof(rrng.type));
- v32 = atomic_read(&alg->generate_cnt);
- rrng.stat_generate_cnt = v32;
- v64 = atomic64_read(&alg->generate_tlen);
- rrng.stat_generate_tlen = v64;
- v32 = atomic_read(&alg->seed_cnt);
- rrng.stat_seed_cnt = v32;
- v32 = atomic_read(&alg->hash_err_cnt);
- rrng.stat_rng_err_cnt = v32;
-
- if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
- sizeof(struct crypto_stat), &rrng))
- goto nla_put_failure;
- return 0;
+ rrng.stat_generate_cnt = atomic64_read(&alg->stats.rng.generate_cnt);
+ rrng.stat_generate_tlen = atomic64_read(&alg->stats.rng.generate_tlen);
+ rrng.stat_seed_cnt = atomic64_read(&alg->stats.rng.seed_cnt);
+ rrng.stat_err_cnt = atomic64_read(&alg->stats.rng.err_cnt);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_STAT_RNG, sizeof(rrng), &rrng);
}
static int crypto_reportstat_one(struct crypto_alg *alg,
@@ -295,10 +184,10 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
{
memset(ualg, 0, sizeof(*ualg));
- strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
- strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+ strscpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+ strscpy(ualg->cru_driver_name, alg->cra_driver_name,
sizeof(ualg->cru_driver_name));
- strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+ strscpy(ualg->cru_module_name, module_name(alg->cra_module),
sizeof(ualg->cru_module_name));
ualg->cru_type = 0;
@@ -309,12 +198,11 @@ static int crypto_reportstat_one(struct crypto_alg *alg,
if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
goto nla_put_failure;
if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
- struct crypto_stat rl;
+ struct crypto_stat_larval rl;
memset(&rl, 0, sizeof(rl));
- strlcpy(rl.type, "larval", sizeof(rl.type));
- if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
- sizeof(struct crypto_stat), &rl))
+ strscpy(rl.type, "larval", sizeof(rl.type));
+ if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL, sizeof(rl), &rl))
goto nla_put_failure;
goto out;
}
@@ -448,37 +336,4 @@ drop_alg:
return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
}
-int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct crypto_alg *alg;
- struct crypto_dump_info info;
- int err;
-
- if (cb->args[0])
- goto out;
-
- cb->args[0] = 1;
-
- info.in_skb = cb->skb;
- info.out_skb = skb;
- info.nlmsg_seq = cb->nlh->nlmsg_seq;
- info.nlmsg_flags = NLM_F_MULTI;
-
- list_for_each_entry(alg, &crypto_alg_list, cra_list) {
- err = crypto_reportstat_alg(alg, &info);
- if (err)
- goto out_err;
- }
-
-out:
- return skb->len;
-out_err:
- return err;
-}
-
-int crypto_dump_reportstat_done(struct netlink_callback *cb)
-{
- return 0;
-}
-
MODULE_LICENSE("GPL");
diff --git a/crypto/ctr.c b/crypto/ctr.c
index 435b75bd619e..30f3946efc6d 100644
--- a/crypto/ctr.c
+++ b/crypto/ctr.c
@@ -233,8 +233,6 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
inst->alg.cra_blkcipher.encrypt = crypto_ctr_crypt;
inst->alg.cra_blkcipher.decrypt = crypto_ctr_crypt;
- inst->alg.cra_blkcipher.geniv = "chainiv";
-
out:
crypto_mod_put(alg);
return inst;
diff --git a/crypto/ecc.c b/crypto/ecc.c
index 8facafd67802..ed1237115066 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -842,15 +842,23 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime,
static void ecc_point_mult(struct ecc_point *result,
const struct ecc_point *point, const u64 *scalar,
- u64 *initial_z, u64 *curve_prime,
+ u64 *initial_z, const struct ecc_curve *curve,
unsigned int ndigits)
{
/* R0 and R1 */
u64 rx[2][ECC_MAX_DIGITS];
u64 ry[2][ECC_MAX_DIGITS];
u64 z[ECC_MAX_DIGITS];
+ u64 sk[2][ECC_MAX_DIGITS];
+ u64 *curve_prime = curve->p;
int i, nb;
- int num_bits = vli_num_bits(scalar, ndigits);
+ int num_bits;
+ int carry;
+
+ carry = vli_add(sk[0], scalar, curve->n, ndigits);
+ vli_add(sk[1], sk[0], curve->n, ndigits);
+ scalar = sk[!carry];
+ num_bits = sizeof(u64) * ndigits * 8 + 1;
vli_set(rx[1], point->x, ndigits);
vli_set(ry[1], point->y, ndigits);
@@ -904,30 +912,43 @@ static inline void ecc_swap_digits(const u64 *in, u64 *out,
out[i] = __swab64(in[ndigits - 1 - i]);
}
-int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
- const u64 *private_key, unsigned int private_key_len)
+static int __ecc_is_key_valid(const struct ecc_curve *curve,
+ const u64 *private_key, unsigned int ndigits)
{
- int nbytes;
- const struct ecc_curve *curve = ecc_get_curve(curve_id);
+ u64 one[ECC_MAX_DIGITS] = { 1, };
+ u64 res[ECC_MAX_DIGITS];
if (!private_key)
return -EINVAL;
- nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
-
- if (private_key_len != nbytes)
+ if (curve->g.ndigits != ndigits)
return -EINVAL;
- if (vli_is_zero(private_key, ndigits))
+ /* Make sure the private key is in the range [2, n-3]. */
+ if (vli_cmp(one, private_key, ndigits) != -1)
return -EINVAL;
-
- /* Make sure the private key is in the range [1, n-1]. */
- if (vli_cmp(curve->n, private_key, ndigits) != 1)
+ vli_sub(res, curve->n, one, ndigits);
+ vli_sub(res, res, one, ndigits);
+ if (vli_cmp(res, private_key, ndigits) != 1)
return -EINVAL;
return 0;
}
+int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
+ const u64 *private_key, unsigned int private_key_len)
+{
+ int nbytes;
+ const struct ecc_curve *curve = ecc_get_curve(curve_id);
+
+ nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
+
+ if (private_key_len != nbytes)
+ return -EINVAL;
+
+ return __ecc_is_key_valid(curve, private_key, ndigits);
+}
+
/*
* ECC private keys are generated using the method of extra random bits,
* equivalent to that described in FIPS 186-4, Appendix B.4.1.
@@ -971,11 +992,8 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
if (err)
return err;
- if (vli_is_zero(priv, ndigits))
- return -EINVAL;
-
- /* Make sure the private key is in the range [1, n-1]. */
- if (vli_cmp(curve->n, priv, ndigits) != 1)
+ /* Make sure the private key is in the valid range. */
+ if (__ecc_is_key_valid(curve, priv, ndigits))
return -EINVAL;
ecc_swap_digits(priv, privkey, ndigits);
@@ -1004,7 +1022,7 @@ int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
goto out;
}
- ecc_point_mult(pk, &curve->g, priv, NULL, curve->p, ndigits);
+ ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits);
if (ecc_point_is_zero(pk)) {
ret = -EAGAIN;
goto err_free_point;
@@ -1090,7 +1108,7 @@ int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
goto err_alloc_product;
}
- ecc_point_mult(product, pk, priv, rand_z, curve->p, ndigits);
+ ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);
ecc_swap_digits(product->x, secret, ndigits);
diff --git a/crypto/hash_info.c b/crypto/hash_info.c
index 7b1e0b188ce6..1dd095e4b451 100644
--- a/crypto/hash_info.c
+++ b/crypto/hash_info.c
@@ -32,6 +32,8 @@ const char *const hash_algo_name[HASH_ALGO__LAST] = {
[HASH_ALGO_TGR_160] = "tgr160",
[HASH_ALGO_TGR_192] = "tgr192",
[HASH_ALGO_SM3_256] = "sm3-256",
+ [HASH_ALGO_STREEBOG_256] = "streebog256",
+ [HASH_ALGO_STREEBOG_512] = "streebog512",
};
EXPORT_SYMBOL_GPL(hash_algo_name);
@@ -54,5 +56,7 @@ const int hash_digest_size[HASH_ALGO__LAST] = {
[HASH_ALGO_TGR_160] = TGR160_DIGEST_SIZE,
[HASH_ALGO_TGR_192] = TGR192_DIGEST_SIZE,
[HASH_ALGO_SM3_256] = SM3256_DIGEST_SIZE,
+ [HASH_ALGO_STREEBOG_256] = STREEBOG256_DIGEST_SIZE,
+ [HASH_ALGO_STREEBOG_512] = STREEBOG512_DIGEST_SIZE,
};
EXPORT_SYMBOL_GPL(hash_digest_size);
diff --git a/crypto/kpp.c b/crypto/kpp.c
index a90edc27af77..bc2f1006a2f7 100644
--- a/crypto/kpp.c
+++ b/crypto/kpp.c
@@ -30,15 +30,11 @@ static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_kpp rkpp;
- strncpy(rkpp.type, "kpp", sizeof(rkpp.type));
+ memset(&rkpp, 0, sizeof(rkpp));
- if (nla_put(skb, CRYPTOCFGA_REPORT_KPP,
- sizeof(struct crypto_report_kpp), &rkpp))
- goto nla_put_failure;
- return 0;
+ strscpy(rkpp.type, "kpp", sizeof(rkpp.type));
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_KPP, sizeof(rkpp), &rkpp);
}
#else
static int crypto_kpp_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3519..c160dfdbf2e0 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -122,7 +122,6 @@ static struct crypto_alg alg_lz4 = {
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lz4_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list),
.cra_init = lz4_init,
.cra_exit = lz4_exit,
.cra_u = { .compress = {
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
index 2be14f054daf..583b5e013d7a 100644
--- a/crypto/lz4hc.c
+++ b/crypto/lz4hc.c
@@ -123,7 +123,6 @@ static struct crypto_alg alg_lz4hc = {
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
.cra_ctxsize = sizeof(struct lz4hc_ctx),
.cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list),
.cra_init = lz4hc_init,
.cra_exit = lz4hc_exit,
.cra_u = { .compress = {
diff --git a/crypto/nhpoly1305.c b/crypto/nhpoly1305.c
new file mode 100644
index 000000000000..ec831a5594d8
--- /dev/null
+++ b/crypto/nhpoly1305.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
+ *
+ * Copyright 2018 Google LLC
+ */
+
+/*
+ * "NHPoly1305" is the main component of Adiantum hashing.
+ * Specifically, it is the calculation
+ *
+ * H_L ← Poly1305_{K_L}(NH_{K_N}(pad_{128}(L)))
+ *
+ * from the procedure in section 6.4 of the Adiantum paper [1]. It is an
+ * ε-almost-∆-universal (ε-∆U) hash function for equal-length inputs over
+ * Z/(2^{128}Z), where the "∆" operation is addition. It hashes 1024-byte
+ * chunks of the input with the NH hash function [2], reducing the input length
+ * by 32x. The resulting NH digests are evaluated as a polynomial in
+ * GF(2^{130}-5), like in the Poly1305 MAC [3]. Note that the polynomial
+ * evaluation by itself would suffice to achieve the ε-∆U property; NH is used
+ * for performance since it's over twice as fast as Poly1305.
+ *
+ * This is *not* a cryptographic hash function; do not use it as such!
+ *
+ * [1] Adiantum: length-preserving encryption for entry-level processors
+ * (https://eprint.iacr.org/2018/720.pdf)
+ * [2] UMAC: Fast and Secure Message Authentication
+ * (https://fastcrypto.org/umac/umac_proc.pdf)
+ * [3] The Poly1305-AES message-authentication code
+ * (https://cr.yp.to/mac/poly1305-20050329.pdf)
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/nhpoly1305.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+static void nh_generic(const u32 *key, const u8 *message, size_t message_len,
+ __le64 hash[NH_NUM_PASSES])
+{
+ u64 sums[4] = { 0, 0, 0, 0 };
+
+ BUILD_BUG_ON(NH_PAIR_STRIDE != 2);
+ BUILD_BUG_ON(NH_NUM_PASSES != 4);
+
+ while (message_len) {
+ u32 m0 = get_unaligned_le32(message + 0);
+ u32 m1 = get_unaligned_le32(message + 4);
+ u32 m2 = get_unaligned_le32(message + 8);
+ u32 m3 = get_unaligned_le32(message + 12);
+
+ sums[0] += (u64)(u32)(m0 + key[ 0]) * (u32)(m2 + key[ 2]);
+ sums[1] += (u64)(u32)(m0 + key[ 4]) * (u32)(m2 + key[ 6]);
+ sums[2] += (u64)(u32)(m0 + key[ 8]) * (u32)(m2 + key[10]);
+ sums[3] += (u64)(u32)(m0 + key[12]) * (u32)(m2 + key[14]);
+ sums[0] += (u64)(u32)(m1 + key[ 1]) * (u32)(m3 + key[ 3]);
+ sums[1] += (u64)(u32)(m1 + key[ 5]) * (u32)(m3 + key[ 7]);
+ sums[2] += (u64)(u32)(m1 + key[ 9]) * (u32)(m3 + key[11]);
+ sums[3] += (u64)(u32)(m1 + key[13]) * (u32)(m3 + key[15]);
+ key += NH_MESSAGE_UNIT / sizeof(key[0]);
+ message += NH_MESSAGE_UNIT;
+ message_len -= NH_MESSAGE_UNIT;
+ }
+
+ hash[0] = cpu_to_le64(sums[0]);
+ hash[1] = cpu_to_le64(sums[1]);
+ hash[2] = cpu_to_le64(sums[2]);
+ hash[3] = cpu_to_le64(sums[3]);
+}
+
+/* Pass the next NH hash value through Poly1305 */
+static void process_nh_hash_value(struct nhpoly1305_state *state,
+ const struct nhpoly1305_key *key)
+{
+ BUILD_BUG_ON(NH_HASH_BYTES % POLY1305_BLOCK_SIZE != 0);
+
+ poly1305_core_blocks(&state->poly_state, &key->poly_key, state->nh_hash,
+ NH_HASH_BYTES / POLY1305_BLOCK_SIZE);
+}
+
+/*
+ * Feed the next portion of the source data, as a whole number of 16-byte
+ * "NH message units", through NH and Poly1305. Each NH hash is taken over
+ * 1024 bytes, except possibly the final one which is taken over a multiple of
+ * 16 bytes up to 1024. Also, in the case where data is passed in misaligned
+ * chunks, we combine partial hashes; the end result is the same either way.
+ */
+static void nhpoly1305_units(struct nhpoly1305_state *state,
+ const struct nhpoly1305_key *key,
+ const u8 *src, unsigned int srclen, nh_t nh_fn)
+{
+ do {
+ unsigned int bytes;
+
+ if (state->nh_remaining == 0) {
+ /* Starting a new NH message */
+ bytes = min_t(unsigned int, srclen, NH_MESSAGE_BYTES);
+ nh_fn(key->nh_key, src, bytes, state->nh_hash);
+ state->nh_remaining = NH_MESSAGE_BYTES - bytes;
+ } else {
+ /* Continuing a previous NH message */
+ __le64 tmp_hash[NH_NUM_PASSES];
+ unsigned int pos;
+ int i;
+
+ pos = NH_MESSAGE_BYTES - state->nh_remaining;
+ bytes = min(srclen, state->nh_remaining);
+ nh_fn(&key->nh_key[pos / 4], src, bytes, tmp_hash);
+ for (i = 0; i < NH_NUM_PASSES; i++)
+ le64_add_cpu(&state->nh_hash[i],
+ le64_to_cpu(tmp_hash[i]));
+ state->nh_remaining -= bytes;
+ }
+ if (state->nh_remaining == 0)
+ process_nh_hash_value(state, key);
+ src += bytes;
+ srclen -= bytes;
+ } while (srclen);
+}
+
+int crypto_nhpoly1305_setkey(struct crypto_shash *tfm,
+ const u8 *key, unsigned int keylen)
+{
+ struct nhpoly1305_key *ctx = crypto_shash_ctx(tfm);
+ int i;
+
+ if (keylen != NHPOLY1305_KEY_SIZE)
+ return -EINVAL;
+
+ poly1305_core_setkey(&ctx->poly_key, key);
+ key += POLY1305_BLOCK_SIZE;
+
+ for (i = 0; i < NH_KEY_WORDS; i++)
+ ctx->nh_key[i] = get_unaligned_le32(key + i * sizeof(u32));
+
+ return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_setkey);
+
+int crypto_nhpoly1305_init(struct shash_desc *desc)
+{
+ struct nhpoly1305_state *state = shash_desc_ctx(desc);
+
+ poly1305_core_init(&state->poly_state);
+ state->buflen = 0;
+ state->nh_remaining = 0;
+ return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_init);
+
+int crypto_nhpoly1305_update_helper(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen,
+ nh_t nh_fn)
+{
+ struct nhpoly1305_state *state = shash_desc_ctx(desc);
+ const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+ unsigned int bytes;
+
+ if (state->buflen) {
+ bytes = min(srclen, (int)NH_MESSAGE_UNIT - state->buflen);
+ memcpy(&state->buffer[state->buflen], src, bytes);
+ state->buflen += bytes;
+ if (state->buflen < NH_MESSAGE_UNIT)
+ return 0;
+ nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+ nh_fn);
+ state->buflen = 0;
+ src += bytes;
+ srclen -= bytes;
+ }
+
+ if (srclen >= NH_MESSAGE_UNIT) {
+ bytes = round_down(srclen, NH_MESSAGE_UNIT);
+ nhpoly1305_units(state, key, src, bytes, nh_fn);
+ src += bytes;
+ srclen -= bytes;
+ }
+
+ if (srclen) {
+ memcpy(state->buffer, src, srclen);
+ state->buflen = srclen;
+ }
+ return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update_helper);
+
+int crypto_nhpoly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ return crypto_nhpoly1305_update_helper(desc, src, srclen, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_update);
+
+int crypto_nhpoly1305_final_helper(struct shash_desc *desc, u8 *dst, nh_t nh_fn)
+{
+ struct nhpoly1305_state *state = shash_desc_ctx(desc);
+ const struct nhpoly1305_key *key = crypto_shash_ctx(desc->tfm);
+
+ if (state->buflen) {
+ memset(&state->buffer[state->buflen], 0,
+ NH_MESSAGE_UNIT - state->buflen);
+ nhpoly1305_units(state, key, state->buffer, NH_MESSAGE_UNIT,
+ nh_fn);
+ }
+
+ if (state->nh_remaining)
+ process_nh_hash_value(state, key);
+
+ poly1305_core_emit(&state->poly_state, dst);
+ return 0;
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final_helper);
+
+int crypto_nhpoly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ return crypto_nhpoly1305_final_helper(desc, dst, nh_generic);
+}
+EXPORT_SYMBOL(crypto_nhpoly1305_final);
+
+static struct shash_alg nhpoly1305_alg = {
+ .base.cra_name = "nhpoly1305",
+ .base.cra_driver_name = "nhpoly1305-generic",
+ .base.cra_priority = 100,
+ .base.cra_ctxsize = sizeof(struct nhpoly1305_key),
+ .base.cra_module = THIS_MODULE,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .init = crypto_nhpoly1305_init,
+ .update = crypto_nhpoly1305_update,
+ .final = crypto_nhpoly1305_final,
+ .setkey = crypto_nhpoly1305_setkey,
+ .descsize = sizeof(struct nhpoly1305_state),
+};
+
+static int __init nhpoly1305_mod_init(void)
+{
+ return crypto_register_shash(&nhpoly1305_alg);
+}
+
+static void __exit nhpoly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&nhpoly1305_alg);
+}
+
+module_init(nhpoly1305_mod_init);
+module_exit(nhpoly1305_mod_exit);
+
+MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+MODULE_ALIAS_CRYPTO("nhpoly1305");
+MODULE_ALIAS_CRYPTO("nhpoly1305-generic");
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 8eb3c4c9ff67..d47cfc47b1b1 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -394,7 +394,7 @@ static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
int ret;
pinst->kobj.kset = pcrypt_kset;
- ret = kobject_add(&pinst->kobj, NULL, name);
+ ret = kobject_add(&pinst->kobj, NULL, "%s", name);
if (!ret)
kobject_uevent(&pinst->kobj, KOBJ_ADD);
diff --git a/crypto/poly1305_generic.c b/crypto/poly1305_generic.c
index 47d3a6b83931..2a06874204e8 100644
--- a/crypto/poly1305_generic.c
+++ b/crypto/poly1305_generic.c
@@ -38,7 +38,7 @@ int crypto_poly1305_init(struct shash_desc *desc)
{
struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- memset(dctx->h, 0, sizeof(dctx->h));
+ poly1305_core_init(&dctx->h);
dctx->buflen = 0;
dctx->rset = false;
dctx->sset = false;
@@ -47,23 +47,16 @@ int crypto_poly1305_init(struct shash_desc *desc)
}
EXPORT_SYMBOL_GPL(crypto_poly1305_init);
-static void poly1305_setrkey(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_core_setkey(struct poly1305_key *key, const u8 *raw_key)
{
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
- dctx->r[0] = (get_unaligned_le32(key + 0) >> 0) & 0x3ffffff;
- dctx->r[1] = (get_unaligned_le32(key + 3) >> 2) & 0x3ffff03;
- dctx->r[2] = (get_unaligned_le32(key + 6) >> 4) & 0x3ffc0ff;
- dctx->r[3] = (get_unaligned_le32(key + 9) >> 6) & 0x3f03fff;
- dctx->r[4] = (get_unaligned_le32(key + 12) >> 8) & 0x00fffff;
-}
-
-static void poly1305_setskey(struct poly1305_desc_ctx *dctx, const u8 *key)
-{
- dctx->s[0] = get_unaligned_le32(key + 0);
- dctx->s[1] = get_unaligned_le32(key + 4);
- dctx->s[2] = get_unaligned_le32(key + 8);
- dctx->s[3] = get_unaligned_le32(key + 12);
+ key->r[0] = (get_unaligned_le32(raw_key + 0) >> 0) & 0x3ffffff;
+ key->r[1] = (get_unaligned_le32(raw_key + 3) >> 2) & 0x3ffff03;
+ key->r[2] = (get_unaligned_le32(raw_key + 6) >> 4) & 0x3ffc0ff;
+ key->r[3] = (get_unaligned_le32(raw_key + 9) >> 6) & 0x3f03fff;
+ key->r[4] = (get_unaligned_le32(raw_key + 12) >> 8) & 0x00fffff;
}
+EXPORT_SYMBOL_GPL(poly1305_core_setkey);
/*
* Poly1305 requires a unique key for each tag, which implies that we can't set
@@ -75,13 +68,16 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
{
if (!dctx->sset) {
if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_setrkey(dctx, src);
+ poly1305_core_setkey(&dctx->r, src);
src += POLY1305_BLOCK_SIZE;
srclen -= POLY1305_BLOCK_SIZE;
dctx->rset = true;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_setskey(dctx, src);
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
src += POLY1305_BLOCK_SIZE;
srclen -= POLY1305_BLOCK_SIZE;
dctx->sset = true;
@@ -91,41 +87,37 @@ unsigned int crypto_poly1305_setdesckey(struct poly1305_desc_ctx *dctx,
}
EXPORT_SYMBOL_GPL(crypto_poly1305_setdesckey);
-static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
- const u8 *src, unsigned int srclen,
- u32 hibit)
+static void poly1305_blocks_internal(struct poly1305_state *state,
+ const struct poly1305_key *key,
+ const void *src, unsigned int nblocks,
+ u32 hibit)
{
u32 r0, r1, r2, r3, r4;
u32 s1, s2, s3, s4;
u32 h0, h1, h2, h3, h4;
u64 d0, d1, d2, d3, d4;
- unsigned int datalen;
- if (unlikely(!dctx->sset)) {
- datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
- src += srclen - datalen;
- srclen = datalen;
- }
+ if (!nblocks)
+ return;
- r0 = dctx->r[0];
- r1 = dctx->r[1];
- r2 = dctx->r[2];
- r3 = dctx->r[3];
- r4 = dctx->r[4];
+ r0 = key->r[0];
+ r1 = key->r[1];
+ r2 = key->r[2];
+ r3 = key->r[3];
+ r4 = key->r[4];
s1 = r1 * 5;
s2 = r2 * 5;
s3 = r3 * 5;
s4 = r4 * 5;
- h0 = dctx->h[0];
- h1 = dctx->h[1];
- h2 = dctx->h[2];
- h3 = dctx->h[3];
- h4 = dctx->h[4];
-
- while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
+ do {
/* h += m[i] */
h0 += (get_unaligned_le32(src + 0) >> 0) & 0x3ffffff;
h1 += (get_unaligned_le32(src + 3) >> 2) & 0x3ffffff;
@@ -154,16 +146,36 @@ static unsigned int poly1305_blocks(struct poly1305_desc_ctx *dctx,
h1 += h0 >> 26; h0 = h0 & 0x3ffffff;
src += POLY1305_BLOCK_SIZE;
- srclen -= POLY1305_BLOCK_SIZE;
- }
+ } while (--nblocks);
- dctx->h[0] = h0;
- dctx->h[1] = h1;
- dctx->h[2] = h2;
- dctx->h[3] = h3;
- dctx->h[4] = h4;
+ state->h[0] = h0;
+ state->h[1] = h1;
+ state->h[2] = h2;
+ state->h[3] = h3;
+ state->h[4] = h4;
+}
- return srclen;
+void poly1305_core_blocks(struct poly1305_state *state,
+ const struct poly1305_key *key,
+ const void *src, unsigned int nblocks)
+{
+ poly1305_blocks_internal(state, key, src, nblocks, 1 << 24);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_blocks);
+
+static void poly1305_blocks(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen, u32 hibit)
+{
+ unsigned int datalen;
+
+ if (unlikely(!dctx->sset)) {
+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+ src += srclen - datalen;
+ srclen = datalen;
+ }
+
+ poly1305_blocks_internal(&dctx->h, &dctx->r,
+ src, srclen / POLY1305_BLOCK_SIZE, hibit);
}
int crypto_poly1305_update(struct shash_desc *desc,
@@ -187,9 +199,9 @@ int crypto_poly1305_update(struct shash_desc *desc,
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_blocks(dctx, src, srclen, 1 << 24);
- src += srclen - bytes;
- srclen = bytes;
+ poly1305_blocks(dctx, src, srclen, 1 << 24);
+ src += srclen - (srclen % POLY1305_BLOCK_SIZE);
+ srclen %= POLY1305_BLOCK_SIZE;
}
if (unlikely(srclen)) {
@@ -201,30 +213,18 @@ int crypto_poly1305_update(struct shash_desc *desc,
}
EXPORT_SYMBOL_GPL(crypto_poly1305_update);
-int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+void poly1305_core_emit(const struct poly1305_state *state, void *dst)
{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
u32 h0, h1, h2, h3, h4;
u32 g0, g1, g2, g3, g4;
u32 mask;
- u64 f = 0;
-
- if (unlikely(!dctx->sset))
- return -ENOKEY;
-
- if (unlikely(dctx->buflen)) {
- dctx->buf[dctx->buflen++] = 1;
- memset(dctx->buf + dctx->buflen, 0,
- POLY1305_BLOCK_SIZE - dctx->buflen);
- poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
- }
/* fully carry h */
- h0 = dctx->h[0];
- h1 = dctx->h[1];
- h2 = dctx->h[2];
- h3 = dctx->h[3];
- h4 = dctx->h[4];
+ h0 = state->h[0];
+ h1 = state->h[1];
+ h2 = state->h[2];
+ h3 = state->h[3];
+ h4 = state->h[4];
h2 += (h1 >> 26); h1 = h1 & 0x3ffffff;
h3 += (h2 >> 26); h2 = h2 & 0x3ffffff;
@@ -254,16 +254,40 @@ int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
h4 = (h4 & mask) | g4;
/* h = h % (2^128) */
- h0 = (h0 >> 0) | (h1 << 26);
- h1 = (h1 >> 6) | (h2 << 20);
- h2 = (h2 >> 12) | (h3 << 14);
- h3 = (h3 >> 18) | (h4 << 8);
+ put_unaligned_le32((h0 >> 0) | (h1 << 26), dst + 0);
+ put_unaligned_le32((h1 >> 6) | (h2 << 20), dst + 4);
+ put_unaligned_le32((h2 >> 12) | (h3 << 14), dst + 8);
+ put_unaligned_le32((h3 >> 18) | (h4 << 8), dst + 12);
+}
+EXPORT_SYMBOL_GPL(poly1305_core_emit);
+
+int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_core_emit(&dctx->h, digest);
/* mac = (h + s) % (2^128) */
- f = (f >> 32) + h0 + dctx->s[0]; put_unaligned_le32(f, dst + 0);
- f = (f >> 32) + h1 + dctx->s[1]; put_unaligned_le32(f, dst + 4);
- f = (f >> 32) + h2 + dctx->s[2]; put_unaligned_le32(f, dst + 8);
- f = (f >> 32) + h3 + dctx->s[3]; put_unaligned_le32(f, dst + 12);
+ f = (f >> 32) + le32_to_cpu(digest[0]) + dctx->s[0];
+ put_unaligned_le32(f, dst + 0);
+ f = (f >> 32) + le32_to_cpu(digest[1]) + dctx->s[1];
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]) + dctx->s[2];
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]) + dctx->s[3];
+ put_unaligned_le32(f, dst + 12);
return 0;
}
diff --git a/crypto/rng.c b/crypto/rng.c
index 547f16ecbfb0..33c38a72bff5 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -35,9 +35,11 @@ static int crypto_default_rng_refcnt;
int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
{
+ struct crypto_alg *alg = tfm->base.__crt_alg;
u8 *buf = NULL;
int err;
+ crypto_stats_get(alg);
if (!seed && slen) {
buf = kmalloc(slen, GFP_KERNEL);
if (!buf)
@@ -50,7 +52,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
}
err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
- crypto_stat_rng_seed(tfm, err);
+ crypto_stats_rng_seed(alg, err);
out:
kzfree(buf);
return err;
@@ -74,17 +76,13 @@ static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_rng rrng;
- strncpy(rrng.type, "rng", sizeof(rrng.type));
+ memset(&rrng, 0, sizeof(rrng));
- rrng.seedsize = seedsize(alg);
+ strscpy(rrng.type, "rng", sizeof(rrng.type));
- if (nla_put(skb, CRYPTOCFGA_REPORT_RNG,
- sizeof(struct crypto_report_rng), &rrng))
- goto nla_put_failure;
- return 0;
+ rrng.seedsize = seedsize(alg);
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_RNG, sizeof(rrng), &rrng);
}
#else
static int crypto_rng_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/salsa20_generic.c b/crypto/salsa20_generic.c
index 8c77bc78a09f..00fce32ae17a 100644
--- a/crypto/salsa20_generic.c
+++ b/crypto/salsa20_generic.c
@@ -159,7 +159,7 @@ static int salsa20_crypt(struct skcipher_request *req)
u32 state[16];
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
salsa20_init(state, ctx, walk.iv);
diff --git a/crypto/scompress.c b/crypto/scompress.c
index 968bbcf65c94..6f8305f8c300 100644
--- a/crypto/scompress.c
+++ b/crypto/scompress.c
@@ -40,15 +40,12 @@ static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
{
struct crypto_report_comp rscomp;
- strncpy(rscomp.type, "scomp", sizeof(rscomp.type));
+ memset(&rscomp, 0, sizeof(rscomp));
- if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
- sizeof(struct crypto_report_comp), &rscomp))
- goto nla_put_failure;
- return 0;
+ strscpy(rscomp.type, "scomp", sizeof(rscomp.type));
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
+ sizeof(rscomp), &rscomp);
}
#else
static int crypto_scomp_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/shash.c b/crypto/shash.c
index d21f04d70dce..44d297b82a8f 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -408,18 +408,14 @@ static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
struct crypto_report_hash rhash;
struct shash_alg *salg = __crypto_shash_alg(alg);
- strncpy(rhash.type, "shash", sizeof(rhash.type));
+ memset(&rhash, 0, sizeof(rhash));
+
+ strscpy(rhash.type, "shash", sizeof(rhash.type));
rhash.blocksize = alg->cra_blocksize;
rhash.digestsize = salg->digestsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_HASH,
- sizeof(struct crypto_report_hash), &rhash))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash);
}
#else
static int crypto_shash_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 4caab81d2d02..2a969296bc24 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -474,6 +474,8 @@ int skcipher_walk_virt(struct skcipher_walk *walk,
{
int err;
+ might_sleep_if(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP);
+
walk->flags &= ~SKCIPHER_WALK_PHYS;
err = skcipher_walk_skcipher(walk, req);
@@ -577,8 +579,7 @@ static unsigned int crypto_skcipher_extsize(struct crypto_alg *alg)
if (alg->cra_type == &crypto_blkcipher_type)
return sizeof(struct crypto_blkcipher *);
- if (alg->cra_type == &crypto_ablkcipher_type ||
- alg->cra_type == &crypto_givcipher_type)
+ if (alg->cra_type == &crypto_ablkcipher_type)
return sizeof(struct crypto_ablkcipher *);
return crypto_alg_extsize(alg);
@@ -842,8 +843,7 @@ static int crypto_skcipher_init_tfm(struct crypto_tfm *tfm)
if (tfm->__crt_alg->cra_type == &crypto_blkcipher_type)
return crypto_init_skcipher_ops_blkcipher(tfm);
- if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type ||
- tfm->__crt_alg->cra_type == &crypto_givcipher_type)
+ if (tfm->__crt_alg->cra_type == &crypto_ablkcipher_type)
return crypto_init_skcipher_ops_ablkcipher(tfm);
skcipher->setkey = skcipher_setkey;
@@ -897,21 +897,18 @@ static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
struct skcipher_alg *skcipher = container_of(alg, struct skcipher_alg,
base);
- strncpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type));
- strncpy(rblkcipher.geniv, "<none>", sizeof(rblkcipher.geniv));
+ memset(&rblkcipher, 0, sizeof(rblkcipher));
+
+ strscpy(rblkcipher.type, "skcipher", sizeof(rblkcipher.type));
+ strscpy(rblkcipher.geniv, "<none>", sizeof(rblkcipher.geniv));
rblkcipher.blocksize = alg->cra_blocksize;
rblkcipher.min_keysize = skcipher->min_keysize;
rblkcipher.max_keysize = skcipher->max_keysize;
rblkcipher.ivsize = skcipher->ivsize;
- if (nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
- sizeof(struct crypto_report_blkcipher), &rblkcipher))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -EMSGSIZE;
+ return nla_put(skb, CRYPTOCFGA_REPORT_BLKCIPHER,
+ sizeof(rblkcipher), &rblkcipher);
}
#else
static int crypto_skcipher_report(struct sk_buff *skb, struct crypto_alg *alg)
diff --git a/crypto/streebog_generic.c b/crypto/streebog_generic.c
new file mode 100644
index 000000000000..03272a22afce
--- /dev/null
+++ b/crypto/streebog_generic.c
@@ -0,0 +1,1140 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause
+/*
+ * Streebog hash function as specified by GOST R 34.11-2012 and
+ * described at https://tools.ietf.org/html/rfc6986
+ *
+ * Copyright (c) 2013 Alexey Degtyarev <alexey@renatasystems.org>
+ * Copyright (c) 2018 Vitaly Chikunov <vt@altlinux.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/internal/hash.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <crypto/streebog.h>
+
+static const struct streebog_uint512 buffer0 = { {
+ 0, 0, 0, 0, 0, 0, 0, 0
+} };
+
+static const struct streebog_uint512 buffer512 = { {
+ cpu_to_le64(0x200), 0, 0, 0, 0, 0, 0, 0
+} };
+
+static const struct streebog_uint512 C[12] = {
+ { {
+ cpu_to_le64(0xdd806559f2a64507ULL),
+ cpu_to_le64(0x05767436cc744d23ULL),
+ cpu_to_le64(0xa2422a08a460d315ULL),
+ cpu_to_le64(0x4b7ce09192676901ULL),
+ cpu_to_le64(0x714eb88d7585c4fcULL),
+ cpu_to_le64(0x2f6a76432e45d016ULL),
+ cpu_to_le64(0xebcb2f81c0657c1fULL),
+ cpu_to_le64(0xb1085bda1ecadae9ULL)
+ } },
+ { {
+ cpu_to_le64(0xe679047021b19bb7ULL),
+ cpu_to_le64(0x55dda21bd7cbcd56ULL),
+ cpu_to_le64(0x5cb561c2db0aa7caULL),
+ cpu_to_le64(0x9ab5176b12d69958ULL),
+ cpu_to_le64(0x61d55e0f16b50131ULL),
+ cpu_to_le64(0xf3feea720a232b98ULL),
+ cpu_to_le64(0x4fe39d460f70b5d7ULL),
+ cpu_to_le64(0x6fa3b58aa99d2f1aULL)
+ } },
+ { {
+ cpu_to_le64(0x991e96f50aba0ab2ULL),
+ cpu_to_le64(0xc2b6f443867adb31ULL),
+ cpu_to_le64(0xc1c93a376062db09ULL),
+ cpu_to_le64(0xd3e20fe490359eb1ULL),
+ cpu_to_le64(0xf2ea7514b1297b7bULL),
+ cpu_to_le64(0x06f15e5f529c1f8bULL),
+ cpu_to_le64(0x0a39fc286a3d8435ULL),
+ cpu_to_le64(0xf574dcac2bce2fc7ULL)
+ } },
+ { {
+ cpu_to_le64(0x220cbebc84e3d12eULL),
+ cpu_to_le64(0x3453eaa193e837f1ULL),
+ cpu_to_le64(0xd8b71333935203beULL),
+ cpu_to_le64(0xa9d72c82ed03d675ULL),
+ cpu_to_le64(0x9d721cad685e353fULL),
+ cpu_to_le64(0x488e857e335c3c7dULL),
+ cpu_to_le64(0xf948e1a05d71e4ddULL),
+ cpu_to_le64(0xef1fdfb3e81566d2ULL)
+ } },
+ { {
+ cpu_to_le64(0x601758fd7c6cfe57ULL),
+ cpu_to_le64(0x7a56a27ea9ea63f5ULL),
+ cpu_to_le64(0xdfff00b723271a16ULL),
+ cpu_to_le64(0xbfcd1747253af5a3ULL),
+ cpu_to_le64(0x359e35d7800fffbdULL),
+ cpu_to_le64(0x7f151c1f1686104aULL),
+ cpu_to_le64(0x9a3f410c6ca92363ULL),
+ cpu_to_le64(0x4bea6bacad474799ULL)
+ } },
+ { {
+ cpu_to_le64(0xfa68407a46647d6eULL),
+ cpu_to_le64(0xbf71c57236904f35ULL),
+ cpu_to_le64(0x0af21f66c2bec6b6ULL),
+ cpu_to_le64(0xcffaa6b71c9ab7b4ULL),
+ cpu_to_le64(0x187f9ab49af08ec6ULL),
+ cpu_to_le64(0x2d66c4f95142a46cULL),
+ cpu_to_le64(0x6fa4c33b7a3039c0ULL),
+ cpu_to_le64(0xae4faeae1d3ad3d9ULL)
+ } },
+ { {
+ cpu_to_le64(0x8886564d3a14d493ULL),
+ cpu_to_le64(0x3517454ca23c4af3ULL),
+ cpu_to_le64(0x06476983284a0504ULL),
+ cpu_to_le64(0x0992abc52d822c37ULL),
+ cpu_to_le64(0xd3473e33197a93c9ULL),
+ cpu_to_le64(0x399ec6c7e6bf87c9ULL),
+ cpu_to_le64(0x51ac86febf240954ULL),
+ cpu_to_le64(0xf4c70e16eeaac5ecULL)
+ } },
+ { {
+ cpu_to_le64(0xa47f0dd4bf02e71eULL),
+ cpu_to_le64(0x36acc2355951a8d9ULL),
+ cpu_to_le64(0x69d18d2bd1a5c42fULL),
+ cpu_to_le64(0xf4892bcb929b0690ULL),
+ cpu_to_le64(0x89b4443b4ddbc49aULL),
+ cpu_to_le64(0x4eb7f8719c36de1eULL),
+ cpu_to_le64(0x03e7aa020c6e4141ULL),
+ cpu_to_le64(0x9b1f5b424d93c9a7ULL)
+ } },
+ { {
+ cpu_to_le64(0x7261445183235adbULL),
+ cpu_to_le64(0x0e38dc92cb1f2a60ULL),
+ cpu_to_le64(0x7b2b8a9aa6079c54ULL),
+ cpu_to_le64(0x800a440bdbb2ceb1ULL),
+ cpu_to_le64(0x3cd955b7e00d0984ULL),
+ cpu_to_le64(0x3a7d3a1b25894224ULL),
+ cpu_to_le64(0x944c9ad8ec165fdeULL),
+ cpu_to_le64(0x378f5a541631229bULL)
+ } },
+ { {
+ cpu_to_le64(0x74b4c7fb98459cedULL),
+ cpu_to_le64(0x3698fad1153bb6c3ULL),
+ cpu_to_le64(0x7a1e6c303b7652f4ULL),
+ cpu_to_le64(0x9fe76702af69334bULL),
+ cpu_to_le64(0x1fffe18a1b336103ULL),
+ cpu_to_le64(0x8941e71cff8a78dbULL),
+ cpu_to_le64(0x382ae548b2e4f3f3ULL),
+ cpu_to_le64(0xabbedea680056f52ULL)
+ } },
+ { {
+ cpu_to_le64(0x6bcaa4cd81f32d1bULL),
+ cpu_to_le64(0xdea2594ac06fd85dULL),
+ cpu_to_le64(0xefbacd1d7d476e98ULL),
+ cpu_to_le64(0x8a1d71efea48b9caULL),
+ cpu_to_le64(0x2001802114846679ULL),
+ cpu_to_le64(0xd8fa6bbbebab0761ULL),
+ cpu_to_le64(0x3002c6cd635afe94ULL),
+ cpu_to_le64(0x7bcd9ed0efc889fbULL)
+ } },
+ { {
+ cpu_to_le64(0x48bc924af11bd720ULL),
+ cpu_to_le64(0xfaf417d5d9b21b99ULL),
+ cpu_to_le64(0xe71da4aa88e12852ULL),
+ cpu_to_le64(0x5d80ef9d1891cc86ULL),
+ cpu_to_le64(0xf82012d430219f9bULL),
+ cpu_to_le64(0xcda43c32bcdf1d77ULL),
+ cpu_to_le64(0xd21380b00449b17aULL),
+ cpu_to_le64(0x378ee767f11631baULL)
+ } }
+};
+
+static const u8 Tau[64] = {
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 1, 9, 17, 25, 33, 41, 49, 57,
+ 2, 10, 18, 26, 34, 42, 50, 58,
+ 3, 11, 19, 27, 35, 43, 51, 59,
+ 4, 12, 20, 28, 36, 44, 52, 60,
+ 5, 13, 21, 29, 37, 45, 53, 61,
+ 6, 14, 22, 30, 38, 46, 54, 62,
+ 7, 15, 23, 31, 39, 47, 55, 63
+};
+
+static const u8 Pi[256] = {
+ 252, 238, 221, 17, 207, 110, 49, 22,
+ 251, 196, 250, 218, 35, 197, 4, 77,
+ 233, 119, 240, 219, 147, 46, 153, 186,
+ 23, 54, 241, 187, 20, 205, 95, 193,
+ 249, 24, 101, 90, 226, 92, 239, 33,
+ 129, 28, 60, 66, 139, 1, 142, 79,
+ 5, 132, 2, 174, 227, 106, 143, 160,
+ 6, 11, 237, 152, 127, 212, 211, 31,
+ 235, 52, 44, 81, 234, 200, 72, 171,
+ 242, 42, 104, 162, 253, 58, 206, 204,
+ 181, 112, 14, 86, 8, 12, 118, 18,
+ 191, 114, 19, 71, 156, 183, 93, 135,
+ 21, 161, 150, 41, 16, 123, 154, 199,
+ 243, 145, 120, 111, 157, 158, 178, 177,
+ 50, 117, 25, 61, 255, 53, 138, 126,
+ 109, 84, 198, 128, 195, 189, 13, 87,
+ 223, 245, 36, 169, 62, 168, 67, 201,
+ 215, 121, 214, 246, 124, 34, 185, 3,
+ 224, 15, 236, 222, 122, 148, 176, 188,
+ 220, 232, 40, 80, 78, 51, 10, 74,
+ 167, 151, 96, 115, 30, 0, 98, 68,
+ 26, 184, 56, 130, 100, 159, 38, 65,
+ 173, 69, 70, 146, 39, 94, 85, 47,
+ 140, 163, 165, 125, 105, 213, 149, 59,
+ 7, 88, 179, 64, 134, 172, 29, 247,
+ 48, 55, 107, 228, 136, 217, 231, 137,
+ 225, 27, 131, 73, 76, 63, 248, 254,
+ 141, 83, 170, 144, 202, 216, 133, 97,
+ 32, 113, 103, 164, 45, 43, 9, 91,
+ 203, 155, 37, 208, 190, 229, 108, 82,
+ 89, 166, 116, 210, 230, 244, 180, 192,
+ 209, 102, 175, 194, 57, 75, 99, 182
+};
+
+static const unsigned long long Ax[8][256] = {
+ {
+ 0xd01f715b5c7ef8e6ULL, 0x16fa240980778325ULL, 0xa8a42e857ee049c8ULL,
+ 0x6ac1068fa186465bULL, 0x6e417bd7a2e9320bULL, 0x665c8167a437daabULL,
+ 0x7666681aa89617f6ULL, 0x4b959163700bdcf5ULL, 0xf14be6b78df36248ULL,
+ 0xc585bd689a625cffULL, 0x9557d7fca67d82cbULL, 0x89f0b969af6dd366ULL,
+ 0xb0833d48749f6c35ULL, 0xa1998c23b1ecbc7cULL, 0x8d70c431ac02a736ULL,
+ 0xd6dfbc2fd0a8b69eULL, 0x37aeb3e551fa198bULL, 0x0b7d128a40b5cf9cULL,
+ 0x5a8f2008b5780cbcULL, 0xedec882284e333e5ULL, 0xd25fc177d3c7c2ceULL,
+ 0x5e0f5d50b61778ecULL, 0x1d873683c0c24cb9ULL, 0xad040bcbb45d208cULL,
+ 0x2f89a0285b853c76ULL, 0x5732fff6791b8d58ULL, 0x3e9311439ef6ec3fULL,
+ 0xc9183a809fd3c00fULL, 0x83adf3f5260a01eeULL, 0xa6791941f4e8ef10ULL,
+ 0x103ae97d0ca1cd5dULL, 0x2ce948121dee1b4aULL, 0x39738421dbf2bf53ULL,
+ 0x093da2a6cf0cf5b4ULL, 0xcd9847d89cbcb45fULL, 0xf9561c078b2d8ae8ULL,
+ 0x9c6a755a6971777fULL, 0xbc1ebaa0712ef0c5ULL, 0x72e61542abf963a6ULL,
+ 0x78bb5fde229eb12eULL, 0x14ba94250fceb90dULL, 0x844d6697630e5282ULL,
+ 0x98ea08026a1e032fULL, 0xf06bbea144217f5cULL, 0xdb6263d11ccb377aULL,
+ 0x641c314b2b8ee083ULL, 0x320e96ab9b4770cfULL, 0x1ee7deb986a96b85ULL,
+ 0xe96cf57a878c47b5ULL, 0xfdd6615f8842feb8ULL, 0xc83862965601dd1bULL,
+ 0x2ea9f83e92572162ULL, 0xf876441142ff97fcULL, 0xeb2c455608357d9dULL,
+ 0x5612a7e0b0c9904cULL, 0x6c01cbfb2d500823ULL, 0x4548a6a7fa037a2dULL,
+ 0xabc4c6bf388b6ef4ULL, 0xbade77d4fdf8bebdULL, 0x799b07c8eb4cac3aULL,
+ 0x0c9d87e805b19cf0ULL, 0xcb588aac106afa27ULL, 0xea0c1d40c1e76089ULL,
+ 0x2869354a1e816f1aULL, 0xff96d17307fbc490ULL, 0x9f0a9d602f1a5043ULL,
+ 0x96373fc6e016a5f7ULL, 0x5292dab8b3a6e41cULL, 0x9b8ae0382c752413ULL,
+ 0x4f15ec3b7364a8a5ULL, 0x3fb349555724f12bULL, 0xc7c50d4415db66d7ULL,
+ 0x92b7429ee379d1a7ULL, 0xd37f99611a15dfdaULL, 0x231427c05e34a086ULL,
+ 0xa439a96d7b51d538ULL, 0xb403401077f01865ULL, 0xdda2aea5901d7902ULL,
+ 0x0a5d4a9c8967d288ULL, 0xc265280adf660f93ULL, 0x8bb0094520d4e94eULL,
+ 0x2a29856691385532ULL, 0x42a833c5bf072941ULL, 0x73c64d54622b7eb2ULL,
+ 0x07e095624504536cULL, 0x8a905153e906f45aULL, 0x6f6123c16b3b2f1fULL,
+ 0xc6e55552dc097bc3ULL, 0x4468feb133d16739ULL, 0xe211e7f0c7398829ULL,
+ 0xa2f96419f7879b40ULL, 0x19074bdbc3ad38e9ULL, 0xf4ebc3f9474e0b0cULL,
+ 0x43886bd376d53455ULL, 0xd8028beb5aa01046ULL, 0x51f23282f5cdc320ULL,
+ 0xe7b1c2be0d84e16dULL, 0x081dfab006dee8a0ULL, 0x3b33340d544b857bULL,
+ 0x7f5bcabc679ae242ULL, 0x0edd37c48a08a6d8ULL, 0x81ed43d9a9b33bc6ULL,
+ 0xb1a3655ebd4d7121ULL, 0x69a1eeb5e7ed6167ULL, 0xf6ab73d5c8f73124ULL,
+ 0x1a67a3e185c61fd5ULL, 0x2dc91004d43c065eULL, 0x0240b02c8fb93a28ULL,
+ 0x90f7f2b26cc0eb8fULL, 0x3cd3a16f114fd617ULL, 0xaae49ea9f15973e0ULL,
+ 0x06c0cd748cd64e78ULL, 0xda423bc7d5192a6eULL, 0xc345701c16b41287ULL,
+ 0x6d2193ede4821537ULL, 0xfcf639494190e3acULL, 0x7c3b228621f1c57eULL,
+ 0xfb16ac2b0494b0c0ULL, 0xbf7e529a3745d7f9ULL, 0x6881b6a32e3f7c73ULL,
+ 0xca78d2bad9b8e733ULL, 0xbbfe2fc2342aa3a9ULL, 0x0dbddffecc6381e4ULL,
+ 0x70a6a56e2440598eULL, 0xe4d12a844befc651ULL, 0x8c509c2765d0ba22ULL,
+ 0xee8c6018c28814d9ULL, 0x17da7c1f49a59e31ULL, 0x609c4c1328e194d3ULL,
+ 0xb3e3d57232f44b09ULL, 0x91d7aaa4a512f69bULL, 0x0ffd6fd243dabbccULL,
+ 0x50d26a943c1fde34ULL, 0x6be15e9968545b4fULL, 0x94778fea6faf9fdfULL,
+ 0x2b09dd7058ea4826ULL, 0x677cd9716de5c7bfULL, 0x49d5214fffb2e6ddULL,
+ 0x0360e83a466b273cULL, 0x1fc786af4f7b7691ULL, 0xa0b9d435783ea168ULL,
+ 0xd49f0c035f118cb6ULL, 0x01205816c9d21d14ULL, 0xac2453dd7d8f3d98ULL,
+ 0x545217cc3f70aa64ULL, 0x26b4028e9489c9c2ULL, 0xdec2469fd6765e3eULL,
+ 0x04807d58036f7450ULL, 0xe5f17292823ddb45ULL, 0xf30b569b024a5860ULL,
+ 0x62dcfc3fa758aefbULL, 0xe84cad6c4e5e5aa1ULL, 0xccb81fce556ea94bULL,
+ 0x53b282ae7a74f908ULL, 0x1b47fbf74c1402c1ULL, 0x368eebf39828049fULL,
+ 0x7afbeff2ad278b06ULL, 0xbe5e0a8cfe97caedULL, 0xcfd8f7f413058e77ULL,
+ 0xf78b2bc301252c30ULL, 0x4d555c17fcdd928dULL, 0x5f2f05467fc565f8ULL,
+ 0x24f4b2a21b30f3eaULL, 0x860dd6bbecb768aaULL, 0x4c750401350f8f99ULL,
+ 0x0000000000000000ULL, 0xecccd0344d312ef1ULL, 0xb5231806be220571ULL,
+ 0xc105c030990d28afULL, 0x653c695de25cfd97ULL, 0x159acc33c61ca419ULL,
+ 0xb89ec7f872418495ULL, 0xa9847693b73254dcULL, 0x58cf90243ac13694ULL,
+ 0x59efc832f3132b80ULL, 0x5c4fed7c39ae42c4ULL, 0x828dabe3efd81cfaULL,
+ 0xd13f294d95ace5f2ULL, 0x7d1b7a90e823d86aULL, 0xb643f03cf849224dULL,
+ 0x3df3f979d89dcb03ULL, 0x7426d836272f2ddeULL, 0xdfe21e891fa4432aULL,
+ 0x3a136c1b9d99986fULL, 0xfa36f43dcd46add4ULL, 0xc025982650df35bbULL,
+ 0x856d3e81aadc4f96ULL, 0xc4a5e57e53b041ebULL, 0x4708168b75ba4005ULL,
+ 0xaf44bbe73be41aa4ULL, 0x971767d029c4b8e3ULL, 0xb9be9feebb939981ULL,
+ 0x215497ecd18d9aaeULL, 0x316e7e91dd2c57f3ULL, 0xcef8afe2dad79363ULL,
+ 0x3853dc371220a247ULL, 0x35ee03c9de4323a3ULL, 0xe6919aa8c456fc79ULL,
+ 0xe05157dc4880b201ULL, 0x7bdbb7e464f59612ULL, 0x127a59518318f775ULL,
+ 0x332ecebd52956ddbULL, 0x8f30741d23bb9d1eULL, 0xd922d3fd93720d52ULL,
+ 0x7746300c61440ae2ULL, 0x25d4eab4d2e2eefeULL, 0x75068020eefd30caULL,
+ 0x135a01474acaea61ULL, 0x304e268714fe4ae7ULL, 0xa519f17bb283c82cULL,
+ 0xdc82f6b359cf6416ULL, 0x5baf781e7caa11a8ULL, 0xb2c38d64fb26561dULL,
+ 0x34ce5bdf17913eb7ULL, 0x5d6fb56af07c5fd0ULL, 0x182713cd0a7f25fdULL,
+ 0x9e2ac576e6c84d57ULL, 0x9aaab82ee5a73907ULL, 0xa3d93c0f3e558654ULL,
+ 0x7e7b92aaae48ff56ULL, 0x872d8ead256575beULL, 0x41c8dbfff96c0e7dULL,
+ 0x99ca5014a3cc1e3bULL, 0x40e883e930be1369ULL, 0x1ca76e95091051adULL,
+ 0x4e35b42dbab6b5b1ULL, 0x05a0254ecabd6944ULL, 0xe1710fca8152af15ULL,
+ 0xf22b0e8dcb984574ULL, 0xb763a82a319b3f59ULL, 0x63fca4296e8ab3efULL,
+ 0x9d4a2d4ca0a36a6bULL, 0xe331bfe60eeb953dULL, 0xd5bf541596c391a2ULL,
+ 0xf5cb9bef8e9c1618ULL, 0x46284e9dbc685d11ULL, 0x2074cffa185f87baULL,
+ 0xbd3ee2b6b8fcedd1ULL, 0xae64e3f1f23607b0ULL, 0xfeb68965ce29d984ULL,
+ 0x55724fdaf6a2b770ULL, 0x29496d5cd753720eULL, 0xa75941573d3af204ULL,
+ 0x8e102c0bea69800aULL, 0x111ab16bc573d049ULL, 0xd7ffe439197aab8aULL,
+ 0xefac380e0b5a09cdULL, 0x48f579593660fbc9ULL, 0x22347fd697e6bd92ULL,
+ 0x61bc1405e13389c7ULL, 0x4ab5c975b9d9c1e1ULL, 0x80cd1bcf606126d2ULL,
+ 0x7186fd78ed92449aULL, 0x93971a882aabccb3ULL, 0x88d0e17f66bfce72ULL,
+ 0x27945a985d5bd4d6ULL
+ }, {
+ 0xde553f8c05a811c8ULL, 0x1906b59631b4f565ULL, 0x436e70d6b1964ff7ULL,
+ 0x36d343cb8b1e9d85ULL, 0x843dfacc858aab5aULL, 0xfdfc95c299bfc7f9ULL,
+ 0x0f634bdea1d51fa2ULL, 0x6d458b3b76efb3cdULL, 0x85c3f77cf8593f80ULL,
+ 0x3c91315fbe737cb2ULL, 0x2148b03366ace398ULL, 0x18f8b8264c6761bfULL,
+ 0xc830c1c495c9fb0fULL, 0x981a76102086a0aaULL, 0xaa16012142f35760ULL,
+ 0x35cc54060c763cf6ULL, 0x42907d66cc45db2dULL, 0x8203d44b965af4bcULL,
+ 0x3d6f3cefc3a0e868ULL, 0xbc73ff69d292bda7ULL, 0x8722ed0102e20a29ULL,
+ 0x8f8185e8cd34deb7ULL, 0x9b0561dda7ee01d9ULL, 0x5335a0193227fad6ULL,
+ 0xc9cecc74e81a6fd5ULL, 0x54f5832e5c2431eaULL, 0x99e47ba05d553470ULL,
+ 0xf7bee756acd226ceULL, 0x384e05a5571816fdULL, 0xd1367452a47d0e6aULL,
+ 0xf29fde1c386ad85bULL, 0x320c77316275f7caULL, 0xd0c879e2d9ae9ab0ULL,
+ 0xdb7406c69110ef5dULL, 0x45505e51a2461011ULL, 0xfc029872e46c5323ULL,
+ 0xfa3cb6f5f7bc0cc5ULL, 0x031f17cd8768a173ULL, 0xbd8df2d9af41297dULL,
+ 0x9d3b4f5ab43e5e3fULL, 0x4071671b36feee84ULL, 0x716207e7d3e3b83dULL,
+ 0x48d20ff2f9283a1aULL, 0x27769eb4757cbc7eULL, 0x5c56ebc793f2e574ULL,
+ 0xa48b474f9ef5dc18ULL, 0x52cbada94ff46e0cULL, 0x60c7da982d8199c6ULL,
+ 0x0e9d466edc068b78ULL, 0x4eec2175eaf865fcULL, 0x550b8e9e21f7a530ULL,
+ 0x6b7ba5bc653fec2bULL, 0x5eb7f1ba6949d0ddULL, 0x57ea94e3db4c9099ULL,
+ 0xf640eae6d101b214ULL, 0xdd4a284182c0b0bbULL, 0xff1d8fbf6304f250ULL,
+ 0xb8accb933bf9d7e8ULL, 0xe8867c478eb68c4dULL, 0x3f8e2692391bddc1ULL,
+ 0xcb2fd60912a15a7cULL, 0xaec935dbab983d2fULL, 0xf55ffd2b56691367ULL,
+ 0x80e2ce366ce1c115ULL, 0x179bf3f8edb27e1dULL, 0x01fe0db07dd394daULL,
+ 0xda8a0b76ecc37b87ULL, 0x44ae53e1df9584cbULL, 0xb310b4b77347a205ULL,
+ 0xdfab323c787b8512ULL, 0x3b511268d070b78eULL, 0x65e6e3d2b9396753ULL,
+ 0x6864b271e2574d58ULL, 0x259784c98fc789d7ULL, 0x02e11a7dfabb35a9ULL,
+ 0x8841a6dfa337158bULL, 0x7ade78c39b5dcdd0ULL, 0xb7cf804d9a2cc84aULL,
+ 0x20b6bd831b7f7742ULL, 0x75bd331d3a88d272ULL, 0x418f6aab4b2d7a5eULL,
+ 0xd9951cbb6babdaf4ULL, 0xb6318dfde7ff5c90ULL, 0x1f389b112264aa83ULL,
+ 0x492c024284fbaec0ULL, 0xe33a0363c608f9a0ULL, 0x2688930408af28a4ULL,
+ 0xc7538a1a341ce4adULL, 0x5da8e677ee2171aeULL, 0x8c9e92254a5c7fc4ULL,
+ 0x63d8cd55aae938b5ULL, 0x29ebd8daa97a3706ULL, 0x959827b37be88aa1ULL,
+ 0x1484e4356adadf6eULL, 0xa7945082199d7d6bULL, 0xbf6ce8a455fa1cd4ULL,
+ 0x9cc542eac9edcae5ULL, 0x79c16f0e1c356ca3ULL, 0x89bfab6fdee48151ULL,
+ 0xd4174d1830c5f0ffULL, 0x9258048415eb419dULL, 0x6139d72850520d1cULL,
+ 0x6a85a80c18ec78f1ULL, 0xcd11f88e0171059aULL, 0xcceff53e7ca29140ULL,
+ 0xd229639f2315af19ULL, 0x90b91ef9ef507434ULL, 0x5977d28d074a1be1ULL,
+ 0x311360fce51d56b9ULL, 0xc093a92d5a1f2f91ULL, 0x1a19a25bb6dc5416ULL,
+ 0xeb996b8a09de2d3eULL, 0xfee3820f1ed7668aULL, 0xd7085ad5b7ad518cULL,
+ 0x7fff41890fe53345ULL, 0xec5948bd67dde602ULL, 0x2fd5f65dbaaa68e0ULL,
+ 0xa5754affe32648c2ULL, 0xf8ddac880d07396cULL, 0x6fa491468c548664ULL,
+ 0x0c7c5c1326bdbed1ULL, 0x4a33158f03930fb3ULL, 0x699abfc19f84d982ULL,
+ 0xe4fa2054a80b329cULL, 0x6707f9af438252faULL, 0x08a368e9cfd6d49eULL,
+ 0x47b1442c58fd25b8ULL, 0xbbb3dc5ebc91769bULL, 0x1665fe489061eac7ULL,
+ 0x33f27a811fa66310ULL, 0x93a609346838d547ULL, 0x30ed6d4c98cec263ULL,
+ 0x1dd9816cd8df9f2aULL, 0x94662a03063b1e7bULL, 0x83fdd9fbeb896066ULL,
+ 0x7b207573e68e590aULL, 0x5f49fc0a149a4407ULL, 0x343259b671a5a82cULL,
+ 0xfbc2bb458a6f981fULL, 0xc272b350a0a41a38ULL, 0x3aaf1fd8ada32354ULL,
+ 0x6cbb868b0b3c2717ULL, 0xa2b569c88d2583feULL, 0xf180c9d1bf027928ULL,
+ 0xaf37386bd64ba9f5ULL, 0x12bacab2790a8088ULL, 0x4c0d3b0810435055ULL,
+ 0xb2eeb9070e9436dfULL, 0xc5b29067cea7d104ULL, 0xdcb425f1ff132461ULL,
+ 0x4f122cc5972bf126ULL, 0xac282fa651230886ULL, 0xe7e537992f6393efULL,
+ 0xe61b3a2952b00735ULL, 0x709c0a57ae302ce7ULL, 0xe02514ae416058d3ULL,
+ 0xc44c9dd7b37445deULL, 0x5a68c5408022ba92ULL, 0x1c278cdca50c0bf0ULL,
+ 0x6e5a9cf6f18712beULL, 0x86dce0b17f319ef3ULL, 0x2d34ec2040115d49ULL,
+ 0x4bcd183f7e409b69ULL, 0x2815d56ad4a9a3dcULL, 0x24698979f2141d0dULL,
+ 0x0000000000000000ULL, 0x1ec696a15fb73e59ULL, 0xd86b110b16784e2eULL,
+ 0x8e7f8858b0e74a6dULL, 0x063e2e8713d05fe6ULL, 0xe2c40ed3bbdb6d7aULL,
+ 0xb1f1aeca89fc97acULL, 0xe1db191e3cb3cc09ULL, 0x6418ee62c4eaf389ULL,
+ 0xc6ad87aa49cf7077ULL, 0xd6f65765ca7ec556ULL, 0x9afb6c6dda3d9503ULL,
+ 0x7ce05644888d9236ULL, 0x8d609f95378feb1eULL, 0x23a9aa4e9c17d631ULL,
+ 0x6226c0e5d73aac6fULL, 0x56149953a69f0443ULL, 0xeeb852c09d66d3abULL,
+ 0x2b0ac2a753c102afULL, 0x07c023376e03cb3cULL, 0x2ccae1903dc2c993ULL,
+ 0xd3d76e2f5ec63bc3ULL, 0x9e2458973356ff4cULL, 0xa66a5d32644ee9b1ULL,
+ 0x0a427294356de137ULL, 0x783f62be61e6f879ULL, 0x1344c70204d91452ULL,
+ 0x5b96c8f0fdf12e48ULL, 0xa90916ecc59bf613ULL, 0xbe92e5142829880eULL,
+ 0x727d102a548b194eULL, 0x1be7afebcb0fc0ccULL, 0x3e702b2244c8491bULL,
+ 0xd5e940a84d166425ULL, 0x66f9f41f3e51c620ULL, 0xabe80c913f20c3baULL,
+ 0xf07ec461c2d1edf2ULL, 0xf361d3ac45b94c81ULL, 0x0521394a94b8fe95ULL,
+ 0xadd622162cf09c5cULL, 0xe97871f7f3651897ULL, 0xf4a1f09b2bba87bdULL,
+ 0x095d6559b2054044ULL, 0x0bbc7f2448be75edULL, 0x2af4cf172e129675ULL,
+ 0x157ae98517094bb4ULL, 0x9fda55274e856b96ULL, 0x914713499283e0eeULL,
+ 0xb952c623462a4332ULL, 0x74433ead475b46a8ULL, 0x8b5eb112245fb4f8ULL,
+ 0xa34b6478f0f61724ULL, 0x11a5dd7ffe6221fbULL, 0xc16da49d27ccbb4bULL,
+ 0x76a224d0bde07301ULL, 0x8aa0bca2598c2022ULL, 0x4df336b86d90c48fULL,
+ 0xea67663a740db9e4ULL, 0xef465f70e0b54771ULL, 0x39b008152acb8227ULL,
+ 0x7d1e5bf4f55e06ecULL, 0x105bd0cf83b1b521ULL, 0x775c2960c033e7dbULL,
+ 0x7e014c397236a79fULL, 0x811cc386113255cfULL, 0xeda7450d1a0e72d8ULL,
+ 0x5889df3d7a998f3bULL, 0x2e2bfbedc779fc3aULL, 0xce0eef438619a4e9ULL,
+ 0x372d4e7bf6cd095fULL, 0x04df34fae96b6a4fULL, 0xf923a13870d4adb6ULL,
+ 0xa1aa7e050a4d228dULL, 0xa8f71b5cb84862c9ULL, 0xb52e9a306097fde3ULL,
+ 0x0d8251a35b6e2a0bULL, 0x2257a7fee1c442ebULL, 0x73831d9a29588d94ULL,
+ 0x51d4ba64c89ccf7fULL, 0x502ab7d4b54f5ba5ULL, 0x97793dce8153bf08ULL,
+ 0xe5042de4d5d8a646ULL, 0x9687307efc802bd2ULL, 0xa05473b5779eb657ULL,
+ 0xb4d097801d446939ULL, 0xcff0e2f3fbca3033ULL, 0xc38cbee0dd778ee2ULL,
+ 0x464f499c252eb162ULL, 0xcad1dbb96f72cea6ULL, 0xba4dd1eec142e241ULL,
+ 0xb00fa37af42f0376ULL
+ }, {
+ 0xcce4cd3aa968b245ULL, 0x089d5484e80b7fafULL, 0x638246c1b3548304ULL,
+ 0xd2fe0ec8c2355492ULL, 0xa7fbdf7ff2374eeeULL, 0x4df1600c92337a16ULL,
+ 0x84e503ea523b12fbULL, 0x0790bbfd53ab0c4aULL, 0x198a780f38f6ea9dULL,
+ 0x2ab30c8f55ec48cbULL, 0xe0f7fed6b2c49db5ULL, 0xb6ecf3f422cadbdcULL,
+ 0x409c9a541358df11ULL, 0xd3ce8a56dfde3fe3ULL, 0xc3e9224312c8c1a0ULL,
+ 0x0d6dfa58816ba507ULL, 0xddf3e1b179952777ULL, 0x04c02a42748bb1d9ULL,
+ 0x94c2abff9f2decb8ULL, 0x4f91752da8f8acf4ULL, 0x78682befb169bf7bULL,
+ 0xe1c77a48af2ff6c4ULL, 0x0c5d7ec69c80ce76ULL, 0x4cc1e4928fd81167ULL,
+ 0xfeed3d24d9997b62ULL, 0x518bb6dfc3a54a23ULL, 0x6dbf2d26151f9b90ULL,
+ 0xb5bc624b05ea664fULL, 0xe86aaa525acfe21aULL, 0x4801ced0fb53a0beULL,
+ 0xc91463e6c00868edULL, 0x1027a815cd16fe43ULL, 0xf67069a0319204cdULL,
+ 0xb04ccc976c8abce7ULL, 0xc0b9b3fc35e87c33ULL, 0xf380c77c58f2de65ULL,
+ 0x50bb3241de4e2152ULL, 0xdf93f490435ef195ULL, 0xf1e0d25d62390887ULL,
+ 0xaf668bfb1a3c3141ULL, 0xbc11b251f00a7291ULL, 0x73a5eed47e427d47ULL,
+ 0x25bee3f6ee4c3b2eULL, 0x43cc0beb34786282ULL, 0xc824e778dde3039cULL,
+ 0xf97d86d98a327728ULL, 0xf2b043e24519b514ULL, 0xe297ebf7880f4b57ULL,
+ 0x3a94a49a98fab688ULL, 0x868516cb68f0c419ULL, 0xeffa11af0964ee50ULL,
+ 0xa4ab4ec0d517f37dULL, 0xa9c6b498547c567aULL, 0x8e18424f80fbbbb6ULL,
+ 0x0bcdc53bcf2bc23cULL, 0x137739aaea3643d0ULL, 0x2c1333ec1bac2ff0ULL,
+ 0x8d48d3f0a7db0625ULL, 0x1e1ac3f26b5de6d7ULL, 0xf520f81f16b2b95eULL,
+ 0x9f0f6ec450062e84ULL, 0x0130849e1deb6b71ULL, 0xd45e31ab8c7533a9ULL,
+ 0x652279a2fd14e43fULL, 0x3209f01e70f1c927ULL, 0xbe71a770cac1a473ULL,
+ 0x0e3d6be7a64b1894ULL, 0x7ec8148cff29d840ULL, 0xcb7476c7fac3be0fULL,
+ 0x72956a4a63a91636ULL, 0x37f95ec21991138fULL, 0x9e3fea5a4ded45f5ULL,
+ 0x7b38ba50964902e8ULL, 0x222e580bbde73764ULL, 0x61e253e0899f55e6ULL,
+ 0xfc8d2805e352ad80ULL, 0x35994be3235ac56dULL, 0x09add01af5e014deULL,
+ 0x5e8659a6780539c6ULL, 0xb17c48097161d796ULL, 0x026015213acbd6e2ULL,
+ 0xd1ae9f77e515e901ULL, 0xb7dc776a3f21b0adULL, 0xaba6a1b96eb78098ULL,
+ 0x9bcf4486248d9f5dULL, 0x582666c536455efdULL, 0xfdbdac9bfeb9c6f1ULL,
+ 0xc47999be4163cdeaULL, 0x765540081722a7efULL, 0x3e548ed8ec710751ULL,
+ 0x3d041f67cb51bac2ULL, 0x7958af71ac82d40aULL, 0x36c9da5c047a78feULL,
+ 0xed9a048e33af38b2ULL, 0x26ee7249c96c86bdULL, 0x900281bdeba65d61ULL,
+ 0x11172c8bd0fd9532ULL, 0xea0abf73600434f8ULL, 0x42fc8f75299309f3ULL,
+ 0x34a9cf7d3eb1ae1cULL, 0x2b838811480723baULL, 0x5ce64c8742ceef24ULL,
+ 0x1adae9b01fd6570eULL, 0x3c349bf9d6bad1b3ULL, 0x82453c891c7b75c0ULL,
+ 0x97923a40b80d512bULL, 0x4a61dbf1c198765cULL, 0xb48ce6d518010d3eULL,
+ 0xcfb45c858e480fd6ULL, 0xd933cbf30d1e96aeULL, 0xd70ea014ab558e3aULL,
+ 0xc189376228031742ULL, 0x9262949cd16d8b83ULL, 0xeb3a3bed7def5f89ULL,
+ 0x49314a4ee6b8cbcfULL, 0xdcc3652f647e4c06ULL, 0xda635a4c2a3e2b3dULL,
+ 0x470c21a940f3d35bULL, 0x315961a157d174b4ULL, 0x6672e81dda3459acULL,
+ 0x5b76f77a1165e36eULL, 0x445cb01667d36ec8ULL, 0xc5491d205c88a69bULL,
+ 0x456c34887a3805b9ULL, 0xffddb9bac4721013ULL, 0x99af51a71e4649bfULL,
+ 0xa15be01cbc7729d5ULL, 0x52db2760e485f7b0ULL, 0x8c78576eba306d54ULL,
+ 0xae560f6507d75a30ULL, 0x95f22f6182c687c9ULL, 0x71c5fbf54489aba5ULL,
+ 0xca44f259e728d57eULL, 0x88b87d2ccebbdc8dULL, 0xbab18d32be4a15aaULL,
+ 0x8be8ec93e99b611eULL, 0x17b713e89ebdf209ULL, 0xb31c5d284baa0174ULL,
+ 0xeeca9531148f8521ULL, 0xb8d198138481c348ULL, 0x8988f9b2d350b7fcULL,
+ 0xb9e11c8d996aa839ULL, 0x5a4673e40c8e881fULL, 0x1687977683569978ULL,
+ 0xbf4123eed72acf02ULL, 0x4ea1f1b3b513c785ULL, 0xe767452be16f91ffULL,
+ 0x7505d1b730021a7cULL, 0xa59bca5ec8fc980cULL, 0xad069eda20f7e7a3ULL,
+ 0x38f4b1bba231606aULL, 0x60d2d77e94743e97ULL, 0x9affc0183966f42cULL,
+ 0x248e6768f3a7505fULL, 0xcdd449a4b483d934ULL, 0x87b59255751baf68ULL,
+ 0x1bea6d2e023d3c7fULL, 0x6b1f12455b5ffcabULL, 0x743555292de9710dULL,
+ 0xd8034f6d10f5fddfULL, 0xc6198c9f7ba81b08ULL, 0xbb8109aca3a17edbULL,
+ 0xfa2d1766ad12cabbULL, 0xc729080166437079ULL, 0x9c5fff7b77269317ULL,
+ 0x0000000000000000ULL, 0x15d706c9a47624ebULL, 0x6fdf38072fd44d72ULL,
+ 0x5fb6dd3865ee52b7ULL, 0xa33bf53d86bcff37ULL, 0xe657c1b5fc84fa8eULL,
+ 0xaa962527735cebe9ULL, 0x39c43525bfda0b1bULL, 0x204e4d2a872ce186ULL,
+ 0x7a083ece8ba26999ULL, 0x554b9c9db72efbfaULL, 0xb22cd9b656416a05ULL,
+ 0x96a2bedea5e63a5aULL, 0x802529a826b0a322ULL, 0x8115ad363b5bc853ULL,
+ 0x8375b81701901eb1ULL, 0x3069e53f4a3a1fc5ULL, 0xbd2136cfede119e0ULL,
+ 0x18bafc91251d81ecULL, 0x1d4a524d4c7d5b44ULL, 0x05f0aedc6960daa8ULL,
+ 0x29e39d3072ccf558ULL, 0x70f57f6b5962c0d4ULL, 0x989fd53903ad22ceULL,
+ 0xf84d024797d91c59ULL, 0x547b1803aac5908bULL, 0xf0d056c37fd263f6ULL,
+ 0xd56eb535919e58d8ULL, 0x1c7ad6d351963035ULL, 0x2e7326cd2167f912ULL,
+ 0xac361a443d1c8cd2ULL, 0x697f076461942a49ULL, 0x4b515f6fdc731d2dULL,
+ 0x8ad8680df4700a6fULL, 0x41ac1eca0eb3b460ULL, 0x7d988533d80965d3ULL,
+ 0xa8f6300649973d0bULL, 0x7765c4960ac9cc9eULL, 0x7ca801adc5e20ea2ULL,
+ 0xdea3700e5eb59ae4ULL, 0xa06b6482a19c42a4ULL, 0x6a2f96db46b497daULL,
+ 0x27def6d7d487edccULL, 0x463ca5375d18b82aULL, 0xa6cb5be1efdc259fULL,
+ 0x53eba3fef96e9cc1ULL, 0xce84d81b93a364a7ULL, 0xf4107c810b59d22fULL,
+ 0x333974806d1aa256ULL, 0x0f0def79bba073e5ULL, 0x231edc95a00c5c15ULL,
+ 0xe437d494c64f2c6cULL, 0x91320523f64d3610ULL, 0x67426c83c7df32ddULL,
+ 0x6eefbc99323f2603ULL, 0x9d6f7be56acdf866ULL, 0x5916e25b2bae358cULL,
+ 0x7ff89012e2c2b331ULL, 0x035091bf2720bd93ULL, 0x561b0d22900e4669ULL,
+ 0x28d319ae6f279e29ULL, 0x2f43a2533c8c9263ULL, 0xd09e1be9f8fe8270ULL,
+ 0xf740ed3e2c796fbcULL, 0xdb53ded237d5404cULL, 0x62b2c25faebfe875ULL,
+ 0x0afd41a5d2c0a94dULL, 0x6412fd3ce0ff8f4eULL, 0xe3a76f6995e42026ULL,
+ 0x6c8fa9b808f4f0e1ULL, 0xc2d9a6dd0f23aad1ULL, 0x8f28c6d19d10d0c7ULL,
+ 0x85d587744fd0798aULL, 0xa20b71a39b579446ULL, 0x684f83fa7c7f4138ULL,
+ 0xe507500adba4471dULL, 0x3f640a46f19a6c20ULL, 0x1247bd34f7dd28a1ULL,
+ 0x2d23b77206474481ULL, 0x93521002cc86e0f2ULL, 0x572b89bc8de52d18ULL,
+ 0xfb1d93f8b0f9a1caULL, 0xe95a2ecc4724896bULL, 0x3ba420048511ddf9ULL,
+ 0xd63e248ab6bee54bULL, 0x5dd6c8195f258455ULL, 0x06a03f634e40673bULL,
+ 0x1f2a476c76b68da6ULL, 0x217ec9b49ac78af7ULL, 0xecaa80102e4453c3ULL,
+ 0x14e78257b99d4f9aULL
+ }, {
+ 0x20329b2cc87bba05ULL, 0x4f5eb6f86546a531ULL, 0xd4f44775f751b6b1ULL,
+ 0x8266a47b850dfa8bULL, 0xbb986aa15a6ca985ULL, 0xc979eb08f9ae0f99ULL,
+ 0x2da6f447a2375ea1ULL, 0x1e74275dcd7d8576ULL, 0xbc20180a800bc5f8ULL,
+ 0xb4a2f701b2dc65beULL, 0xe726946f981b6d66ULL, 0x48e6c453bf21c94cULL,
+ 0x42cad9930f0a4195ULL, 0xefa47b64aacccd20ULL, 0x71180a8960409a42ULL,
+ 0x8bb3329bf6a44e0cULL, 0xd34c35de2d36daccULL, 0xa92f5b7cbc23dc96ULL,
+ 0xb31a85aa68bb09c3ULL, 0x13e04836a73161d2ULL, 0xb24dfc4129c51d02ULL,
+ 0x8ae44b70b7da5acdULL, 0xe671ed84d96579a7ULL, 0xa4bb3417d66f3832ULL,
+ 0x4572ab38d56d2de8ULL, 0xb1b47761ea47215cULL, 0xe81c09cf70aba15dULL,
+ 0xffbdb872ce7f90acULL, 0xa8782297fd5dc857ULL, 0x0d946f6b6a4ce4a4ULL,
+ 0xe4df1f4f5b995138ULL, 0x9ebc71edca8c5762ULL, 0x0a2c1dc0b02b88d9ULL,
+ 0x3b503c115d9d7b91ULL, 0xc64376a8111ec3a2ULL, 0xcec199a323c963e4ULL,
+ 0xdc76a87ec58616f7ULL, 0x09d596e073a9b487ULL, 0x14583a9d7d560dafULL,
+ 0xf4c6dc593f2a0cb4ULL, 0xdd21d19584f80236ULL, 0x4a4836983ddde1d3ULL,
+ 0xe58866a41ae745f9ULL, 0xf591a5b27e541875ULL, 0x891dc05074586693ULL,
+ 0x5b068c651810a89eULL, 0xa30346bc0c08544fULL, 0x3dbf3751c684032dULL,
+ 0x2a1e86ec785032dcULL, 0xf73f5779fca830eaULL, 0xb60c05ca30204d21ULL,
+ 0x0cc316802b32f065ULL, 0x8770241bdd96be69ULL, 0xb861e18199ee95dbULL,
+ 0xf805cad91418fcd1ULL, 0x29e70dccbbd20e82ULL, 0xc7140f435060d763ULL,
+ 0x0f3a9da0e8b0cc3bULL, 0xa2543f574d76408eULL, 0xbd7761e1c175d139ULL,
+ 0x4b1f4f737ca3f512ULL, 0x6dc2df1f2fc137abULL, 0xf1d05c3967b14856ULL,
+ 0xa742bf3715ed046cULL, 0x654030141d1697edULL, 0x07b872abda676c7dULL,
+ 0x3ce84eba87fa17ecULL, 0xc1fb0403cb79afdfULL, 0x3e46bc7105063f73ULL,
+ 0x278ae987121cd678ULL, 0xa1adb4778ef47cd0ULL, 0x26dd906c5362c2b9ULL,
+ 0x05168060589b44e2ULL, 0xfbfc41f9d79ac08fULL, 0x0e6de44ba9ced8faULL,
+ 0x9feb08068bf243a3ULL, 0x7b341749d06b129bULL, 0x229c69e74a87929aULL,
+ 0xe09ee6c4427c011bULL, 0x5692e30e725c4c3aULL, 0xda99a33e5e9f6e4bULL,
+ 0x353dd85af453a36bULL, 0x25241b4c90e0fee7ULL, 0x5de987258309d022ULL,
+ 0xe230140fc0802984ULL, 0x93281e86a0c0b3c6ULL, 0xf229d719a4337408ULL,
+ 0x6f6c2dd4ad3d1f34ULL, 0x8ea5b2fbae3f0aeeULL, 0x8331dd90c473ee4aULL,
+ 0x346aa1b1b52db7aaULL, 0xdf8f235e06042aa9ULL, 0xcc6f6b68a1354b7bULL,
+ 0x6c95a6f46ebf236aULL, 0x52d31a856bb91c19ULL, 0x1a35ded6d498d555ULL,
+ 0xf37eaef2e54d60c9ULL, 0x72e181a9a3c2a61cULL, 0x98537aad51952fdeULL,
+ 0x16f6c856ffaa2530ULL, 0xd960281e9d1d5215ULL, 0x3a0745fa1ce36f50ULL,
+ 0x0b7b642bf1559c18ULL, 0x59a87eae9aec8001ULL, 0x5e100c05408bec7cULL,
+ 0x0441f98b19e55023ULL, 0xd70dcc5534d38aefULL, 0x927f676de1bea707ULL,
+ 0x9769e70db925e3e5ULL, 0x7a636ea29115065aULL, 0x468b201816ef11b6ULL,
+ 0xab81a9b73edff409ULL, 0xc0ac7de88a07bb1eULL, 0x1f235eb68c0391b7ULL,
+ 0x6056b074458dd30fULL, 0xbe8eeac102f7ed67ULL, 0xcd381283e04b5fbaULL,
+ 0x5cbefecec277c4e3ULL, 0xd21b4c356c48ce0dULL, 0x1019c31664b35d8cULL,
+ 0x247362a7d19eea26ULL, 0xebe582efb3299d03ULL, 0x02aef2cb82fc289fULL,
+ 0x86275df09ce8aaa8ULL, 0x28b07427faac1a43ULL, 0x38a9b7319e1f47cfULL,
+ 0xc82e92e3b8d01b58ULL, 0x06ef0b409b1978bcULL, 0x62f842bfc771fb90ULL,
+ 0x9904034610eb3b1fULL, 0xded85ab5477a3e68ULL, 0x90d195a663428f98ULL,
+ 0x5384636e2ac708d8ULL, 0xcbd719c37b522706ULL, 0xae9729d76644b0ebULL,
+ 0x7c8c65e20a0c7ee6ULL, 0x80c856b007f1d214ULL, 0x8c0b40302cc32271ULL,
+ 0xdbcedad51fe17a8aULL, 0x740e8ae938dbdea0ULL, 0xa615c6dc549310adULL,
+ 0x19cc55f6171ae90bULL, 0x49b1bdb8fe5fdd8dULL, 0xed0a89af2830e5bfULL,
+ 0x6a7aadb4f5a65bd6ULL, 0x7e22972988f05679ULL, 0xf952b3325566e810ULL,
+ 0x39fecedadf61530eULL, 0x6101c99f04f3c7ceULL, 0x2e5f7f6761b562ffULL,
+ 0xf08725d226cf5c97ULL, 0x63af3b54860fef51ULL, 0x8ff2cb10ef411e2fULL,
+ 0x884ab9bb35267252ULL, 0x4df04433e7ba8daeULL, 0x9afd8866d3690741ULL,
+ 0x66b9bb34de94abb3ULL, 0x9baaf18d92171380ULL, 0x543c11c5f0a064a5ULL,
+ 0x17a1b1bdbed431f1ULL, 0xb5f58eeaf3a2717fULL, 0xc355f6c849858740ULL,
+ 0xec5df044694ef17eULL, 0xd83751f5dc6346d4ULL, 0xfc4433520dfdacf2ULL,
+ 0x0000000000000000ULL, 0x5a51f58e596ebc5fULL, 0x3285aaf12e34cf16ULL,
+ 0x8d5c39db6dbd36b0ULL, 0x12b731dde64f7513ULL, 0x94906c2d7aa7dfbbULL,
+ 0x302b583aacc8e789ULL, 0x9d45facd090e6b3cULL, 0x2165e2c78905aec4ULL,
+ 0x68d45f7f775a7349ULL, 0x189b2c1d5664fdcaULL, 0xe1c99f2f030215daULL,
+ 0x6983269436246788ULL, 0x8489af3b1e148237ULL, 0xe94b702431d5b59cULL,
+ 0x33d2d31a6f4adbd7ULL, 0xbfd9932a4389f9a6ULL, 0xb0e30e8aab39359dULL,
+ 0xd1e2c715afcaf253ULL, 0x150f43763c28196eULL, 0xc4ed846393e2eb3dULL,
+ 0x03f98b20c3823c5eULL, 0xfd134ab94c83b833ULL, 0x556b682eb1de7064ULL,
+ 0x36c4537a37d19f35ULL, 0x7559f30279a5ca61ULL, 0x799ae58252973a04ULL,
+ 0x9c12832648707ffdULL, 0x78cd9c6913e92ec5ULL, 0x1d8dac7d0effb928ULL,
+ 0x439da0784e745554ULL, 0x413352b3cc887dcbULL, 0xbacf134a1b12bd44ULL,
+ 0x114ebafd25cd494dULL, 0x2f08068c20cb763eULL, 0x76a07822ba27f63fULL,
+ 0xeab2fb04f25789c2ULL, 0xe3676de481fe3d45ULL, 0x1b62a73d95e6c194ULL,
+ 0x641749ff5c68832cULL, 0xa5ec4dfc97112cf3ULL, 0xf6682e92bdd6242bULL,
+ 0x3f11c59a44782bb2ULL, 0x317c21d1edb6f348ULL, 0xd65ab5be75ad9e2eULL,
+ 0x6b2dd45fb4d84f17ULL, 0xfaab381296e4d44eULL, 0xd0b5befeeeb4e692ULL,
+ 0x0882ef0b32d7a046ULL, 0x512a91a5a83b2047ULL, 0x963e9ee6f85bf724ULL,
+ 0x4e09cf132438b1f0ULL, 0x77f701c9fb59e2feULL, 0x7ddb1c094b726a27ULL,
+ 0x5f4775ee01f5f8bdULL, 0x9186ec4d223c9b59ULL, 0xfeeac1998f01846dULL,
+ 0xac39db1ce4b89874ULL, 0xb75b7c21715e59e0ULL, 0xafc0503c273aa42aULL,
+ 0x6e3b543fec430bf5ULL, 0x704f7362213e8e83ULL, 0x58ff0745db9294c0ULL,
+ 0x67eec2df9feabf72ULL, 0xa0facd9ccf8a6811ULL, 0xb936986ad890811aULL,
+ 0x95c715c63bd9cb7aULL, 0xca8060283a2c33c7ULL, 0x507de84ee9453486ULL,
+ 0x85ded6d05f6a96f6ULL, 0x1cdad5964f81ade9ULL, 0xd5a33e9eb62fa270ULL,
+ 0x40642b588df6690aULL, 0x7f75eec2c98e42b8ULL, 0x2cf18dace3494a60ULL,
+ 0x23cb100c0bf9865bULL, 0xeef3028febb2d9e1ULL, 0x4425d2d394133929ULL,
+ 0xaad6d05c7fa1e0c8ULL, 0xad6ea2f7a5c68cb5ULL, 0xc2028f2308fb9381ULL,
+ 0x819f2f5b468fc6d5ULL, 0xc5bafd88d29cfffcULL, 0x47dc59f357910577ULL,
+ 0x2b49ff07392e261dULL, 0x57c59ae5332258fbULL, 0x73b6f842e2bcb2ddULL,
+ 0xcf96e04862b77725ULL, 0x4ca73dd8a6c4996fULL, 0x015779eb417e14c1ULL,
+ 0x37932a9176af8bf4ULL
+ }, {
+ 0x190a2c9b249df23eULL, 0x2f62f8b62263e1e9ULL, 0x7a7f754740993655ULL,
+ 0x330b7ba4d5564d9fULL, 0x4c17a16a46672582ULL, 0xb22f08eb7d05f5b8ULL,
+ 0x535f47f40bc148ccULL, 0x3aec5d27d4883037ULL, 0x10ed0a1825438f96ULL,
+ 0x516101f72c233d17ULL, 0x13cc6f949fd04eaeULL, 0x739853c441474bfdULL,
+ 0x653793d90d3f5b1bULL, 0x5240647b96b0fc2fULL, 0x0c84890ad27623e0ULL,
+ 0xd7189b32703aaea3ULL, 0x2685de3523bd9c41ULL, 0x99317c5b11bffefaULL,
+ 0x0d9baa854f079703ULL, 0x70b93648fbd48ac5ULL, 0xa80441fce30bc6beULL,
+ 0x7287704bdc36ff1eULL, 0xb65384ed33dc1f13ULL, 0xd36417343ee34408ULL,
+ 0x39cd38ab6e1bf10fULL, 0x5ab861770a1f3564ULL, 0x0ebacf09f594563bULL,
+ 0xd04572b884708530ULL, 0x3cae9722bdb3af47ULL, 0x4a556b6f2f5cbaf2ULL,
+ 0xe1704f1f76c4bd74ULL, 0x5ec4ed7144c6dfcfULL, 0x16afc01d4c7810e6ULL,
+ 0x283f113cd629ca7aULL, 0xaf59a8761741ed2dULL, 0xeed5a3991e215facULL,
+ 0x3bf37ea849f984d4ULL, 0xe413e096a56ce33cULL, 0x2c439d3a98f020d1ULL,
+ 0x637559dc6404c46bULL, 0x9e6c95d1e5f5d569ULL, 0x24bb9836045fe99aULL,
+ 0x44efa466dac8ecc9ULL, 0xc6eab2a5c80895d6ULL, 0x803b50c035220cc4ULL,
+ 0x0321658cba93c138ULL, 0x8f9ebc465dc7ee1cULL, 0xd15a5137190131d3ULL,
+ 0x0fa5ec8668e5e2d8ULL, 0x91c979578d1037b1ULL, 0x0642ca05693b9f70ULL,
+ 0xefca80168350eb4fULL, 0x38d21b24f36a45ecULL, 0xbeab81e1af73d658ULL,
+ 0x8cbfd9cae7542f24ULL, 0xfd19cc0d81f11102ULL, 0x0ac6430fbb4dbc90ULL,
+ 0x1d76a09d6a441895ULL, 0x2a01573ff1cbbfa1ULL, 0xb572e161894fde2bULL,
+ 0x8124734fa853b827ULL, 0x614b1fdf43e6b1b0ULL, 0x68ac395c4238cc18ULL,
+ 0x21d837bfd7f7b7d2ULL, 0x20c714304a860331ULL, 0x5cfaab726324aa14ULL,
+ 0x74c5ba4eb50d606eULL, 0xf3a3030474654739ULL, 0x23e671bcf015c209ULL,
+ 0x45f087e947b9582aULL, 0xd8bd77b418df4c7bULL, 0xe06f6c90ebb50997ULL,
+ 0x0bd96080263c0873ULL, 0x7e03f9410e40dcfeULL, 0xb8e94be4c6484928ULL,
+ 0xfb5b0608e8ca8e72ULL, 0x1a2b49179e0e3306ULL, 0x4e29e76961855059ULL,
+ 0x4f36c4e6fcf4e4baULL, 0x49740ee395cf7bcaULL, 0xc2963ea386d17f7dULL,
+ 0x90d65ad810618352ULL, 0x12d34c1b02a1fa4dULL, 0xfa44258775bb3a91ULL,
+ 0x18150f14b9ec46ddULL, 0x1491861e6b9a653dULL, 0x9a1019d7ab2c3fc2ULL,
+ 0x3668d42d06fe13d7ULL, 0xdcc1fbb25606a6d0ULL, 0x969490dd795a1c22ULL,
+ 0x3549b1a1bc6dd2efULL, 0xc94f5e23a0ed770eULL, 0xb9f6686b5b39fdcbULL,
+ 0xc4d4f4a6efeae00dULL, 0xe732851a1fff2204ULL, 0x94aad6de5eb869f9ULL,
+ 0x3f8ff2ae07206e7fULL, 0xfe38a9813b62d03aULL, 0xa7a1ad7a8bee2466ULL,
+ 0x7b6056c8dde882b6ULL, 0x302a1e286fc58ca7ULL, 0x8da0fa457a259bc7ULL,
+ 0xb3302b64e074415bULL, 0x5402ae7eff8b635fULL, 0x08f8050c9cafc94bULL,
+ 0xae468bf98a3059ceULL, 0x88c355cca98dc58fULL, 0xb10e6d67c7963480ULL,
+ 0xbad70de7e1aa3cf3ULL, 0xbfb4a26e320262bbULL, 0xcb711820870f02d5ULL,
+ 0xce12b7a954a75c9dULL, 0x563ce87dd8691684ULL, 0x9f73b65e7884618aULL,
+ 0x2b1e74b06cba0b42ULL, 0x47cec1ea605b2df1ULL, 0x1c698312f735ac76ULL,
+ 0x5fdbcefed9b76b2cULL, 0x831a354c8fb1cdfcULL, 0x820516c312c0791fULL,
+ 0xb74ca762aeadabf0ULL, 0xfc06ef821c80a5e1ULL, 0x5723cbf24518a267ULL,
+ 0x9d4df05d5f661451ULL, 0x588627742dfd40bfULL, 0xda8331b73f3d39a0ULL,
+ 0x17b0e392d109a405ULL, 0xf965400bcf28fba9ULL, 0x7c3dbf4229a2a925ULL,
+ 0x023e460327e275dbULL, 0x6cd0b55a0ce126b3ULL, 0xe62da695828e96e7ULL,
+ 0x42ad6e63b3f373b9ULL, 0xe50cc319381d57dfULL, 0xc5cbd729729b54eeULL,
+ 0x46d1e265fd2a9912ULL, 0x6428b056904eeff8ULL, 0x8be23040131e04b7ULL,
+ 0x6709d5da2add2ec0ULL, 0x075de98af44a2b93ULL, 0x8447dcc67bfbe66fULL,
+ 0x6616f655b7ac9a23ULL, 0xd607b8bded4b1a40ULL, 0x0563af89d3a85e48ULL,
+ 0x3db1b4ad20c21ba4ULL, 0x11f22997b8323b75ULL, 0x292032b34b587e99ULL,
+ 0x7f1cdace9331681dULL, 0x8e819fc9c0b65affULL, 0xa1e3677fe2d5bb16ULL,
+ 0xcd33d225ee349da5ULL, 0xd9a2543b85aef898ULL, 0x795e10cbfa0af76dULL,
+ 0x25a4bbb9992e5d79ULL, 0x78413344677b438eULL, 0xf0826688cef68601ULL,
+ 0xd27b34bba392f0ebULL, 0x551d8df162fad7bcULL, 0x1e57c511d0d7d9adULL,
+ 0xdeffbdb171e4d30bULL, 0xf4feea8e802f6caaULL, 0xa480c8f6317de55eULL,
+ 0xa0fc44f07fa40ff5ULL, 0x95b5f551c3c9dd1aULL, 0x22f952336d6476eaULL,
+ 0x0000000000000000ULL, 0xa6be8ef5169f9085ULL, 0xcc2cf1aa73452946ULL,
+ 0x2e7ddb39bf12550aULL, 0xd526dd3157d8db78ULL, 0x486b2d6c08becf29ULL,
+ 0x9b0f3a58365d8b21ULL, 0xac78cdfaadd22c15ULL, 0xbc95c7e28891a383ULL,
+ 0x6a927f5f65dab9c3ULL, 0xc3891d2c1ba0cb9eULL, 0xeaa92f9f50f8b507ULL,
+ 0xcf0d9426c9d6e87eULL, 0xca6e3baf1a7eb636ULL, 0xab25247059980786ULL,
+ 0x69b31ad3df4978fbULL, 0xe2512a93cc577c4cULL, 0xff278a0ea61364d9ULL,
+ 0x71a615c766a53e26ULL, 0x89dc764334fc716cULL, 0xf87a638452594f4aULL,
+ 0xf2bc208be914f3daULL, 0x8766b94ac1682757ULL, 0xbbc82e687cdb8810ULL,
+ 0x626a7a53f9757088ULL, 0xa2c202f358467a2eULL, 0x4d0882e5db169161ULL,
+ 0x09e7268301de7da8ULL, 0xe897699c771ac0dcULL, 0xc8507dac3d9cc3edULL,
+ 0xc0a878a0a1330aa6ULL, 0x978bb352e42ba8c1ULL, 0xe9884a13ea6b743fULL,
+ 0x279afdbabecc28a2ULL, 0x047c8c064ed9eaabULL, 0x507e2278b15289f4ULL,
+ 0x599904fbb08cf45cULL, 0xbd8ae46d15e01760ULL, 0x31353da7f2b43844ULL,
+ 0x8558ff49e68a528cULL, 0x76fbfc4d92ef15b5ULL, 0x3456922e211c660cULL,
+ 0x86799ac55c1993b4ULL, 0x3e90d1219a51da9cULL, 0x2d5cbeb505819432ULL,
+ 0x982e5fd48cce4a19ULL, 0xdb9c1238a24c8d43ULL, 0xd439febecaa96f9bULL,
+ 0x418c0bef0960b281ULL, 0x158ea591f6ebd1deULL, 0x1f48e69e4da66d4eULL,
+ 0x8afd13cf8e6fb054ULL, 0xf5e1c9011d5ed849ULL, 0xe34e091c5126c8afULL,
+ 0xad67ee7530a398f6ULL, 0x43b24dec2e82c75aULL, 0x75da99c1287cd48dULL,
+ 0x92e81cdb3783f689ULL, 0xa3dd217cc537cecdULL, 0x60543c50de970553ULL,
+ 0x93f73f54aaf2426aULL, 0xa91b62737e7a725dULL, 0xf19d4507538732e2ULL,
+ 0x77e4dfc20f9ea156ULL, 0x7d229ccdb4d31dc6ULL, 0x1b346a98037f87e5ULL,
+ 0xedf4c615a4b29e94ULL, 0x4093286094110662ULL, 0xb0114ee85ae78063ULL,
+ 0x6ff1d0d6b672e78bULL, 0x6dcf96d591909250ULL, 0xdfe09e3eec9567e8ULL,
+ 0x3214582b4827f97cULL, 0xb46dc2ee143e6ac8ULL, 0xf6c0ac8da7cd1971ULL,
+ 0xebb60c10cd8901e4ULL, 0xf7df8f023abcad92ULL, 0x9c52d3d2c217a0b2ULL,
+ 0x6b8d5cd0f8ab0d20ULL, 0x3777f7a29b8fa734ULL, 0x011f238f9d71b4e3ULL,
+ 0xc1b75b2f3c42be45ULL, 0x5de588fdfe551ef7ULL, 0x6eeef3592b035368ULL,
+ 0xaa3a07ffc4e9b365ULL, 0xecebe59a39c32a77ULL, 0x5ba742f8976e8187ULL,
+ 0x4b4a48e0b22d0e11ULL, 0xddded83dcb771233ULL, 0xa59feb79ac0c51bdULL,
+ 0xc7f5912a55792135ULL
+ }, {
+ 0x6d6ae04668a9b08aULL, 0x3ab3f04b0be8c743ULL, 0xe51e166b54b3c908ULL,
+ 0xbe90a9eb35c2f139ULL, 0xb2c7066637f2bec1ULL, 0xaa6945613392202cULL,
+ 0x9a28c36f3b5201ebULL, 0xddce5a93ab536994ULL, 0x0e34133ef6382827ULL,
+ 0x52a02ba1ec55048bULL, 0xa2f88f97c4b2a177ULL, 0x8640e513ca2251a5ULL,
+ 0xcdf1d36258137622ULL, 0xfe6cb708dedf8ddbULL, 0x8a174a9ec8121e5dULL,
+ 0x679896036b81560eULL, 0x59ed033395795feeULL, 0x1dd778ab8b74edafULL,
+ 0xee533ef92d9f926dULL, 0x2a8c79baf8a8d8f5ULL, 0x6bcf398e69b119f6ULL,
+ 0xe20491742fafdd95ULL, 0x276488e0809c2aecULL, 0xea955b82d88f5cceULL,
+ 0x7102c63a99d9e0c4ULL, 0xf9763017a5c39946ULL, 0x429fa2501f151b3dULL,
+ 0x4659c72bea05d59eULL, 0x984b7fdccf5a6634ULL, 0xf742232953fbb161ULL,
+ 0x3041860e08c021c7ULL, 0x747bfd9616cd9386ULL, 0x4bb1367192312787ULL,
+ 0x1b72a1638a6c44d3ULL, 0x4a0e68a6e8359a66ULL, 0x169a5039f258b6caULL,
+ 0xb98a2ef44edee5a4ULL, 0xd9083fe85e43a737ULL, 0x967f6ce239624e13ULL,
+ 0x8874f62d3c1a7982ULL, 0x3c1629830af06e3fULL, 0x9165ebfd427e5a8eULL,
+ 0xb5dd81794ceeaa5cULL, 0x0de8f15a7834f219ULL, 0x70bd98ede3dd5d25ULL,
+ 0xaccc9ca9328a8950ULL, 0x56664eda1945ca28ULL, 0x221db34c0f8859aeULL,
+ 0x26dbd637fa98970dULL, 0x1acdffb4f068f932ULL, 0x4585254f64090fa0ULL,
+ 0x72de245e17d53afaULL, 0x1546b25d7c546cf4ULL, 0x207e0ffffb803e71ULL,
+ 0xfaaad2732bcf4378ULL, 0xb462dfae36ea17bdULL, 0xcf926fd1ac1b11fdULL,
+ 0xe0672dc7dba7ba4aULL, 0xd3fa49ad5d6b41b3ULL, 0x8ba81449b216a3bcULL,
+ 0x14f9ec8a0650d115ULL, 0x40fc1ee3eb1d7ce2ULL, 0x23a2ed9b758ce44fULL,
+ 0x782c521b14fddc7eULL, 0x1c68267cf170504eULL, 0xbcf31558c1ca96e6ULL,
+ 0xa781b43b4ba6d235ULL, 0xf6fd7dfe29ff0c80ULL, 0xb0a4bad5c3fad91eULL,
+ 0xd199f51ea963266cULL, 0x414340349119c103ULL, 0x5405f269ed4dadf7ULL,
+ 0xabd61bb649969dcdULL, 0x6813dbeae7bdc3c8ULL, 0x65fb2ab09f8931d1ULL,
+ 0xf1e7fae152e3181dULL, 0xc1a67cef5a2339daULL, 0x7a4feea8e0f5bba1ULL,
+ 0x1e0b9acf05783791ULL, 0x5b8ebf8061713831ULL, 0x80e53cdbcb3af8d9ULL,
+ 0x7e898bd315e57502ULL, 0xc6bcfbf0213f2d47ULL, 0x95a38e86b76e942dULL,
+ 0x092e94218d243cbaULL, 0x8339debf453622e7ULL, 0xb11be402b9fe64ffULL,
+ 0x57d9100d634177c9ULL, 0xcc4e8db52217cbc3ULL, 0x3b0cae9c71ec7aa2ULL,
+ 0xfb158ca451cbfe99ULL, 0x2b33276d82ac6514ULL, 0x01bf5ed77a04bde1ULL,
+ 0xc5601994af33f779ULL, 0x75c4a3416cc92e67ULL, 0xf3844652a6eb7fc2ULL,
+ 0x3487e375fdd0ef64ULL, 0x18ae430704609eedULL, 0x4d14efb993298efbULL,
+ 0x815a620cb13e4538ULL, 0x125c354207487869ULL, 0x9eeea614ce42cf48ULL,
+ 0xce2d3106d61fac1cULL, 0xbbe99247bad6827bULL, 0x071a871f7b1c149dULL,
+ 0x2e4a1cc10db81656ULL, 0x77a71ff298c149b8ULL, 0x06a5d9c80118a97cULL,
+ 0xad73c27e488e34b1ULL, 0x443a7b981e0db241ULL, 0xe3bbcfa355ab6074ULL,
+ 0x0af276450328e684ULL, 0x73617a896dd1871bULL, 0x58525de4ef7de20fULL,
+ 0xb7be3dcab8e6cd83ULL, 0x19111dd07e64230cULL, 0x842359a03e2a367aULL,
+ 0x103f89f1f3401fb6ULL, 0xdc710444d157d475ULL, 0xb835702334da5845ULL,
+ 0x4320fc876511a6dcULL, 0xd026abc9d3679b8dULL, 0x17250eee885c0b2bULL,
+ 0x90dab52a387ae76fULL, 0x31fed8d972c49c26ULL, 0x89cba8fa461ec463ULL,
+ 0x2ff5421677bcabb7ULL, 0x396f122f85e41d7dULL, 0xa09b332430bac6a8ULL,
+ 0xc888e8ced7070560ULL, 0xaeaf201ac682ee8fULL, 0x1180d7268944a257ULL,
+ 0xf058a43628e7a5fcULL, 0xbd4c4b8fbbce2b07ULL, 0xa1246df34abe7b49ULL,
+ 0x7d5569b79be9af3cULL, 0xa9b5a705bd9efa12ULL, 0xdb6b835baa4bc0e8ULL,
+ 0x05793bac8f147342ULL, 0x21c1512881848390ULL, 0xfdb0556c50d357e5ULL,
+ 0x613d4fcb6a99ff72ULL, 0x03dce2648e0cda3eULL, 0xe949b9e6568386f0ULL,
+ 0xfc0f0bbb2ad7ea04ULL, 0x6a70675913b5a417ULL, 0x7f36d5046fe1c8e3ULL,
+ 0x0c57af8d02304ff8ULL, 0x32223abdfcc84618ULL, 0x0891caf6f720815bULL,
+ 0xa63eeaec31a26fd4ULL, 0x2507345374944d33ULL, 0x49d28ac266394058ULL,
+ 0xf5219f9aa7f3d6beULL, 0x2d96fea583b4cc68ULL, 0x5a31e1571b7585d0ULL,
+ 0x8ed12fe53d02d0feULL, 0xdfade6205f5b0e4bULL, 0x4cabb16ee92d331aULL,
+ 0x04c6657bf510cea3ULL, 0xd73c2cd6a87b8f10ULL, 0xe1d87310a1a307abULL,
+ 0x6cd5be9112ad0d6bULL, 0x97c032354366f3f2ULL, 0xd4e0ceb22677552eULL,
+ 0x0000000000000000ULL, 0x29509bde76a402cbULL, 0xc27a9e8bd42fe3e4ULL,
+ 0x5ef7842cee654b73ULL, 0xaf107ecdbc86536eULL, 0x3fcacbe784fcb401ULL,
+ 0xd55f90655c73e8cfULL, 0xe6c2f40fdabf1336ULL, 0xe8f6e7312c873b11ULL,
+ 0xeb2a0555a28be12fULL, 0xe4a148bc2eb774e9ULL, 0x9b979db84156bc0aULL,
+ 0x6eb60222e6a56ab4ULL, 0x87ffbbc4b026ec44ULL, 0xc703a5275b3b90a6ULL,
+ 0x47e699fc9001687fULL, 0x9c8d1aa73a4aa897ULL, 0x7cea3760e1ed12ddULL,
+ 0x4ec80ddd1d2554c5ULL, 0x13e36b957d4cc588ULL, 0x5d2b66486069914dULL,
+ 0x92b90999cc7280b0ULL, 0x517cc9c56259deb5ULL, 0xc937b619ad03b881ULL,
+ 0xec30824ad997f5b2ULL, 0xa45d565fc5aa080bULL, 0xd6837201d27f32f1ULL,
+ 0x635ef3789e9198adULL, 0x531f75769651b96aULL, 0x4f77530a6721e924ULL,
+ 0x486dd4151c3dfdb9ULL, 0x5f48dafb9461f692ULL, 0x375b011173dc355aULL,
+ 0x3da9775470f4d3deULL, 0x8d0dcd81b30e0ac0ULL, 0x36e45fc609d888bbULL,
+ 0x55baacbe97491016ULL, 0x8cb29356c90ab721ULL, 0x76184125e2c5f459ULL,
+ 0x99f4210bb55edbd5ULL, 0x6f095cf59ca1d755ULL, 0x9f51f8c3b44672a9ULL,
+ 0x3538bda287d45285ULL, 0x50c39712185d6354ULL, 0xf23b1885dcefc223ULL,
+ 0x79930ccc6ef9619fULL, 0xed8fdc9da3934853ULL, 0xcb540aaa590bdf5eULL,
+ 0x5c94389f1a6d2cacULL, 0xe77daad8a0bbaed7ULL, 0x28efc5090ca0bf2aULL,
+ 0xbf2ff73c4fc64cd8ULL, 0xb37858b14df60320ULL, 0xf8c96ec0dfc724a7ULL,
+ 0x828680683f329f06ULL, 0x941cd051cd6a29ccULL, 0xc3c5c05cae2b5e05ULL,
+ 0xb601631dc2e27062ULL, 0xc01922382027843bULL, 0x24b86a840e90f0d2ULL,
+ 0xd245177a276ffc52ULL, 0x0f8b4de98c3c95c6ULL, 0x3e759530fef809e0ULL,
+ 0x0b4d2892792c5b65ULL, 0xc4df4743d5374a98ULL, 0xa5e20888bfaeb5eaULL,
+ 0xba56cc90c0d23f9aULL, 0x38d04cf8ffe0a09cULL, 0x62e1adafe495254cULL,
+ 0x0263bcb3f40867dfULL, 0xcaeb547d230f62bfULL, 0x6082111c109d4293ULL,
+ 0xdad4dd8cd04f7d09ULL, 0xefec602e579b2f8cULL, 0x1fb4c4187f7c8a70ULL,
+ 0xffd3e9dfa4db303aULL, 0x7bf0b07f9af10640ULL, 0xf49ec14dddf76b5fULL,
+ 0x8f6e713247066d1fULL, 0x339d646a86ccfbf9ULL, 0x64447467e58d8c30ULL,
+ 0x2c29a072f9b07189ULL, 0xd8b7613f24471ad6ULL, 0x6627c8d41185ebefULL,
+ 0xa347d140beb61c96ULL, 0xde12b8f7255fb3aaULL, 0x9d324470404e1576ULL,
+ 0x9306574eb6763d51ULL, 0xa80af9d2c79a47f3ULL, 0x859c0777442e8b9bULL,
+ 0x69ac853d9db97e29ULL
+ }, {
+ 0xc3407dfc2de6377eULL, 0x5b9e93eea4256f77ULL, 0xadb58fdd50c845e0ULL,
+ 0x5219ff11a75bed86ULL, 0x356b61cfd90b1de9ULL, 0xfb8f406e25abe037ULL,
+ 0x7a5a0231c0f60796ULL, 0x9d3cd216e1f5020bULL, 0x0c6550fb6b48d8f3ULL,
+ 0xf57508c427ff1c62ULL, 0x4ad35ffa71cb407dULL, 0x6290a2da1666aa6dULL,
+ 0xe284ec2349355f9fULL, 0xb3c307c53d7c84ecULL, 0x05e23c0468365a02ULL,
+ 0x190bac4d6c9ebfa8ULL, 0x94bbbee9e28b80faULL, 0xa34fc777529cb9b5ULL,
+ 0xcc7b39f095bcd978ULL, 0x2426addb0ce532e3ULL, 0x7e79329312ce4fc7ULL,
+ 0xab09a72eebec2917ULL, 0xf8d15499f6b9d6c2ULL, 0x1a55b8babf8c895dULL,
+ 0xdb8add17fb769a85ULL, 0xb57f2f368658e81bULL, 0x8acd36f18f3f41f6ULL,
+ 0x5ce3b7bba50f11d3ULL, 0x114dcc14d5ee2f0aULL, 0xb91a7fcded1030e8ULL,
+ 0x81d5425fe55de7a1ULL, 0xb6213bc1554adeeeULL, 0x80144ef95f53f5f2ULL,
+ 0x1e7688186db4c10cULL, 0x3b912965db5fe1bcULL, 0xc281715a97e8252dULL,
+ 0x54a5d7e21c7f8171ULL, 0x4b12535ccbc5522eULL, 0x1d289cefbea6f7f9ULL,
+ 0x6ef5f2217d2e729eULL, 0xe6a7dc819b0d17ceULL, 0x1b94b41c05829b0eULL,
+ 0x33d7493c622f711eULL, 0xdcf7f942fa5ce421ULL, 0x600fba8b7f7a8ecbULL,
+ 0x46b60f011a83988eULL, 0x235b898e0dcf4c47ULL, 0x957ab24f588592a9ULL,
+ 0x4354330572b5c28cULL, 0xa5f3ef84e9b8d542ULL, 0x8c711e02341b2d01ULL,
+ 0x0b1874ae6a62a657ULL, 0x1213d8e306fc19ffULL, 0xfe6d7c6a4d9dba35ULL,
+ 0x65ed868f174cd4c9ULL, 0x88522ea0e6236550ULL, 0x899322065c2d7703ULL,
+ 0xc01e690bfef4018bULL, 0x915982ed8abddaf8ULL, 0xbe675b98ec3a4e4cULL,
+ 0xa996bf7f82f00db1ULL, 0xe1daf8d49a27696aULL, 0x2effd5d3dc8986e7ULL,
+ 0xd153a51f2b1a2e81ULL, 0x18caa0ebd690adfbULL, 0x390e3134b243c51aULL,
+ 0x2778b92cdff70416ULL, 0x029f1851691c24a6ULL, 0x5e7cafeacc133575ULL,
+ 0xfa4e4cc89fa5f264ULL, 0x5a5f9f481e2b7d24ULL, 0x484c47ab18d764dbULL,
+ 0x400a27f2a1a7f479ULL, 0xaeeb9b2a83da7315ULL, 0x721c626879869734ULL,
+ 0x042330a2d2384851ULL, 0x85f672fd3765aff0ULL, 0xba446b3a3e02061dULL,
+ 0x73dd6ecec3888567ULL, 0xffac70ccf793a866ULL, 0xdfa9edb5294ed2d4ULL,
+ 0x6c6aea7014325638ULL, 0x834a5a0e8c41c307ULL, 0xcdba35562fb2cb2bULL,
+ 0x0ad97808d06cb404ULL, 0x0f3b440cb85aee06ULL, 0xe5f9c876481f213bULL,
+ 0x98deee1289c35809ULL, 0x59018bbfcd394bd1ULL, 0xe01bf47220297b39ULL,
+ 0xde68e1139340c087ULL, 0x9fa3ca4788e926adULL, 0xbb85679c840c144eULL,
+ 0x53d8f3b71d55ffd5ULL, 0x0da45c5dd146caa0ULL, 0x6f34fe87c72060cdULL,
+ 0x57fbc315cf6db784ULL, 0xcee421a1fca0fddeULL, 0x3d2d0196607b8d4bULL,
+ 0x642c8a29ad42c69aULL, 0x14aff010bdd87508ULL, 0xac74837beac657b3ULL,
+ 0x3216459ad821634dULL, 0x3fb219c70967a9edULL, 0x06bc28f3bb246cf7ULL,
+ 0xf2082c9126d562c6ULL, 0x66b39278c45ee23cULL, 0xbd394f6f3f2878b9ULL,
+ 0xfd33689d9e8f8cc0ULL, 0x37f4799eb017394fULL, 0x108cc0b26fe03d59ULL,
+ 0xda4bd1b1417888d6ULL, 0xb09d1332ee6eb219ULL, 0x2f3ed975668794b4ULL,
+ 0x58c0871977375982ULL, 0x7561463d78ace990ULL, 0x09876cff037e82f1ULL,
+ 0x7fb83e35a8c05d94ULL, 0x26b9b58a65f91645ULL, 0xef20b07e9873953fULL,
+ 0x3148516d0b3355b8ULL, 0x41cb2b541ba9e62aULL, 0x790416c613e43163ULL,
+ 0xa011d380818e8f40ULL, 0x3a5025c36151f3efULL, 0xd57095bdf92266d0ULL,
+ 0x498d4b0da2d97688ULL, 0x8b0c3a57353153a5ULL, 0x21c491df64d368e1ULL,
+ 0x8f2f0af5e7091bf4ULL, 0x2da1c1240f9bb012ULL, 0xc43d59a92ccc49daULL,
+ 0xbfa6573e56345c1fULL, 0x828b56a8364fd154ULL, 0x9a41f643e0df7cafULL,
+ 0xbcf843c985266aeaULL, 0x2b1de9d7b4bfdce5ULL, 0x20059d79dedd7ab2ULL,
+ 0x6dabe6d6ae3c446bULL, 0x45e81bf6c991ae7bULL, 0x6351ae7cac68b83eULL,
+ 0xa432e32253b6c711ULL, 0xd092a9b991143cd2ULL, 0xcac711032e98b58fULL,
+ 0xd8d4c9e02864ac70ULL, 0xc5fc550f96c25b89ULL, 0xd7ef8dec903e4276ULL,
+ 0x67729ede7e50f06fULL, 0xeac28c7af045cf3dULL, 0xb15c1f945460a04aULL,
+ 0x9cfddeb05bfb1058ULL, 0x93c69abce3a1fe5eULL, 0xeb0380dc4a4bdd6eULL,
+ 0xd20db1e8f8081874ULL, 0x229a8528b7c15e14ULL, 0x44291750739fbc28ULL,
+ 0xd3ccbd4e42060a27ULL, 0xf62b1c33f4ed2a97ULL, 0x86a8660ae4779905ULL,
+ 0xd62e814a2a305025ULL, 0x477703a7a08d8addULL, 0x7b9b0e977af815c5ULL,
+ 0x78c51a60a9ea2330ULL, 0xa6adfb733aaae3b7ULL, 0x97e5aa1e3199b60fULL,
+ 0x0000000000000000ULL, 0xf4b404629df10e31ULL, 0x5564db44a6719322ULL,
+ 0x9207961a59afec0dULL, 0x9624a6b88b97a45cULL, 0x363575380a192b1cULL,
+ 0x2c60cd82b595a241ULL, 0x7d272664c1dc7932ULL, 0x7142769faa94a1c1ULL,
+ 0xa1d0df263b809d13ULL, 0x1630e841d4c451aeULL, 0xc1df65ad44fa13d8ULL,
+ 0x13d2d445bcf20bacULL, 0xd915c546926abe23ULL, 0x38cf3d92084dd749ULL,
+ 0xe766d0272103059dULL, 0xc7634d5effde7f2fULL, 0x077d2455012a7ea4ULL,
+ 0xedbfa82ff16fb199ULL, 0xaf2a978c39d46146ULL, 0x42953fa3c8bbd0dfULL,
+ 0xcb061da59496a7dcULL, 0x25e7a17db6eb20b0ULL, 0x34aa6d6963050fbaULL,
+ 0xa76cf7d580a4f1e4ULL, 0xf7ea10954ee338c4ULL, 0xfcf2643b24819e93ULL,
+ 0xcf252d0746aeef8dULL, 0x4ef06f58a3f3082cULL, 0x563acfb37563a5d7ULL,
+ 0x5086e740ce47c920ULL, 0x2982f186dda3f843ULL, 0x87696aac5e798b56ULL,
+ 0x5d22bb1d1f010380ULL, 0x035e14f7d31236f5ULL, 0x3cec0d30da759f18ULL,
+ 0xf3c920379cdb7095ULL, 0xb8db736b571e22bbULL, 0xdd36f5e44052f672ULL,
+ 0xaac8ab8851e23b44ULL, 0xa857b3d938fe1fe2ULL, 0x17f1e4e76eca43fdULL,
+ 0xec7ea4894b61a3caULL, 0x9e62c6e132e734feULL, 0xd4b1991b432c7483ULL,
+ 0x6ad6c283af163acfULL, 0x1ce9904904a8e5aaULL, 0x5fbda34c761d2726ULL,
+ 0xf910583f4cb7c491ULL, 0xc6a241f845d06d7cULL, 0x4f3163fe19fd1a7fULL,
+ 0xe99c988d2357f9c8ULL, 0x8eee06535d0709a7ULL, 0x0efa48aa0254fc55ULL,
+ 0xb4be23903c56fa48ULL, 0x763f52caabbedf65ULL, 0xeee1bcd8227d876cULL,
+ 0xe345e085f33b4dccULL, 0x3e731561b369bbbeULL, 0x2843fd2067adea10ULL,
+ 0x2adce5710eb1ceb6ULL, 0xb7e03767ef44ccbdULL, 0x8db012a48e153f52ULL,
+ 0x61ceb62dc5749c98ULL, 0xe85d942b9959eb9bULL, 0x4c6f7709caef2c8aULL,
+ 0x84377e5b8d6bbda3ULL, 0x30895dcbb13d47ebULL, 0x74a04a9bc2a2fbc3ULL,
+ 0x6b17ce251518289cULL, 0xe438c4d0f2113368ULL, 0x1fb784bed7bad35fULL,
+ 0x9b80fae55ad16efcULL, 0x77fe5e6c11b0cd36ULL, 0xc858095247849129ULL,
+ 0x08466059b97090a2ULL, 0x01c10ca6ba0e1253ULL, 0x6988d6747c040c3aULL,
+ 0x6849dad2c60a1e69ULL, 0x5147ebe67449db73ULL, 0xc99905f4fd8a837aULL,
+ 0x991fe2b433cd4a5aULL, 0xf09734c04fc94660ULL, 0xa28ecbd1e892abe6ULL,
+ 0xf1563866f5c75433ULL, 0x4dae7baf70e13ed9ULL, 0x7ce62ac27bd26b61ULL,
+ 0x70837a39109ab392ULL, 0x90988e4b30b3c8abULL, 0xb2020b63877296bfULL,
+ 0x156efcb607d6675bULL
+ }, {
+ 0xe63f55ce97c331d0ULL, 0x25b506b0015bba16ULL, 0xc8706e29e6ad9ba8ULL,
+ 0x5b43d3775d521f6aULL, 0x0bfa3d577035106eULL, 0xab95fc172afb0e66ULL,
+ 0xf64b63979e7a3276ULL, 0xf58b4562649dad4bULL, 0x48f7c3dbae0c83f1ULL,
+ 0xff31916642f5c8c5ULL, 0xcbb048dc1c4a0495ULL, 0x66b8f83cdf622989ULL,
+ 0x35c130e908e2b9b0ULL, 0x7c761a61f0b34fa1ULL, 0x3601161cf205268dULL,
+ 0x9e54ccfe2219b7d6ULL, 0x8b7d90a538940837ULL, 0x9cd403588ea35d0bULL,
+ 0xbc3c6fea9ccc5b5aULL, 0xe5ff733b6d24aeedULL, 0xceed22de0f7eb8d2ULL,
+ 0xec8581cab1ab545eULL, 0xb96105e88ff8e71dULL, 0x8ca03501871a5eadULL,
+ 0x76ccce65d6db2a2fULL, 0x5883f582a7b58057ULL, 0x3f7be4ed2e8adc3eULL,
+ 0x0fe7be06355cd9c9ULL, 0xee054e6c1d11be83ULL, 0x1074365909b903a6ULL,
+ 0x5dde9f80b4813c10ULL, 0x4a770c7d02b6692cULL, 0x5379c8d5d7809039ULL,
+ 0xb4067448161ed409ULL, 0x5f5e5026183bd6cdULL, 0xe898029bf4c29df9ULL,
+ 0x7fb63c940a54d09cULL, 0xc5171f897f4ba8bcULL, 0xa6f28db7b31d3d72ULL,
+ 0x2e4f3be7716eaa78ULL, 0x0d6771a099e63314ULL, 0x82076254e41bf284ULL,
+ 0x2f0fd2b42733df98ULL, 0x5c9e76d3e2dc49f0ULL, 0x7aeb569619606cdbULL,
+ 0x83478b07b2468764ULL, 0xcfadcb8d5923cd32ULL, 0x85dac7f05b95a41eULL,
+ 0xb5469d1b4043a1e9ULL, 0xb821ecbbd9a592fdULL, 0x1b8e0b0e798c13c8ULL,
+ 0x62a57b6d9a0be02eULL, 0xfcf1b793b81257f8ULL, 0x9d94ea0bd8fe28ebULL,
+ 0x4cea408aeb654a56ULL, 0x23284a47e888996cULL, 0x2d8f1d128b893545ULL,
+ 0xf4cbac3132c0d8abULL, 0xbd7c86b9ca912ebaULL, 0x3a268eef3dbe6079ULL,
+ 0xf0d62f6077a9110cULL, 0x2735c916ade150cbULL, 0x89fd5f03942ee2eaULL,
+ 0x1acee25d2fd16628ULL, 0x90f39bab41181bffULL, 0x430dfe8cde39939fULL,
+ 0xf70b8ac4c8274796ULL, 0x1c53aeaac6024552ULL, 0x13b410acf35e9c9bULL,
+ 0xa532ab4249faa24fULL, 0x2b1251e5625a163fULL, 0xd7e3e676da4841c7ULL,
+ 0xa7b264e4e5404892ULL, 0xda8497d643ae72d3ULL, 0x861ae105a1723b23ULL,
+ 0x38a6414991048aa4ULL, 0x6578dec92585b6b4ULL, 0x0280cfa6acbaeaddULL,
+ 0x88bdb650c273970aULL, 0x9333bd5ebbff84c2ULL, 0x4e6a8f2c47dfa08bULL,
+ 0x321c954db76cef2aULL, 0x418d312a72837942ULL, 0xb29b38bfffcdf773ULL,
+ 0x6c022c38f90a4c07ULL, 0x5a033a240b0f6a8aULL, 0x1f93885f3ce5da6fULL,
+ 0xc38a537e96988bc6ULL, 0x39e6a81ac759ff44ULL, 0x29929e43cee0fce2ULL,
+ 0x40cdd87924de0ca2ULL, 0xe9d8ebc8a29fe819ULL, 0x0c2798f3cfbb46f4ULL,
+ 0x55e484223e53b343ULL, 0x4650948ecd0d2fd8ULL, 0x20e86cb2126f0651ULL,
+ 0x6d42c56baf5739e7ULL, 0xa06fc1405ace1e08ULL, 0x7babbfc54f3d193bULL,
+ 0x424d17df8864e67fULL, 0xd8045870ef14980eULL, 0xc6d7397c85ac3781ULL,
+ 0x21a885e1443273b1ULL, 0x67f8116f893f5c69ULL, 0x24f5efe35706cff6ULL,
+ 0xd56329d076f2ab1aULL, 0x5e1eb9754e66a32dULL, 0x28d2771098bd8902ULL,
+ 0x8f6013f47dfdc190ULL, 0x17a993fdb637553cULL, 0xe0a219397e1012aaULL,
+ 0x786b9930b5da8606ULL, 0x6e82e39e55b0a6daULL, 0x875a0856f72f4ec3ULL,
+ 0x3741ff4fa458536dULL, 0xac4859b3957558fcULL, 0x7ef6d5c75c09a57cULL,
+ 0xc04a758b6c7f14fbULL, 0xf9acdd91ab26ebbfULL, 0x7391a467c5ef9668ULL,
+ 0x335c7c1ee1319acaULL, 0xa91533b18641e4bbULL, 0xe4bf9a683b79db0dULL,
+ 0x8e20faa72ba0b470ULL, 0x51f907737b3a7ae4ULL, 0x2268a314bed5ec8cULL,
+ 0xd944b123b949edeeULL, 0x31dcb3b84d8b7017ULL, 0xd3fe65279f218860ULL,
+ 0x097af2f1dc8ffab3ULL, 0x9b09a6fc312d0b91ULL, 0xcc6ded78a3c4520fULL,
+ 0x3481d9ba5ebfcc50ULL, 0x4f2a667f1182d56bULL, 0xdfd9fdd4509ace94ULL,
+ 0x26752045fbbc252bULL, 0xbffc491f662bc467ULL, 0xdd593272fc202449ULL,
+ 0x3cbbc218d46d4303ULL, 0x91b372f817456e1fULL, 0x681faf69bc6385a0ULL,
+ 0xb686bbeebaa43ed4ULL, 0x1469b5084cd0ca01ULL, 0x98c98009cbca94acULL,
+ 0x6438379a73d8c354ULL, 0xc2caba2dc0c5fe26ULL, 0x3e3b0dbe78d7a9deULL,
+ 0x50b9ee202d670f04ULL, 0x4590b27b37eab0e5ULL, 0x6025b4cb36b10af3ULL,
+ 0xfb2c1237079c0162ULL, 0xa12f28130c936be8ULL, 0x4b37e52e54eb1cccULL,
+ 0x083a1ba28ad28f53ULL, 0xc10a9cd83a22611bULL, 0x9f1425ad7444c236ULL,
+ 0x069d4cf7e9d3237aULL, 0xedc56899e7f621beULL, 0x778c273680865fcfULL,
+ 0x309c5aeb1bd605f7ULL, 0x8de0dc52d1472b4dULL, 0xf8ec34c2fd7b9e5fULL,
+ 0xea18cd3d58787724ULL, 0xaad515447ca67b86ULL, 0x9989695a9d97e14cULL,
+ 0x0000000000000000ULL, 0xf196c63321f464ecULL, 0x71116bc169557cb5ULL,
+ 0xaf887f466f92c7c1ULL, 0x972e3e0ffe964d65ULL, 0x190ec4a8d536f915ULL,
+ 0x95aef1a9522ca7b8ULL, 0xdc19db21aa7d51a9ULL, 0x94ee18fa0471d258ULL,
+ 0x8087adf248a11859ULL, 0xc457f6da2916dd5cULL, 0xfa6cfb6451c17482ULL,
+ 0xf256e0c6db13fbd1ULL, 0x6a9f60cf10d96f7dULL, 0x4daaa9d9bd383fb6ULL,
+ 0x03c026f5fae79f3dULL, 0xde99148706c7bb74ULL, 0x2a52b8b6340763dfULL,
+ 0x6fc20acd03edd33aULL, 0xd423c08320afdefaULL, 0xbbe1ca4e23420dc0ULL,
+ 0x966ed75ca8cb3885ULL, 0xeb58246e0e2502c4ULL, 0x055d6a021334bc47ULL,
+ 0xa47242111fa7d7afULL, 0xe3623fcc84f78d97ULL, 0x81c744a11efc6db9ULL,
+ 0xaec8961539cfb221ULL, 0xf31609958d4e8e31ULL, 0x63e5923ecc5695ceULL,
+ 0x47107ddd9b505a38ULL, 0xa3afe7b5a0298135ULL, 0x792b7063e387f3e6ULL,
+ 0x0140e953565d75e0ULL, 0x12f4f9ffa503e97bULL, 0x750ce8902c3cb512ULL,
+ 0xdbc47e8515f30733ULL, 0x1ed3610c6ab8af8fULL, 0x5239218681dde5d9ULL,
+ 0xe222d69fd2aaf877ULL, 0xfe71783514a8bd25ULL, 0xcaf0a18f4a177175ULL,
+ 0x61655d9860ec7f13ULL, 0xe77fbc9dc19e4430ULL, 0x2ccff441ddd440a5ULL,
+ 0x16e97aaee06a20dcULL, 0xa855dae2d01c915bULL, 0x1d1347f9905f30b2ULL,
+ 0xb7c652bdecf94b34ULL, 0xd03e43d265c6175dULL, 0xfdb15ec0ee4f2218ULL,
+ 0x57644b8492e9599eULL, 0x07dda5a4bf8e569aULL, 0x54a46d71680ec6a3ULL,
+ 0x5624a2d7c4b42c7eULL, 0xbebca04c3076b187ULL, 0x7d36f332a6ee3a41ULL,
+ 0x3b6667bc6be31599ULL, 0x695f463aea3ef040ULL, 0xad08b0e0c3282d1cULL,
+ 0xb15b1e4a052a684eULL, 0x44d05b2861b7c505ULL, 0x15295c5b1a8dbfe1ULL,
+ 0x744c01c37a61c0f2ULL, 0x59c31cd1f1e8f5b7ULL, 0xef45a73f4b4ccb63ULL,
+ 0x6bdf899c46841a9dULL, 0x3dfb2b4b823036e3ULL, 0xa2ef0ee6f674f4d5ULL,
+ 0x184e2dfb836b8cf5ULL, 0x1134df0a5fe47646ULL, 0xbaa1231d751f7820ULL,
+ 0xd17eaa81339b62bdULL, 0xb01bf71953771daeULL, 0x849a2ea30dc8d1feULL,
+ 0x705182923f080955ULL, 0x0ea757556301ac29ULL, 0x041d83514569c9a7ULL,
+ 0x0abad4042668658eULL, 0x49b72a88f851f611ULL, 0x8a3d79f66ec97dd7ULL,
+ 0xcd2d042bf59927efULL, 0xc930877ab0f0ee48ULL, 0x9273540deda2f122ULL,
+ 0xc797d02fd3f14261ULL, 0xe1e2f06a284d674aULL, 0xd2be8c74c97cfd80ULL,
+ 0x9a494faf67707e71ULL, 0xb3dbd1eca9908293ULL, 0x72d14d3493b2e388ULL,
+ 0xd6a30f258c153427ULL
+ }
+}; /* Ax */
+
+static void streebog_xor(const struct streebog_uint512 *x,
+ const struct streebog_uint512 *y,
+ struct streebog_uint512 *z)
+{
+ z->qword[0] = x->qword[0] ^ y->qword[0];
+ z->qword[1] = x->qword[1] ^ y->qword[1];
+ z->qword[2] = x->qword[2] ^ y->qword[2];
+ z->qword[3] = x->qword[3] ^ y->qword[3];
+ z->qword[4] = x->qword[4] ^ y->qword[4];
+ z->qword[5] = x->qword[5] ^ y->qword[5];
+ z->qword[6] = x->qword[6] ^ y->qword[6];
+ z->qword[7] = x->qword[7] ^ y->qword[7];
+}
+
+static void streebog_xlps(const struct streebog_uint512 *x,
+ const struct streebog_uint512 *y,
+ struct streebog_uint512 *data)
+{
+ u64 r0, r1, r2, r3, r4, r5, r6, r7;
+ int i;
+
+ r0 = le64_to_cpu(x->qword[0] ^ y->qword[0]);
+ r1 = le64_to_cpu(x->qword[1] ^ y->qword[1]);
+ r2 = le64_to_cpu(x->qword[2] ^ y->qword[2]);
+ r3 = le64_to_cpu(x->qword[3] ^ y->qword[3]);
+ r4 = le64_to_cpu(x->qword[4] ^ y->qword[4]);
+ r5 = le64_to_cpu(x->qword[5] ^ y->qword[5]);
+ r6 = le64_to_cpu(x->qword[6] ^ y->qword[6]);
+ r7 = le64_to_cpu(x->qword[7] ^ y->qword[7]);
+
+ for (i = 0; i <= 7; i++) {
+ data->qword[i] = cpu_to_le64(Ax[0][r0 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[1][r1 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[2][r2 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[3][r3 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[4][r4 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[5][r5 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[6][r6 & 0xFF]);
+ data->qword[i] ^= cpu_to_le64(Ax[7][r7 & 0xFF]);
+ r0 >>= 8;
+ r1 >>= 8;
+ r2 >>= 8;
+ r3 >>= 8;
+ r4 >>= 8;
+ r5 >>= 8;
+ r6 >>= 8;
+ r7 >>= 8;
+ }
+}
+
+static void streebog_round(int i, struct streebog_uint512 *Ki,
+ struct streebog_uint512 *data)
+{
+ streebog_xlps(Ki, &C[i], Ki);
+ streebog_xlps(Ki, data, data);
+}
+
+static int streebog_init(struct shash_desc *desc)
+{
+ struct streebog_state *ctx = shash_desc_ctx(desc);
+ unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
+ unsigned int i;
+
+ memset(ctx, 0, sizeof(struct streebog_state));
+ for (i = 0; i < 8; i++) {
+ if (digest_size == STREEBOG256_DIGEST_SIZE)
+ ctx->h.qword[i] = 0x0101010101010101ULL;
+ }
+ return 0;
+}
+
+static void streebog_pad(struct streebog_state *ctx)
+{
+ if (ctx->fillsize >= STREEBOG_BLOCK_SIZE)
+ return;
+
+ memset(ctx->buffer + ctx->fillsize, 0,
+ sizeof(ctx->buffer) - ctx->fillsize);
+
+ ctx->buffer[ctx->fillsize] = 1;
+}
+
+static void streebog_add512(const struct streebog_uint512 *x,
+ const struct streebog_uint512 *y,
+ struct streebog_uint512 *r)
+{
+ u64 carry = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ const u64 left = le64_to_cpu(x->qword[i]);
+ u64 sum;
+
+ sum = left + le64_to_cpu(y->qword[i]) + carry;
+ if (sum != left)
+ carry = (sum < left);
+ r->qword[i] = cpu_to_le64(sum);
+ }
+}
+
+static void streebog_g(struct streebog_uint512 *h,
+ const struct streebog_uint512 *N,
+ const u8 *m)
+{
+ struct streebog_uint512 Ki, data;
+ unsigned int i;
+
+ streebog_xlps(h, N, &data);
+
+ /* Starting E() */
+ Ki = data;
+ streebog_xlps(&Ki, (const struct streebog_uint512 *)&m[0], &data);
+
+ for (i = 0; i < 11; i++)
+ streebog_round(i, &Ki, &data);
+
+ streebog_xlps(&Ki, &C[11], &Ki);
+ streebog_xor(&Ki, &data, &data);
+ /* E() done */
+
+ streebog_xor(&data, h, &data);
+ streebog_xor(&data, (const struct streebog_uint512 *)&m[0], h);
+}
+
+static void streebog_stage2(struct streebog_state *ctx, const u8 *data)
+{
+ streebog_g(&ctx->h, &ctx->N, data);
+
+ streebog_add512(&ctx->N, &buffer512, &ctx->N);
+ streebog_add512(&ctx->Sigma, (const struct streebog_uint512 *)data,
+ &ctx->Sigma);
+}
+
+static void streebog_stage3(struct streebog_state *ctx)
+{
+ struct streebog_uint512 buf = { { 0 } };
+
+ buf.qword[0] = cpu_to_le64(ctx->fillsize << 3);
+ streebog_pad(ctx);
+
+ streebog_g(&ctx->h, &ctx->N, (const u8 *)&ctx->buffer);
+ streebog_add512(&ctx->N, &buf, &ctx->N);
+ streebog_add512(&ctx->Sigma,
+ (const struct streebog_uint512 *)&ctx->buffer[0],
+ &ctx->Sigma);
+ streebog_g(&ctx->h, &buffer0, (const u8 *)&ctx->N);
+ streebog_g(&ctx->h, &buffer0, (const u8 *)&ctx->Sigma);
+ memcpy(&ctx->hash, &ctx->h, sizeof(struct streebog_uint512));
+}
+
+static int streebog_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct streebog_state *ctx = shash_desc_ctx(desc);
+ size_t chunksize;
+
+ if (ctx->fillsize) {
+ chunksize = STREEBOG_BLOCK_SIZE - ctx->fillsize;
+ if (chunksize > len)
+ chunksize = len;
+ memcpy(&ctx->buffer[ctx->fillsize], data, chunksize);
+ ctx->fillsize += chunksize;
+ len -= chunksize;
+ data += chunksize;
+
+ if (ctx->fillsize == STREEBOG_BLOCK_SIZE) {
+ streebog_stage2(ctx, ctx->buffer);
+ ctx->fillsize = 0;
+ }
+ }
+
+ while (len >= STREEBOG_BLOCK_SIZE) {
+ streebog_stage2(ctx, data);
+ data += STREEBOG_BLOCK_SIZE;
+ len -= STREEBOG_BLOCK_SIZE;
+ }
+
+ if (len) {
+ memcpy(&ctx->buffer, data, len);
+ ctx->fillsize = len;
+ }
+ return 0;
+}
+
+static int streebog_final(struct shash_desc *desc, u8 *digest)
+{
+ struct streebog_state *ctx = shash_desc_ctx(desc);
+
+ streebog_stage3(ctx);
+ ctx->fillsize = 0;
+ if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE)
+ memcpy(digest, &ctx->hash.qword[4], STREEBOG256_DIGEST_SIZE);
+ else
+ memcpy(digest, &ctx->hash.qword[0], STREEBOG512_DIGEST_SIZE);
+ return 0;
+}
+
+static struct shash_alg algs[2] = { {
+ .digestsize = STREEBOG256_DIGEST_SIZE,
+ .init = streebog_init,
+ .update = streebog_update,
+ .final = streebog_final,
+ .descsize = sizeof(struct streebog_state),
+ .base = {
+ .cra_name = "streebog256",
+ .cra_driver_name = "streebog256-generic",
+ .cra_blocksize = STREEBOG_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ },
+}, {
+ .digestsize = STREEBOG512_DIGEST_SIZE,
+ .init = streebog_init,
+ .update = streebog_update,
+ .final = streebog_final,
+ .descsize = sizeof(struct streebog_state),
+ .base = {
+ .cra_name = "streebog512",
+ .cra_driver_name = "streebog512-generic",
+ .cra_blocksize = STREEBOG_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+} };
+
+static int __init streebog_mod_init(void)
+{
+ return crypto_register_shashes(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit streebog_mod_fini(void)
+{
+ crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
+}
+
+module_init(streebog_mod_init);
+module_exit(streebog_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Vitaly Chikunov <vt@altlinux.org>");
+MODULE_DESCRIPTION("Streebog Hash Function");
+
+MODULE_ALIAS_CRYPTO("streebog256");
+MODULE_ALIAS_CRYPTO("streebog256-generic");
+MODULE_ALIAS_CRYPTO("streebog512");
+MODULE_ALIAS_CRYPTO("streebog512-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index c20c9f5c18f2..e7fb87e114a5 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -76,10 +76,12 @@ static char *check[] = {
"cast6", "arc4", "michael_mic", "deflate", "crc32c", "tea", "xtea",
"khazad", "wp512", "wp384", "wp256", "tnepres", "xeta", "fcrypt",
"camellia", "seed", "salsa20", "rmd128", "rmd160", "rmd256", "rmd320",
- "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512", NULL
+ "lzo", "cts", "sha3-224", "sha3-256", "sha3-384", "sha3-512",
+ "streebog256", "streebog512",
+ NULL
};
-static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 };
+static u32 block_sizes[] = { 16, 64, 256, 1024, 1472, 8192, 0 };
static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 };
#define XBUFSIZE 8
@@ -1736,6 +1738,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("ctr(aes)");
ret += tcrypt_test("rfc3686(ctr(aes))");
ret += tcrypt_test("ofb(aes)");
+ ret += tcrypt_test("cfb(aes)");
break;
case 11:
@@ -1913,6 +1916,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("sm3");
break;
+ case 53:
+ ret += tcrypt_test("streebog256");
+ break;
+
+ case 54:
+ ret += tcrypt_test("streebog512");
+ break;
+
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@@ -1969,6 +1980,14 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("hmac(sha3-512)");
break;
+ case 115:
+ ret += tcrypt_test("hmac(streebog256)");
+ break;
+
+ case 116:
+ ret += tcrypt_test("hmac(streebog512)");
+ break;
+
case 150:
ret += tcrypt_test("ansi_cprng");
break;
@@ -2060,6 +2079,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
speed_template_16_24_32);
test_cipher_speed("ctr(aes)", DECRYPT, sec, NULL, 0,
speed_template_16_24_32);
+ test_cipher_speed("cfb(aes)", ENCRYPT, sec, NULL, 0,
+ speed_template_16_24_32);
+ test_cipher_speed("cfb(aes)", DECRYPT, sec, NULL, 0,
+ speed_template_16_24_32);
break;
case 201:
@@ -2297,6 +2320,18 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
speed_template_16);
break;
+
+ case 219:
+ test_cipher_speed("adiantum(xchacha12,aes)", ENCRYPT, sec, NULL,
+ 0, speed_template_32);
+ test_cipher_speed("adiantum(xchacha12,aes)", DECRYPT, sec, NULL,
+ 0, speed_template_32);
+ test_cipher_speed("adiantum(xchacha20,aes)", ENCRYPT, sec, NULL,
+ 0, speed_template_32);
+ test_cipher_speed("adiantum(xchacha20,aes)", DECRYPT, sec, NULL,
+ 0, speed_template_32);
+ break;
+
case 300:
if (alg) {
test_hash_speed(alg, sec, generic_hash_speed_template);
@@ -2407,6 +2442,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_hash_speed("sm3", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
/* fall through */
+ case 327:
+ test_hash_speed("streebog256", sec,
+ generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ /* fall through */
+ case 328:
+ test_hash_speed("streebog512", sec,
+ generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ /* fall through */
case 399:
break;
@@ -2520,6 +2565,16 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
num_mb);
if (mode > 400 && mode < 500) break;
/* fall through */
+ case 426:
+ test_mb_ahash_speed("streebog256", sec,
+ generic_hash_speed_template, num_mb);
+ if (mode > 400 && mode < 500) break;
+ /* fall through */
+ case 427:
+ test_mb_ahash_speed("streebog512", sec,
+ generic_hash_speed_template, num_mb);
+ if (mode > 400 && mode < 500) break;
+ /* fall through */
case 499:
break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b1f79c6bf409..0f684a414acb 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2404,6 +2404,18 @@ static int alg_test_null(const struct alg_test_desc *desc,
/* Please keep this list sorted by algorithm name. */
static const struct alg_test_desc alg_test_descs[] = {
{
+ .alg = "adiantum(xchacha12,aes)",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(adiantum_xchacha12_aes_tv_template)
+ },
+ }, {
+ .alg = "adiantum(xchacha20,aes)",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(adiantum_xchacha20_aes_tv_template)
+ },
+ }, {
.alg = "aegis128",
.test = alg_test_aead,
.suite = {
@@ -2691,6 +2703,13 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "cfb(aes)",
+ .test = alg_test_skcipher,
+ .fips_allowed = 1,
+ .suite = {
+ .cipher = __VECS(aes_cfb_tv_template)
+ },
+ }, {
.alg = "chacha20",
.test = alg_test_skcipher,
.suite = {
@@ -2805,6 +2824,7 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "cts(cbc(aes))",
.test = alg_test_skcipher,
+ .fips_allowed = 1,
.suite = {
.cipher = __VECS(cts_mode_tv_template)
}
@@ -3185,6 +3205,18 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(hmac_sha512_tv_template)
}
}, {
+ .alg = "hmac(streebog256)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(hmac_streebog256_tv_template)
+ }
+ }, {
+ .alg = "hmac(streebog512)",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(hmac_streebog512_tv_template)
+ }
+ }, {
.alg = "jitterentropy_rng",
.fips_allowed = 1,
.test = alg_test_null,
@@ -3292,6 +3324,12 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+ .alg = "nhpoly1305",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(nhpoly1305_tv_template)
+ }
+ }, {
.alg = "ofb(aes)",
.test = alg_test_skcipher,
.fips_allowed = 1,
@@ -3497,6 +3535,18 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(sm3_tv_template)
}
}, {
+ .alg = "streebog256",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(streebog256_tv_template)
+ }
+ }, {
+ .alg = "streebog512",
+ .test = alg_test_hash,
+ .suite = {
+ .hash = __VECS(streebog512_tv_template)
+ }
+ }, {
.alg = "tgr128",
.test = alg_test_hash,
.suite = {
@@ -3545,6 +3595,18 @@ static const struct alg_test_desc alg_test_descs[] = {
.hash = __VECS(aes_xcbc128_tv_template)
}
}, {
+ .alg = "xchacha12",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(xchacha12_tv_template)
+ },
+ }, {
+ .alg = "xchacha20",
+ .test = alg_test_skcipher,
+ .suite = {
+ .cipher = __VECS(xchacha20_tv_template)
+ },
+ }, {
.alg = "xts(aes)",
.test = alg_test_skcipher,
.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 1fe7b97ba03f..e8f47d7b92cd 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -27,7 +27,7 @@
#define MAX_DIGEST_SIZE 64
#define MAX_TAP 8
-#define MAX_KEYLEN 160
+#define MAX_KEYLEN 1088
#define MAX_IVLEN 32
struct hash_testvec {
@@ -35,10 +35,10 @@ struct hash_testvec {
const char *key;
const char *plaintext;
const char *digest;
- unsigned char tap[MAX_TAP];
+ unsigned short tap[MAX_TAP];
+ unsigned short np;
unsigned short psize;
- unsigned char np;
- unsigned char ksize;
+ unsigned short ksize;
};
/*
@@ -2307,6 +2307,122 @@ static const struct hash_testvec crct10dif_tv_template[] = {
}
};
+/*
+ * Streebog test vectors from RFC 6986 and GOST R 34.11-2012
+ */
+static const struct hash_testvec streebog256_tv_template[] = {
+ { /* M1 */
+ .plaintext = "012345678901234567890123456789012345678901234567890123456789012",
+ .psize = 63,
+ .digest =
+ "\x9d\x15\x1e\xef\xd8\x59\x0b\x89"
+ "\xda\xa6\xba\x6c\xb7\x4a\xf9\x27"
+ "\x5d\xd0\x51\x02\x6b\xb1\x49\xa4"
+ "\x52\xfd\x84\xe5\xe5\x7b\x55\x00",
+ },
+ { /* M2 */
+ .plaintext =
+ "\xd1\xe5\x20\xe2\xe5\xf2\xf0\xe8"
+ "\x2c\x20\xd1\xf2\xf0\xe8\xe1\xee"
+ "\xe6\xe8\x20\xe2\xed\xf3\xf6\xe8"
+ "\x2c\x20\xe2\xe5\xfe\xf2\xfa\x20"
+ "\xf1\x20\xec\xee\xf0\xff\x20\xf1"
+ "\xf2\xf0\xe5\xeb\xe0\xec\xe8\x20"
+ "\xed\xe0\x20\xf5\xf0\xe0\xe1\xf0"
+ "\xfb\xff\x20\xef\xeb\xfa\xea\xfb"
+ "\x20\xc8\xe3\xee\xf0\xe5\xe2\xfb",
+ .psize = 72,
+ .digest =
+ "\x9d\xd2\xfe\x4e\x90\x40\x9e\x5d"
+ "\xa8\x7f\x53\x97\x6d\x74\x05\xb0"
+ "\xc0\xca\xc6\x28\xfc\x66\x9a\x74"
+ "\x1d\x50\x06\x3c\x55\x7e\x8f\x50",
+ },
+};
+
+static const struct hash_testvec streebog512_tv_template[] = {
+ { /* M1 */
+ .plaintext = "012345678901234567890123456789012345678901234567890123456789012",
+ .psize = 63,
+ .digest =
+ "\x1b\x54\xd0\x1a\x4a\xf5\xb9\xd5"
+ "\xcc\x3d\x86\xd6\x8d\x28\x54\x62"
+ "\xb1\x9a\xbc\x24\x75\x22\x2f\x35"
+ "\xc0\x85\x12\x2b\xe4\xba\x1f\xfa"
+ "\x00\xad\x30\xf8\x76\x7b\x3a\x82"
+ "\x38\x4c\x65\x74\xf0\x24\xc3\x11"
+ "\xe2\xa4\x81\x33\x2b\x08\xef\x7f"
+ "\x41\x79\x78\x91\xc1\x64\x6f\x48",
+ },
+ { /* M2 */
+ .plaintext =
+ "\xd1\xe5\x20\xe2\xe5\xf2\xf0\xe8"
+ "\x2c\x20\xd1\xf2\xf0\xe8\xe1\xee"
+ "\xe6\xe8\x20\xe2\xed\xf3\xf6\xe8"
+ "\x2c\x20\xe2\xe5\xfe\xf2\xfa\x20"
+ "\xf1\x20\xec\xee\xf0\xff\x20\xf1"
+ "\xf2\xf0\xe5\xeb\xe0\xec\xe8\x20"
+ "\xed\xe0\x20\xf5\xf0\xe0\xe1\xf0"
+ "\xfb\xff\x20\xef\xeb\xfa\xea\xfb"
+ "\x20\xc8\xe3\xee\xf0\xe5\xe2\xfb",
+ .psize = 72,
+ .digest =
+ "\x1e\x88\xe6\x22\x26\xbf\xca\x6f"
+ "\x99\x94\xf1\xf2\xd5\x15\x69\xe0"
+ "\xda\xf8\x47\x5a\x3b\x0f\xe6\x1a"
+ "\x53\x00\xee\xe4\x6d\x96\x13\x76"
+ "\x03\x5f\xe8\x35\x49\xad\xa2\xb8"
+ "\x62\x0f\xcd\x7c\x49\x6c\xe5\xb3"
+ "\x3f\x0c\xb9\xdd\xdc\x2b\x64\x60"
+ "\x14\x3b\x03\xda\xba\xc9\xfb\x28",
+ },
+};
+
+/*
+ * Two HMAC-Streebog test vectors from RFC 7836 and R 50.1.113-2016 A
+ */
+static const struct hash_testvec hmac_streebog256_tv_template[] = {
+ {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+ .ksize = 32,
+ .plaintext =
+ "\x01\x26\xbd\xb8\x78\x00\xaf\x21"
+ "\x43\x41\x45\x65\x63\x78\x01\x00",
+ .psize = 16,
+ .digest =
+ "\xa1\xaa\x5f\x7d\xe4\x02\xd7\xb3"
+ "\xd3\x23\xf2\x99\x1c\x8d\x45\x34"
+ "\x01\x31\x37\x01\x0a\x83\x75\x4f"
+ "\xd0\xaf\x6d\x7c\xd4\x92\x2e\xd9",
+ },
+};
+
+static const struct hash_testvec hmac_streebog512_tv_template[] = {
+ {
+ .key = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+ "\x10\x11\x12\x13\x14\x15\x16\x17"
+ "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+ .ksize = 32,
+ .plaintext =
+ "\x01\x26\xbd\xb8\x78\x00\xaf\x21"
+ "\x43\x41\x45\x65\x63\x78\x01\x00",
+ .psize = 16,
+ .digest =
+ "\xa5\x9b\xab\x22\xec\xae\x19\xc6"
+ "\x5f\xbd\xe6\xe5\xf4\xe9\xf5\xd8"
+ "\x54\x9d\x31\xf0\x37\xf9\xdf\x9b"
+ "\x90\x55\x00\xe1\x71\x92\x3a\x77"
+ "\x3d\x5f\x15\x30\xf2\xed\x7e\x96"
+ "\x4c\xb2\xee\xdc\x29\xe9\xad\x2f"
+ "\x3a\xfe\x93\xb2\x81\x4f\x79\xf5"
+ "\x00\x0f\xfc\x03\x66\xc2\x51\xe6",
+ },
+};
+
/* Example vectors below taken from
* http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
*
@@ -5593,6 +5709,1238 @@ static const struct hash_testvec poly1305_tv_template[] = {
},
};
+/* NHPoly1305 test vectors from https://github.com/google/adiantum */
+static const struct hash_testvec nhpoly1305_tv_template[] = {
+ {
+ .key = "\xd2\x5d\x4c\xdd\x8d\x2b\x7f\x7a"
+ "\xd9\xbe\x71\xec\xd1\x83\x52\xe3"
+ "\xe1\xad\xd7\x5c\x0a\x75\x9d\xec"
+ "\x1d\x13\x7e\x5d\x71\x07\xc9\xe4"
+ "\x57\x2d\x44\x68\xcf\xd8\xd6\xc5"
+ "\x39\x69\x7d\x32\x75\x51\x4f\x7e"
+ "\xb2\x4c\xc6\x90\x51\x6e\xd9\xd6"
+ "\xa5\x8b\x2d\xf1\x94\xf9\xf7\x5e"
+ "\x2c\x84\x7b\x41\x0f\x88\x50\x89"
+ "\x30\xd9\xa1\x38\x46\x6c\xc0\x4f"
+ "\xe8\xdf\xdc\x66\xab\x24\x43\x41"
+ "\x91\x55\x29\x65\x86\x28\x5e\x45"
+ "\xd5\x2d\xb7\x80\x08\x9a\xc3\xd4"
+ "\x9a\x77\x0a\xd4\xef\x3e\xe6\x3f"
+ "\x6f\x2f\x9b\x3a\x7d\x12\x1e\x80"
+ "\x6c\x44\xa2\x25\xe1\xf6\x60\xe9"
+ "\x0d\xaf\xc5\x3c\xa5\x79\xae\x64"
+ "\xbc\xa0\x39\xa3\x4d\x10\xe5\x4d"
+ "\xd5\xe7\x89\x7a\x13\xee\x06\x78"
+ "\xdc\xa4\xdc\x14\x27\xe6\x49\x38"
+ "\xd0\xe0\x45\x25\x36\xc5\xf4\x79"
+ "\x2e\x9a\x98\x04\xe4\x2b\x46\x52"
+ "\x7c\x33\xca\xe2\x56\x51\x50\xe2"
+ "\xa5\x9a\xae\x18\x6a\x13\xf8\xd2"
+ "\x21\x31\x66\x02\xe2\xda\x8d\x7e"
+ "\x41\x19\xb2\x61\xee\x48\x8f\xf1"
+ "\x65\x24\x2e\x1e\x68\xce\x05\xd9"
+ "\x2a\xcf\xa5\x3a\x57\xdd\x35\x91"
+ "\x93\x01\xca\x95\xfc\x2b\x36\x04"
+ "\xe6\x96\x97\x28\xf6\x31\xfe\xa3"
+ "\x9d\xf6\x6a\x1e\x80\x8d\xdc\xec"
+ "\xaf\x66\x11\x13\x02\x88\xd5\x27"
+ "\x33\xb4\x1a\xcd\xa3\xf6\xde\x31"
+ "\x8e\xc0\x0e\x6c\xd8\x5a\x97\x5e"
+ "\xdd\xfd\x60\x69\x38\x46\x3f\x90"
+ "\x5e\x97\xd3\x32\x76\xc7\x82\x49"
+ "\xfe\xba\x06\x5f\x2f\xa2\xfd\xff"
+ "\x80\x05\x40\xe4\x33\x03\xfb\x10"
+ "\xc0\xde\x65\x8c\xc9\x8d\x3a\x9d"
+ "\xb5\x7b\x36\x4b\xb5\x0c\xcf\x00"
+ "\x9c\x87\xe4\x49\xad\x90\xda\x4a"
+ "\xdd\xbd\xff\xe2\x32\x57\xd6\x78"
+ "\x36\x39\x6c\xd3\x5b\x9b\x88\x59"
+ "\x2d\xf0\x46\xe4\x13\x0e\x2b\x35"
+ "\x0d\x0f\x73\x8a\x4f\x26\x84\x75"
+ "\x88\x3c\xc5\x58\x66\x18\x1a\xb4"
+ "\x64\x51\x34\x27\x1b\xa4\x11\xc9"
+ "\x6d\x91\x8a\xfa\x32\x60\x9d\xd7"
+ "\x87\xe5\xaa\x43\x72\xf8\xda\xd1"
+ "\x48\x44\x13\x61\xdc\x8c\x76\x17"
+ "\x0c\x85\x4e\xf3\xdd\xa2\x42\xd2"
+ "\x74\xc1\x30\x1b\xeb\x35\x31\x29"
+ "\x5b\xd7\x4c\x94\x46\x35\xa1\x23"
+ "\x50\xf2\xa2\x8e\x7e\x4f\x23\x4f"
+ "\x51\xff\xe2\xc9\xa3\x7d\x56\x8b"
+ "\x41\xf2\xd0\xc5\x57\x7e\x59\xac"
+ "\xbb\x65\xf3\xfe\xf7\x17\xef\x63"
+ "\x7c\x6f\x23\xdd\x22\x8e\xed\x84"
+ "\x0e\x3b\x09\xb3\xf3\xf4\x8f\xcd"
+ "\x37\xa8\xe1\xa7\x30\xdb\xb1\xa2"
+ "\x9c\xa2\xdf\x34\x17\x3e\x68\x44"
+ "\xd0\xde\x03\x50\xd1\x48\x6b\x20"
+ "\xe2\x63\x45\xa5\xea\x87\xc2\x42"
+ "\x95\x03\x49\x05\xed\xe0\x90\x29"
+ "\x1a\xb8\xcf\x9b\x43\xcf\x29\x7a"
+ "\x63\x17\x41\x9f\xe0\xc9\x10\xfd"
+ "\x2c\x56\x8c\x08\x55\xb4\xa9\x27"
+ "\x0f\x23\xb1\x05\x6a\x12\x46\xc7"
+ "\xe1\xfe\x28\x93\x93\xd7\x2f\xdc"
+ "\x98\x30\xdb\x75\x8a\xbe\x97\x7a"
+ "\x02\xfb\x8c\xba\xbe\x25\x09\xbe"
+ "\xce\xcb\xa2\xef\x79\x4d\x0e\x9d"
+ "\x1b\x9d\xb6\x39\x34\x38\xfa\x07"
+ "\xec\xe8\xfc\x32\x85\x1d\xf7\x85"
+ "\x63\xc3\x3c\xc0\x02\x75\xd7\x3f"
+ "\xb2\x68\x60\x66\x65\x81\xc6\xb1"
+ "\x42\x65\x4b\x4b\x28\xd7\xc7\xaa"
+ "\x9b\xd2\xdc\x1b\x01\xe0\x26\x39"
+ "\x01\xc1\x52\x14\xd1\x3f\xb7\xe6"
+ "\x61\x41\xc7\x93\xd2\xa2\x67\xc6"
+ "\xf7\x11\xb5\xf5\xea\xdd\x19\xfb"
+ "\x4d\x21\x12\xd6\x7d\xf1\x10\xb0"
+ "\x89\x07\xc7\x5a\x52\x73\x70\x2f"
+ "\x32\xef\x65\x2b\x12\xb2\xf0\xf5"
+ "\x20\xe0\x90\x59\x7e\x64\xf1\x4c"
+ "\x41\xb3\xa5\x91\x08\xe6\x5e\x5f"
+ "\x05\x56\x76\xb4\xb0\xcd\x70\x53"
+ "\x10\x48\x9c\xff\xc2\x69\x55\x24"
+ "\x87\xef\x84\xea\xfb\xa7\xbf\xa0"
+ "\x91\x04\xad\x4f\x8b\x57\x54\x4b"
+ "\xb6\xe9\xd1\xac\x37\x2f\x1d\x2e"
+ "\xab\xa5\xa4\xe8\xff\xfb\xd9\x39"
+ "\x2f\xb7\xac\xd1\xfe\x0b\x9a\x80"
+ "\x0f\xb6\xf4\x36\x39\x90\x51\xe3"
+ "\x0a\x2f\xb6\x45\x76\x89\xcd\x61"
+ "\xfe\x48\x5f\x75\x1d\x13\x00\x62"
+ "\x80\x24\x47\xe7\xbc\x37\xd7\xe3"
+ "\x15\xe8\x68\x22\xaf\x80\x6f\x4b"
+ "\xa8\x9f\x01\x10\x48\x14\xc3\x02"
+ "\x52\xd2\xc7\x75\x9b\x52\x6d\x30"
+ "\xac\x13\x85\xc8\xf7\xa3\x58\x4b"
+ "\x49\xf7\x1c\x45\x55\x8c\x39\x9a"
+ "\x99\x6d\x97\x27\x27\xe6\xab\xdd"
+ "\x2c\x42\x1b\x35\xdd\x9d\x73\xbb"
+ "\x6c\xf3\x64\xf1\xfb\xb9\xf7\xe6"
+ "\x4a\x3c\xc0\x92\xc0\x2e\xb7\x1a"
+ "\xbe\xab\xb3\x5a\xe5\xea\xb1\x48"
+ "\x58\x13\x53\x90\xfd\xc3\x8e\x54"
+ "\xf9\x18\x16\x73\xe8\xcb\x6d\x39"
+ "\x0e\xd7\xe0\xfe\xb6\x9f\x43\x97"
+ "\xe8\xd0\x85\x56\x83\x3e\x98\x68"
+ "\x7f\xbd\x95\xa8\x9a\x61\x21\x8f"
+ "\x06\x98\x34\xa6\xc8\xd6\x1d\xf3"
+ "\x3d\x43\xa4\x9a\x8c\xe5\xd3\x5a"
+ "\x32\xa2\x04\x22\xa4\x19\x1a\x46"
+ "\x42\x7e\x4d\xe5\xe0\xe6\x0e\xca"
+ "\xd5\x58\x9d\x2c\xaf\xda\x33\x5c"
+ "\xb0\x79\x9e\xc9\xfc\xca\xf0\x2f"
+ "\xa8\xb2\x77\xeb\x7a\xa2\xdd\x37"
+ "\x35\x83\x07\xd6\x02\x1a\xb6\x6c"
+ "\x24\xe2\x59\x08\x0e\xfd\x3e\x46"
+ "\xec\x40\x93\xf4\x00\x26\x4f\x2a"
+ "\xff\x47\x2f\xeb\x02\x92\x26\x5b"
+ "\x53\x17\xc2\x8d\x2a\xc7\xa3\x1b"
+ "\xcd\xbc\xa7\xe8\xd1\x76\xe3\x80"
+ "\x21\xca\x5d\x3b\xe4\x9c\x8f\xa9"
+ "\x5b\x7f\x29\x7f\x7c\xd8\xed\x6d"
+ "\x8c\xb2\x86\x85\xe7\x77\xf2\x85"
+ "\xab\x38\xa9\x9d\xc1\x4e\xc5\x64"
+ "\x33\x73\x8b\x59\x03\xad\x05\xdf"
+ "\x25\x98\x31\xde\xef\x13\xf1\x9b"
+ "\x3c\x91\x9d\x7b\xb1\xfa\xe6\xbf"
+ "\x5b\xed\xa5\x55\xe6\xea\x6c\x74"
+ "\xf4\xb9\xe4\x45\x64\x72\x81\xc2"
+ "\x4c\x28\xd4\xcd\xac\xe2\xde\xf9"
+ "\xeb\x5c\xeb\x61\x60\x5a\xe5\x28",
+ .ksize = 1088,
+ .plaintext = "",
+ .psize = 0,
+ .digest = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ }, {
+ .key = "\x29\x21\x43\xcb\xcb\x13\x07\xde"
+ "\xbf\x48\xdf\x8a\x7f\xa2\x84\xde"
+ "\x72\x23\x9d\xf5\xf0\x07\xf2\x4c"
+ "\x20\x3a\x93\xb9\xcd\x5d\xfe\xcb"
+ "\x99\x2c\x2b\x58\xc6\x50\x5f\x94"
+ "\x56\xc3\x7c\x0d\x02\x3f\xb8\x5e"
+ "\x7b\xc0\x6c\x51\x34\x76\xc0\x0e"
+ "\xc6\x22\xc8\x9e\x92\xa0\x21\xc9"
+ "\x85\x5c\x7c\xf8\xe2\x64\x47\xc9"
+ "\xe4\xa2\x57\x93\xf8\xa2\x69\xcd"
+ "\x62\x98\x99\xf4\xd7\x7b\x14\xb1"
+ "\xd8\x05\xff\x04\x15\xc9\xe1\x6e"
+ "\x9b\xe6\x50\x6b\x0b\x3f\x22\x1f"
+ "\x08\xde\x0c\x5b\x08\x7e\xc6\x2f"
+ "\x6c\xed\xd6\xb2\x15\xa4\xb3\xf9"
+ "\xa7\x46\x38\x2a\xea\x69\xa5\xde"
+ "\x02\xc3\x96\x89\x4d\x55\x3b\xed"
+ "\x3d\x3a\x85\x77\xbf\x97\x45\x5c"
+ "\x9e\x02\x69\xe2\x1b\x68\xbe\x96"
+ "\xfb\x64\x6f\x0f\xf6\x06\x40\x67"
+ "\xfa\x04\xe3\x55\xfa\xbe\xa4\x60"
+ "\xef\x21\x66\x97\xe6\x9d\x5c\x1f"
+ "\x62\x37\xaa\x31\xde\xe4\x9c\x28"
+ "\x95\xe0\x22\x86\xf4\x4d\xf3\x07"
+ "\xfd\x5f\x3a\x54\x2c\x51\x80\x71"
+ "\xba\x78\x69\x5b\x65\xab\x1f\x81"
+ "\xed\x3b\xff\x34\xa3\xfb\xbc\x73"
+ "\x66\x7d\x13\x7f\xdf\x6e\xe2\xe2"
+ "\xeb\x4f\x6c\xda\x7d\x33\x57\xd0"
+ "\xd3\x7c\x95\x4f\x33\x58\x21\xc7"
+ "\xc0\xe5\x6f\x42\x26\xc6\x1f\x5e"
+ "\x85\x1b\x98\x9a\xa2\x1e\x55\x77"
+ "\x23\xdf\x81\x5e\x79\x55\x05\xfc"
+ "\xfb\xda\xee\xba\x5a\xba\xf7\x77"
+ "\x7f\x0e\xd3\xe1\x37\xfe\x8d\x2b"
+ "\xd5\x3f\xfb\xd0\xc0\x3c\x0b\x3f"
+ "\xcf\x3c\x14\xcf\xfb\x46\x72\x4c"
+ "\x1f\x39\xe2\xda\x03\x71\x6d\x23"
+ "\xef\x93\xcd\x39\xd9\x37\x80\x4d"
+ "\x65\x61\xd1\x2c\x03\xa9\x47\x72"
+ "\x4d\x1e\x0e\x16\x33\x0f\x21\x17"
+ "\xec\x92\xea\x6f\x37\x22\xa4\xd8"
+ "\x03\x33\x9e\xd8\x03\x69\x9a\xe8"
+ "\xb2\x57\xaf\x78\x99\x05\x12\xab"
+ "\x48\x90\x80\xf0\x12\x9b\x20\x64"
+ "\x7a\x1d\x47\x5f\xba\x3c\xf9\xc3"
+ "\x0a\x0d\x8d\xa1\xf9\x1b\x82\x13"
+ "\x3e\x0d\xec\x0a\x83\xc0\x65\xe1"
+ "\xe9\x95\xff\x97\xd6\xf2\xe4\xd5"
+ "\x86\xc0\x1f\x29\x27\x63\xd7\xde"
+ "\xb7\x0a\x07\x99\x04\x2d\xa3\x89"
+ "\xa2\x43\xcf\xf3\xe1\x43\xac\x4a"
+ "\x06\x97\xd0\x05\x4f\x87\xfa\xf9"
+ "\x9b\xbf\x52\x70\xbd\xbc\x6c\xf3"
+ "\x03\x13\x60\x41\x28\x09\xec\xcc"
+ "\xb1\x1a\xec\xd6\xfb\x6f\x2a\x89"
+ "\x5d\x0b\x53\x9c\x59\xc1\x84\x21"
+ "\x33\x51\x47\x19\x31\x9c\xd4\x0a"
+ "\x4d\x04\xec\x50\x90\x61\xbd\xbc"
+ "\x7e\xc8\xd9\x6c\x98\x1d\x45\x41"
+ "\x17\x5e\x97\x1c\xc5\xa8\xe8\xea"
+ "\x46\x58\x53\xf7\x17\xd5\xad\x11"
+ "\xc8\x54\xf5\x7a\x33\x90\xf5\x19"
+ "\xba\x36\xb4\xfc\x52\xa5\x72\x3d"
+ "\x14\xbb\x55\xa7\xe9\xe3\x12\xf7"
+ "\x1c\x30\xa2\x82\x03\xbf\x53\x91"
+ "\x2e\x60\x41\x9f\x5b\x69\x39\xf6"
+ "\x4d\xc8\xf8\x46\x7a\x7f\xa4\x98"
+ "\x36\xff\x06\xcb\xca\xe7\x33\xf2"
+ "\xc0\x4a\xf4\x3c\x14\x44\x5f\x6b"
+ "\x75\xef\x02\x36\x75\x08\x14\xfd"
+ "\x10\x8e\xa5\x58\xd0\x30\x46\x49"
+ "\xaf\x3a\xf8\x40\x3d\x35\xdb\x84"
+ "\x11\x2e\x97\x6a\xb7\x87\x7f\xad"
+ "\xf1\xfa\xa5\x63\x60\xd8\x5e\xbf"
+ "\x41\x78\x49\xcf\x77\xbb\x56\xbb"
+ "\x7d\x01\x67\x05\x22\xc8\x8f\x41"
+ "\xba\x81\xd2\xca\x2c\x38\xac\x76"
+ "\x06\xc1\x1a\xc2\xce\xac\x90\x67"
+ "\x57\x3e\x20\x12\x5b\xd9\x97\x58"
+ "\x65\x05\xb7\x04\x61\x7e\xd8\x3a"
+ "\xbf\x55\x3b\x13\xe9\x34\x5a\x37"
+ "\x36\xcb\x94\x45\xc5\x32\xb3\xa0"
+ "\x0c\x3e\x49\xc5\xd3\xed\xa7\xf0"
+ "\x1c\x69\xcc\xea\xcc\x83\xc9\x16"
+ "\x95\x72\x4b\xf4\x89\xd5\xb9\x10"
+ "\xf6\x2d\x60\x15\xea\x3c\x06\x66"
+ "\x9f\x82\xad\x17\xce\xd2\xa4\x48"
+ "\x7c\x65\xd9\xf8\x02\x4d\x9b\x4c"
+ "\x89\x06\x3a\x34\x85\x48\x89\x86"
+ "\xf9\x24\xa9\x54\x72\xdb\x44\x95"
+ "\xc7\x44\x1c\x19\x11\x4c\x04\xdc"
+ "\x13\xb9\x67\xc8\xc3\x3a\x6a\x50"
+ "\xfa\xd1\xfb\xe1\x88\xb6\xf1\xa3"
+ "\xc5\x3b\xdc\x38\x45\x16\x26\x02"
+ "\x3b\xb8\x8f\x8b\x58\x7d\x23\x04"
+ "\x50\x6b\x81\x9f\xae\x66\xac\x6f"
+ "\xcf\x2a\x9d\xf1\xfd\x1d\x57\x07"
+ "\xbe\x58\xeb\x77\x0c\xe3\xc2\x19"
+ "\x14\x74\x1b\x51\x1c\x4f\x41\xf3"
+ "\x32\x89\xb3\xe7\xde\x62\xf6\x5f"
+ "\xc7\x6a\x4a\x2a\x5b\x0f\x5f\x87"
+ "\x9c\x08\xb9\x02\x88\xc8\x29\xb7"
+ "\x94\x52\xfa\x52\xfe\xaa\x50\x10"
+ "\xba\x48\x75\x5e\x11\x1b\xe6\x39"
+ "\xd7\x82\x2c\x87\xf1\x1e\xa4\x38"
+ "\x72\x3e\x51\xe7\xd8\x3e\x5b\x7b"
+ "\x31\x16\x89\xba\xd6\xad\x18\x5e"
+ "\xba\xf8\x12\xb3\xf4\x6c\x47\x30"
+ "\xc0\x38\x58\xb3\x10\x8d\x58\x5d"
+ "\xb4\xfb\x19\x7e\x41\xc3\x66\xb8"
+ "\xd6\x72\x84\xe1\x1a\xc2\x71\x4c"
+ "\x0d\x4a\x21\x7a\xab\xa2\xc0\x36"
+ "\x15\xc5\xe9\x46\xd7\x29\x17\x76"
+ "\x5e\x47\x36\x7f\x72\x05\xa7\xcc"
+ "\x36\x63\xf9\x47\x7d\xe6\x07\x3c"
+ "\x8b\x79\x1d\x96\x61\x8d\x90\x65"
+ "\x7c\xf5\xeb\x4e\x6e\x09\x59\x6d"
+ "\x62\x50\x1b\x0f\xe0\xdc\x78\xf2"
+ "\x5b\x83\x1a\xa1\x11\x75\xfd\x18"
+ "\xd7\xe2\x8d\x65\x14\x21\xce\xbe"
+ "\xb5\x87\xe3\x0a\xda\x24\x0a\x64"
+ "\xa9\x9f\x03\x8d\x46\x5d\x24\x1a"
+ "\x8a\x0c\x42\x01\xca\xb1\x5f\x7c"
+ "\xa5\xac\x32\x4a\xb8\x07\x91\x18"
+ "\x6f\xb0\x71\x3c\xc9\xb1\xa8\xf8"
+ "\x5f\x69\xa5\xa1\xca\x9e\x7a\xaa"
+ "\xac\xe9\xc7\x47\x41\x75\x25\xc3"
+ "\x73\xe2\x0b\xdd\x6d\x52\x71\xbe"
+ "\xc5\xdc\xb4\xe7\x01\x26\x53\x77"
+ "\x86\x90\x85\x68\x6b\x7b\x03\x53"
+ "\xda\x52\x52\x51\x68\xc8\xf3\xec"
+ "\x6c\xd5\x03\x7a\xa3\x0e\xb4\x02"
+ "\x5f\x1a\xab\xee\xca\x67\x29\x7b"
+ "\xbd\x96\x59\xb3\x8b\x32\x7a\x92"
+ "\x9f\xd8\x25\x2b\xdf\xc0\x4c\xda",
+ .ksize = 1088,
+ .plaintext = "\xbc\xda\x81\xa8\x78\x79\x1c\xbf"
+ "\x77\x53\xba\x4c\x30\x5b\xb8\x33",
+ .psize = 16,
+ .digest = "\x04\xbf\x7f\x6a\xce\x72\xea\x6a"
+ "\x79\xdb\xb0\xc9\x60\xf6\x12\xcc",
+ .np = 6,
+ .tap = { 4, 4, 1, 1, 1, 5 },
+ }, {
+ .key = "\x65\x4d\xe3\xf8\xd2\x4c\xac\x28"
+ "\x68\xf5\xb3\x81\x71\x4b\xa1\xfa"
+ "\x04\x0e\xd3\x81\x36\xbe\x0c\x81"
+ "\x5e\xaf\xbc\x3a\xa4\xc0\x8e\x8b"
+ "\x55\x63\xd3\x52\x97\x88\xd6\x19"
+ "\xbc\x96\xdf\x49\xff\x04\x63\xf5"
+ "\x0c\x11\x13\xaa\x9e\x1f\x5a\xf7"
+ "\xdd\xbd\x37\x80\xc3\xd0\xbe\xa7"
+ "\x05\xc8\x3c\x98\x1e\x05\x3c\x84"
+ "\x39\x61\xc4\xed\xed\x71\x1b\xc4"
+ "\x74\x45\x2c\xa1\x56\x70\x97\xfd"
+ "\x44\x18\x07\x7d\xca\x60\x1f\x73"
+ "\x3b\x6d\x21\xcb\x61\x87\x70\x25"
+ "\x46\x21\xf1\x1f\x21\x91\x31\x2d"
+ "\x5d\xcc\xb7\xd1\x84\x3e\x3d\xdb"
+ "\x03\x53\x2a\x82\xa6\x9a\x95\xbc"
+ "\x1a\x1e\x0a\x5e\x07\x43\xab\x43"
+ "\xaf\x92\x82\x06\x91\x04\x09\xf4"
+ "\x17\x0a\x9a\x2c\x54\xdb\xb8\xf4"
+ "\xd0\xf0\x10\x66\x24\x8d\xcd\xda"
+ "\xfe\x0e\x45\x9d\x6f\xc4\x4e\xf4"
+ "\x96\xaf\x13\xdc\xa9\xd4\x8c\xc4"
+ "\xc8\x57\x39\x3c\xc2\xd3\x0a\x76"
+ "\x4a\x1f\x75\x83\x44\xc7\xd1\x39"
+ "\xd8\xb5\x41\xba\x73\x87\xfa\x96"
+ "\xc7\x18\x53\xfb\x9b\xda\xa0\x97"
+ "\x1d\xee\x60\x85\x9e\x14\xc3\xce"
+ "\xc4\x05\x29\x3b\x95\x30\xa3\xd1"
+ "\x9f\x82\x6a\x04\xf5\xa7\x75\x57"
+ "\x82\x04\xfe\x71\x51\x71\xb1\x49"
+ "\x50\xf8\xe0\x96\xf1\xfa\xa8\x88"
+ "\x3f\xa0\x86\x20\xd4\x60\x79\x59"
+ "\x17\x2d\xd1\x09\xf4\xec\x05\x57"
+ "\xcf\x62\x7e\x0e\x7e\x60\x78\xe6"
+ "\x08\x60\x29\xd8\xd5\x08\x1a\x24"
+ "\xc4\x6c\x24\xe7\x92\x08\x3d\x8a"
+ "\x98\x7a\xcf\x99\x0a\x65\x0e\xdc"
+ "\x8c\x8a\xbe\x92\x82\x91\xcc\x62"
+ "\x30\xb6\xf4\x3f\xc6\x8a\x7f\x12"
+ "\x4a\x8a\x49\xfa\x3f\x5c\xd4\x5a"
+ "\xa6\x82\xa3\xe6\xaa\x34\x76\xb2"
+ "\xab\x0a\x30\xef\x6c\x77\x58\x3f"
+ "\x05\x6b\xcc\x5c\xae\xdc\xd7\xb9"
+ "\x51\x7e\x8d\x32\x5b\x24\x25\xbe"
+ "\x2b\x24\x01\xcf\x80\xda\x16\xd8"
+ "\x90\x72\x2c\xad\x34\x8d\x0c\x74"
+ "\x02\xcb\xfd\xcf\x6e\xef\x97\xb5"
+ "\x4c\xf2\x68\xca\xde\x43\x9e\x8a"
+ "\xc5\x5f\x31\x7f\x14\x71\x38\xec"
+ "\xbd\x98\xe5\x71\xc4\xb5\xdb\xef"
+ "\x59\xd2\xca\xc0\xc1\x86\x75\x01"
+ "\xd4\x15\x0d\x6f\xa4\xf7\x7b\x37"
+ "\x47\xda\x18\x93\x63\xda\xbe\x9e"
+ "\x07\xfb\xb2\x83\xd5\xc4\x34\x55"
+ "\xee\x73\xa1\x42\x96\xf9\x66\x41"
+ "\xa4\xcc\xd2\x93\x6e\xe1\x0a\xbb"
+ "\xd2\xdd\x18\x23\xe6\x6b\x98\x0b"
+ "\x8a\x83\x59\x2c\xc3\xa6\x59\x5b"
+ "\x01\x22\x59\xf7\xdc\xb0\x87\x7e"
+ "\xdb\x7d\xf4\x71\x41\xab\xbd\xee"
+ "\x79\xbe\x3c\x01\x76\x0b\x2d\x0a"
+ "\x42\xc9\x77\x8c\xbb\x54\x95\x60"
+ "\x43\x2e\xe0\x17\x52\xbd\x90\xc9"
+ "\xc2\x2c\xdd\x90\x24\x22\x76\x40"
+ "\x5c\xb9\x41\xc9\xa1\xd5\xbd\xe3"
+ "\x44\xe0\xa4\xab\xcc\xb8\xe2\x32"
+ "\x02\x15\x04\x1f\x8c\xec\x5d\x14"
+ "\xac\x18\xaa\xef\x6e\x33\x19\x6e"
+ "\xde\xfe\x19\xdb\xeb\x61\xca\x18"
+ "\xad\xd8\x3d\xbf\x09\x11\xc7\xa5"
+ "\x86\x0b\x0f\xe5\x3e\xde\xe8\xd9"
+ "\x0a\x69\x9e\x4c\x20\xff\xf9\xc5"
+ "\xfa\xf8\xf3\x7f\xa5\x01\x4b\x5e"
+ "\x0f\xf0\x3b\x68\xf0\x46\x8c\x2a"
+ "\x7a\xc1\x8f\xa0\xfe\x6a\x5b\x44"
+ "\x70\x5c\xcc\x92\x2c\x6f\x0f\xbd"
+ "\x25\x3e\xb7\x8e\x73\x58\xda\xc9"
+ "\xa5\xaa\x9e\xf3\x9b\xfd\x37\x3e"
+ "\xe2\x88\xa4\x7b\xc8\x5c\xa8\x93"
+ "\x0e\xe7\x9a\x9c\x2e\x95\x18\x9f"
+ "\xc8\x45\x0c\x88\x9e\x53\x4f\x3a"
+ "\x76\xc1\x35\xfa\x17\xd8\xac\xa0"
+ "\x0c\x2d\x47\x2e\x4f\x69\x9b\xf7"
+ "\xd0\xb6\x96\x0c\x19\xb3\x08\x01"
+ "\x65\x7a\x1f\xc7\x31\x86\xdb\xc8"
+ "\xc1\x99\x8f\xf8\x08\x4a\x9d\x23"
+ "\x22\xa8\xcf\x27\x01\x01\x88\x93"
+ "\x9c\x86\x45\xbd\xe0\x51\xca\x52"
+ "\x84\xba\xfe\x03\xf7\xda\xc5\xce"
+ "\x3e\x77\x75\x86\xaf\x84\xc8\x05"
+ "\x44\x01\x0f\x02\xf3\x58\xb0\x06"
+ "\x5a\xd7\x12\x30\x8d\xdf\x1f\x1f"
+ "\x0a\xe6\xd2\xea\xf6\x3a\x7a\x99"
+ "\x63\xe8\xd2\xc1\x4a\x45\x8b\x40"
+ "\x4d\x0a\xa9\x76\x92\xb3\xda\x87"
+ "\x36\x33\xf0\x78\xc3\x2f\x5f\x02"
+ "\x1a\x6a\x2c\x32\xcd\x76\xbf\xbd"
+ "\x5a\x26\x20\x28\x8c\x8c\xbc\x52"
+ "\x3d\x0a\xc9\xcb\xab\xa4\x21\xb0"
+ "\x54\x40\x81\x44\xc7\xd6\x1c\x11"
+ "\x44\xc6\x02\x92\x14\x5a\xbf\x1a"
+ "\x09\x8a\x18\xad\xcd\x64\x3d\x53"
+ "\x4a\xb6\xa5\x1b\x57\x0e\xef\xe0"
+ "\x8c\x44\x5f\x7d\xbd\x6c\xfd\x60"
+ "\xae\x02\x24\xb6\x99\xdd\x8c\xaf"
+ "\x59\x39\x75\x3c\xd1\x54\x7b\x86"
+ "\xcc\x99\xd9\x28\x0c\xb0\x94\x62"
+ "\xf9\x51\xd1\x19\x96\x2d\x66\xf5"
+ "\x55\xcf\x9e\x59\xe2\x6b\x2c\x08"
+ "\xc0\x54\x48\x24\x45\xc3\x8c\x73"
+ "\xea\x27\x6e\x66\x7d\x1d\x0e\x6e"
+ "\x13\xe8\x56\x65\x3a\xb0\x81\x5c"
+ "\xf0\xe8\xd8\x00\x6b\xcd\x8f\xad"
+ "\xdd\x53\xf3\xa4\x6c\x43\xd6\x31"
+ "\xaf\xd2\x76\x1e\x91\x12\xdb\x3c"
+ "\x8c\xc2\x81\xf0\x49\xdb\xe2\x6b"
+ "\x76\x62\x0a\x04\xe4\xaa\x8a\x7c"
+ "\x08\x0b\x5d\xd0\xee\x1d\xfb\xc4"
+ "\x02\x75\x42\xd6\xba\xa7\x22\xa8"
+ "\x47\x29\xb7\x85\x6d\x93\x3a\xdb"
+ "\x00\x53\x0b\xa2\xeb\xf8\xfe\x01"
+ "\x6f\x8a\x31\xd6\x17\x05\x6f\x67"
+ "\x88\x95\x32\xfe\x4f\xa6\x4b\xf8"
+ "\x03\xe4\xcd\x9a\x18\xe8\x4e\x2d"
+ "\xf7\x97\x9a\x0c\x7d\x9f\x7e\x44"
+ "\x69\x51\xe0\x32\x6b\x62\x86\x8f"
+ "\xa6\x8e\x0b\x21\x96\xe5\xaf\x77"
+ "\xc0\x83\xdf\xa5\x0e\xd0\xa1\x04"
+ "\xaf\xc1\x10\xcb\x5a\x40\xe4\xe3"
+ "\x38\x7e\x07\xe8\x4d\xfa\xed\xc5"
+ "\xf0\x37\xdf\xbb\x8a\xcf\x3d\xdc"
+ "\x61\xd2\xc6\x2b\xff\x07\xc9\x2f"
+ "\x0c\x2d\x5c\x07\xa8\x35\x6a\xfc"
+ "\xae\x09\x03\x45\x74\x51\x4d\xc4"
+ "\xb8\x23\x87\x4a\x99\x27\x20\x87"
+ "\x62\x44\x0a\x4a\xce\x78\x47\x22",
+ .ksize = 1088,
+ .plaintext = "\x8e\xb0\x4c\xde\x9c\x4a\x04\x5a"
+ "\xf6\xa9\x7f\x45\x25\xa5\x7b\x3a"
+ "\xbc\x4d\x73\x39\x81\xb5\xbd\x3d"
+ "\x21\x6f\xd7\x37\x50\x3c\x7b\x28"
+ "\xd1\x03\x3a\x17\xed\x7b\x7c\x2a"
+ "\x16\xbc\xdf\x19\x89\x52\x71\x31"
+ "\xb6\xc0\xfd\xb5\xd3\xba\x96\x99"
+ "\xb6\x34\x0b\xd0\x99\x93\xfc\x1a"
+ "\x01\x3c\x85\xc6\x9b\x78\x5c\x8b"
+ "\xfe\xae\xd2\xbf\xb2\x6f\xf9\xed"
+ "\xc8\x25\x17\xfe\x10\x3b\x7d\xda"
+ "\xf4\x8d\x35\x4b\x7c\x7b\x82\xe7"
+ "\xc2\xb3\xee\x60\x4a\x03\x86\xc9"
+ "\x4e\xb5\xc4\xbe\xd2\xbd\x66\xf1"
+ "\x13\xf1\x09\xab\x5d\xca\x63\x1f"
+ "\xfc\xfb\x57\x2a\xfc\xca\x66\xd8"
+ "\x77\x84\x38\x23\x1d\xac\xd3\xb3"
+ "\x7a\xad\x4c\x70\xfa\x9c\xc9\x61"
+ "\xa6\x1b\xba\x33\x4b\x4e\x33\xec"
+ "\xa0\xa1\x64\x39\x40\x05\x1c\xc2"
+ "\x3f\x49\x9d\xae\xf2\xc5\xf2\xc5"
+ "\xfe\xe8\xf4\xc2\xf9\x96\x2d\x28"
+ "\x92\x30\x44\xbc\xd2\x7f\xe1\x6e"
+ "\x62\x02\x8f\x3d\x1c\x80\xda\x0e"
+ "\x6a\x90\x7e\x75\xff\xec\x3e\xc4"
+ "\xcd\x16\x34\x3b\x05\x6d\x4d\x20"
+ "\x1c\x7b\xf5\x57\x4f\xfa\x3d\xac"
+ "\xd0\x13\x55\xe8\xb3\xe1\x1b\x78"
+ "\x30\xe6\x9f\x84\xd4\x69\xd1\x08"
+ "\x12\x77\xa7\x4a\xbd\xc0\xf2\xd2"
+ "\x78\xdd\xa3\x81\x12\xcb\x6c\x14"
+ "\x90\x61\xe2\x84\xc6\x2b\x16\xcc"
+ "\x40\x99\x50\x88\x01\x09\x64\x4f"
+ "\x0a\x80\xbe\x61\xae\x46\xc9\x0a"
+ "\x5d\xe0\xfb\x72\x7a\x1a\xdd\x61"
+ "\x63\x20\x05\xa0\x4a\xf0\x60\x69"
+ "\x7f\x92\xbc\xbf\x4e\x39\x4d\xdd"
+ "\x74\xd1\xb7\xc0\x5a\x34\xb7\xae"
+ "\x76\x65\x2e\xbc\x36\xb9\x04\x95"
+ "\x42\xe9\x6f\xca\x78\xb3\x72\x07"
+ "\xa3\xba\x02\x94\x67\x4c\xb1\xd7"
+ "\xe9\x30\x0d\xf0\x3b\xb8\x10\x6d"
+ "\xea\x2b\x21\xbf\x74\x59\x82\x97"
+ "\x85\xaa\xf1\xd7\x54\x39\xeb\x05"
+ "\xbd\xf3\x40\xa0\x97\xe6\x74\xfe"
+ "\xb4\x82\x5b\xb1\x36\xcb\xe8\x0d"
+ "\xce\x14\xd9\xdf\xf1\x94\x22\xcd"
+ "\xd6\x00\xba\x04\x4c\x05\x0c\xc0"
+ "\xd1\x5a\xeb\x52\xd5\xa8\x8e\xc8"
+ "\x97\xa1\xaa\xc1\xea\xc1\xbe\x7c"
+ "\x36\xb3\x36\xa0\xc6\x76\x66\xc5"
+ "\xe2\xaf\xd6\x5c\xe2\xdb\x2c\xb3"
+ "\x6c\xb9\x99\x7f\xff\x9f\x03\x24"
+ "\xe1\x51\x44\x66\xd8\x0c\x5d\x7f"
+ "\x5c\x85\x22\x2a\xcf\x6d\x79\x28"
+ "\xab\x98\x01\x72\xfe\x80\x87\x5f"
+ "\x46\xba\xef\x81\x24\xee\xbf\xb0"
+ "\x24\x74\xa3\x65\x97\x12\xc4\xaf"
+ "\x8b\xa0\x39\xda\x8a\x7e\x74\x6e"
+ "\x1b\x42\xb4\x44\x37\xfc\x59\xfd"
+ "\x86\xed\xfb\x8c\x66\x33\xda\x63"
+ "\x75\xeb\xe1\xa4\x85\x4f\x50\x8f"
+ "\x83\x66\x0d\xd3\x37\xfa\xe6\x9c"
+ "\x4f\x30\x87\x35\x18\xe3\x0b\xb7"
+ "\x6e\x64\x54\xcd\x70\xb3\xde\x54"
+ "\xb7\x1d\xe6\x4c\x4d\x55\x12\x12"
+ "\xaf\x5f\x7f\x5e\xee\x9d\xe8\x8e"
+ "\x32\x9d\x4e\x75\xeb\xc6\xdd\xaa"
+ "\x48\x82\xa4\x3f\x3c\xd7\xd3\xa8"
+ "\x63\x9e\x64\xfe\xe3\x97\x00\x62"
+ "\xe5\x40\x5d\xc3\xad\x72\xe1\x28"
+ "\x18\x50\xb7\x75\xef\xcd\x23\xbf"
+ "\x3f\xc0\x51\x36\xf8\x41\xc3\x08"
+ "\xcb\xf1\x8d\x38\x34\xbd\x48\x45"
+ "\x75\xed\xbc\x65\x7b\xb5\x0c\x9b"
+ "\xd7\x67\x7d\x27\xb4\xc4\x80\xd7"
+ "\xa9\xb9\xc7\x4a\x97\xaa\xda\xc8"
+ "\x3c\x74\xcf\x36\x8f\xe4\x41\xe3"
+ "\xd4\xd3\x26\xa7\xf3\x23\x9d\x8f"
+ "\x6c\x20\x05\x32\x3e\xe0\xc3\xc8"
+ "\x56\x3f\xa7\x09\xb7\xfb\xc7\xf7"
+ "\xbe\x2a\xdd\x0f\x06\x7b\x0d\xdd"
+ "\xb0\xb4\x86\x17\xfd\xb9\x04\xe5"
+ "\xc0\x64\x5d\xad\x2a\x36\x38\xdb"
+ "\x24\xaf\x5b\xff\xca\xf9\x41\xe8"
+ "\xf9\x2f\x1e\x5e\xf9\xf5\xd5\xf2"
+ "\xb2\x88\xca\xc9\xa1\x31\xe2\xe8"
+ "\x10\x95\x65\xbf\xf1\x11\x61\x7a"
+ "\x30\x1a\x54\x90\xea\xd2\x30\xf6"
+ "\xa5\xad\x60\xf9\x4d\x84\x21\x1b"
+ "\xe4\x42\x22\xc8\x12\x4b\xb0\x58"
+ "\x3e\x9c\x2d\x32\x95\x0a\x8e\xb0"
+ "\x0a\x7e\x77\x2f\xe8\x97\x31\x6a"
+ "\xf5\x59\xb4\x26\xe6\x37\x12\xc9"
+ "\xcb\xa0\x58\x33\x6f\xd5\x55\x55"
+ "\x3c\xa1\x33\xb1\x0b\x7e\x2e\xb4"
+ "\x43\x2a\x84\x39\xf0\x9c\xf4\x69"
+ "\x4f\x1e\x79\xa6\x15\x1b\x87\xbb"
+ "\xdb\x9b\xe0\xf1\x0b\xba\xe3\x6e"
+ "\xcc\x2f\x49\x19\x22\x29\xfc\x71"
+ "\xbb\x77\x38\x18\x61\xaf\x85\x76"
+ "\xeb\xd1\x09\xcc\x86\x04\x20\x9a"
+ "\x66\x53\x2f\x44\x8b\xc6\xa3\xd2"
+ "\x5f\xc7\x79\x82\x66\xa8\x6e\x75"
+ "\x7d\x94\xd1\x86\x75\x0f\xa5\x4f"
+ "\x3c\x7a\x33\xce\xd1\x6e\x9d\x7b"
+ "\x1f\x91\x37\xb8\x37\x80\xfb\xe0"
+ "\x52\x26\xd0\x9a\xd4\x48\x02\x41"
+ "\x05\xe3\x5a\x94\xf1\x65\x61\x19"
+ "\xb8\x88\x4e\x2b\xea\xba\x8b\x58"
+ "\x8b\x42\x01\x00\xa8\xfe\x00\x5c"
+ "\xfe\x1c\xee\x31\x15\x69\xfa\xb3"
+ "\x9b\x5f\x22\x8e\x0d\x2c\xe3\xa5"
+ "\x21\xb9\x99\x8a\x8e\x94\x5a\xef"
+ "\x13\x3e\x99\x96\x79\x6e\xd5\x42"
+ "\x36\x03\xa9\xe2\xca\x65\x4e\x8a"
+ "\x8a\x30\xd2\x7d\x74\xe7\xf0\xaa"
+ "\x23\x26\xdd\xcb\x82\x39\xfc\x9d"
+ "\x51\x76\x21\x80\xa2\xbe\x93\x03"
+ "\x47\xb0\xc1\xb6\xdc\x63\xfd\x9f"
+ "\xca\x9d\xa5\xca\x27\x85\xe2\xd8"
+ "\x15\x5b\x7e\x14\x7a\xc4\x89\xcc"
+ "\x74\x14\x4b\x46\xd2\xce\xac\x39"
+ "\x6b\x6a\x5a\xa4\x0e\xe3\x7b\x15"
+ "\x94\x4b\x0f\x74\xcb\x0c\x7f\xa9"
+ "\xbe\x09\x39\xa3\xdd\x56\x5c\xc7"
+ "\x99\x56\x65\x39\xf4\x0b\x7d\x87"
+ "\xec\xaa\xe3\x4d\x22\x65\x39\x4e",
+ .psize = 1024,
+ .digest = "\x64\x3a\xbc\xc3\x3f\x74\x40\x51"
+ "\x6e\x56\x01\x1a\x51\xec\x36\xde",
+ .np = 8,
+ .tap = { 64, 203, 267, 28, 263, 62, 54, 83 },
+ }, {
+ .key = "\x1b\x82\x2e\x1b\x17\x23\xb9\x6d"
+ "\xdc\x9c\xda\x99\x07\xe3\x5f\xd8"
+ "\xd2\xf8\x43\x80\x8d\x86\x7d\x80"
+ "\x1a\xd0\xcc\x13\xb9\x11\x05\x3f"
+ "\x7e\xcf\x7e\x80\x0e\xd8\x25\x48"
+ "\x8b\xaa\x63\x83\x92\xd0\x72\xf5"
+ "\x4f\x67\x7e\x50\x18\x25\xa4\xd1"
+ "\xe0\x7e\x1e\xba\xd8\xa7\x6e\xdb"
+ "\x1a\xcc\x0d\xfe\x9f\x6d\x22\x35"
+ "\xe1\xe6\xe0\xa8\x7b\x9c\xb1\x66"
+ "\xa3\xf8\xff\x4d\x90\x84\x28\xbc"
+ "\xdc\x19\xc7\x91\x49\xfc\xf6\x33"
+ "\xc9\x6e\x65\x7f\x28\x6f\x68\x2e"
+ "\xdf\x1a\x75\xe9\xc2\x0c\x96\xb9"
+ "\x31\x22\xc4\x07\xc6\x0a\x2f\xfd"
+ "\x36\x06\x5f\x5c\xc5\xb1\x3a\xf4"
+ "\x5e\x48\xa4\x45\x2b\x88\xa7\xee"
+ "\xa9\x8b\x52\xcc\x99\xd9\x2f\xb8"
+ "\xa4\x58\x0a\x13\xeb\x71\x5a\xfa"
+ "\xe5\x5e\xbe\xf2\x64\xad\x75\xbc"
+ "\x0b\x5b\x34\x13\x3b\x23\x13\x9a"
+ "\x69\x30\x1e\x9a\xb8\x03\xb8\x8b"
+ "\x3e\x46\x18\x6d\x38\xd9\xb3\xd8"
+ "\xbf\xf1\xd0\x28\xe6\x51\x57\x80"
+ "\x5e\x99\xfb\xd0\xce\x1e\x83\xf7"
+ "\xe9\x07\x5a\x63\xa9\xef\xce\xa5"
+ "\xfb\x3f\x37\x17\xfc\x0b\x37\x0e"
+ "\xbb\x4b\x21\x62\xb7\x83\x0e\xa9"
+ "\x9e\xb0\xc4\xad\x47\xbe\x35\xe7"
+ "\x51\xb2\xf2\xac\x2b\x65\x7b\x48"
+ "\xe3\x3f\x5f\xb6\x09\x04\x0c\x58"
+ "\xce\x99\xa9\x15\x2f\x4e\xc1\xf2"
+ "\x24\x48\xc0\xd8\x6c\xd3\x76\x17"
+ "\x83\x5d\xe6\xe3\xfd\x01\x8e\xf7"
+ "\x42\xa5\x04\x29\x30\xdf\xf9\x00"
+ "\x4a\xdc\x71\x22\x1a\x33\x15\xb6"
+ "\xd7\x72\xfb\x9a\xb8\xeb\x2b\x38"
+ "\xea\xa8\x61\xa8\x90\x11\x9d\x73"
+ "\x2e\x6c\xce\x81\x54\x5a\x9f\xcd"
+ "\xcf\xd5\xbd\x26\x5d\x66\xdb\xfb"
+ "\xdc\x1e\x7c\x10\xfe\x58\x82\x10"
+ "\x16\x24\x01\xce\x67\x55\x51\xd1"
+ "\xdd\x6b\x44\xa3\x20\x8e\xa9\xa6"
+ "\x06\xa8\x29\x77\x6e\x00\x38\x5b"
+ "\xde\x4d\x58\xd8\x1f\x34\xdf\xf9"
+ "\x2c\xac\x3e\xad\xfb\x92\x0d\x72"
+ "\x39\xa4\xac\x44\x10\xc0\x43\xc4"
+ "\xa4\x77\x3b\xfc\xc4\x0d\x37\xd3"
+ "\x05\x84\xda\x53\x71\xf8\x80\xd3"
+ "\x34\x44\xdb\x09\xb4\x2b\x8e\xe3"
+ "\x00\x75\x50\x9e\x43\x22\x00\x0b"
+ "\x7c\x70\xab\xd4\x41\xf1\x93\xcd"
+ "\x25\x2d\x84\x74\xb5\xf2\x92\xcd"
+ "\x0a\x28\xea\x9a\x49\x02\x96\xcb"
+ "\x85\x9e\x2f\x33\x03\x86\x1d\xdc"
+ "\x1d\x31\xd5\xfc\x9d\xaa\xc5\xe9"
+ "\x9a\xc4\x57\xf5\x35\xed\xf4\x4b"
+ "\x3d\x34\xc2\x29\x13\x86\x36\x42"
+ "\x5d\xbf\x90\x86\x13\x77\xe5\xc3"
+ "\x62\xb4\xfe\x0b\x70\x39\x35\x65"
+ "\x02\xea\xf6\xce\x57\x0c\xbb\x74"
+ "\x29\xe3\xfd\x60\x90\xfd\x10\x38"
+ "\xd5\x4e\x86\xbd\x37\x70\xf0\x97"
+ "\xa6\xab\x3b\x83\x64\x52\xca\x66"
+ "\x2f\xf9\xa4\xca\x3a\x55\x6b\xb0"
+ "\xe8\x3a\x34\xdb\x9e\x48\x50\x2f"
+ "\x3b\xef\xfd\x08\x2d\x5f\xc1\x37"
+ "\x5d\xbe\x73\xe4\xd8\xe9\xac\xca"
+ "\x8a\xaa\x48\x7c\x5c\xf4\xa6\x96"
+ "\x5f\xfa\x70\xa6\xb7\x8b\x50\xcb"
+ "\xa6\xf5\xa9\xbd\x7b\x75\x4c\x22"
+ "\x0b\x19\x40\x2e\xc9\x39\x39\x32"
+ "\x83\x03\xa8\xa4\x98\xe6\x8e\x16"
+ "\xb9\xde\x08\xc5\xfc\xbf\xad\x39"
+ "\xa8\xc7\x93\x6c\x6f\x23\xaf\xc1"
+ "\xab\xe1\xdf\xbb\x39\xae\x93\x29"
+ "\x0e\x7d\x80\x8d\x3e\x65\xf3\xfd"
+ "\x96\x06\x65\x90\xa1\x28\x64\x4b"
+ "\x69\xf9\xa8\x84\x27\x50\xfc\x87"
+ "\xf7\xbf\x55\x8e\x56\x13\x58\x7b"
+ "\x85\xb4\x6a\x72\x0f\x40\xf1\x4f"
+ "\x83\x81\x1f\x76\xde\x15\x64\x7a"
+ "\x7a\x80\xe4\xc7\x5e\x63\x01\x91"
+ "\xd7\x6b\xea\x0b\x9b\xa2\x99\x3b"
+ "\x6c\x88\xd8\xfd\x59\x3c\x8d\x22"
+ "\x86\x56\xbe\xab\xa1\x37\x08\x01"
+ "\x50\x85\x69\x29\xee\x9f\xdf\x21"
+ "\x3e\x20\x20\xf5\xb0\xbb\x6b\xd0"
+ "\x9c\x41\x38\xec\x54\x6f\x2d\xbd"
+ "\x0f\xe1\xbd\xf1\x2b\x6e\x60\x56"
+ "\x29\xe5\x7a\x70\x1c\xe2\xfc\x97"
+ "\x82\x68\x67\xd9\x3d\x1f\xfb\xd8"
+ "\x07\x9f\xbf\x96\x74\xba\x6a\x0e"
+ "\x10\x48\x20\xd8\x13\x1e\xb5\x44"
+ "\xf2\xcc\xb1\x8b\xfb\xbb\xec\xd7"
+ "\x37\x70\x1f\x7c\x55\xd2\x4b\xb9"
+ "\xfd\x70\x5e\xa3\x91\x73\x63\x52"
+ "\x13\x47\x5a\x06\xfb\x01\x67\xa5"
+ "\xc0\xd0\x49\x19\x56\x66\x9a\x77"
+ "\x64\xaf\x8c\x25\x91\x52\x87\x0e"
+ "\x18\xf3\x5f\x97\xfd\x71\x13\xf8"
+ "\x05\xa5\x39\xcc\x65\xd3\xcc\x63"
+ "\x5b\xdb\x5f\x7e\x5f\x6e\xad\xc4"
+ "\xf4\xa0\xc5\xc2\x2b\x4d\x97\x38"
+ "\x4f\xbc\xfa\x33\x17\xb4\x47\xb9"
+ "\x43\x24\x15\x8d\xd2\xed\x80\x68"
+ "\x84\xdb\x04\x80\xca\x5e\x6a\x35"
+ "\x2c\x2c\xe7\xc5\x03\x5f\x54\xb0"
+ "\x5e\x4f\x1d\x40\x54\x3d\x78\x9a"
+ "\xac\xda\x80\x27\x4d\x15\x4c\x1a"
+ "\x6e\x80\xc9\xc4\x3b\x84\x0e\xd9"
+ "\x2e\x93\x01\x8c\xc3\xc8\x91\x4b"
+ "\xb3\xaa\x07\x04\x68\x5b\x93\xa5"
+ "\xe7\xc4\x9d\xe7\x07\xee\xf5\x3b"
+ "\x40\x89\xcc\x60\x34\x9d\xb4\x06"
+ "\x1b\xef\x92\xe6\xc1\x2a\x7d\x0f"
+ "\x81\xaa\x56\xe3\xd7\xed\xa7\xd4"
+ "\xa7\x3a\x49\xc4\xad\x81\x5c\x83"
+ "\x55\x8e\x91\x54\xb7\x7d\x65\xa5"
+ "\x06\x16\xd5\x9a\x16\xc1\xb0\xa2"
+ "\x06\xd8\x98\x47\x73\x7e\x73\xa0"
+ "\xb8\x23\xb1\x52\xbf\x68\x74\x5d"
+ "\x0b\xcb\xfa\x8c\x46\xe3\x24\xe6"
+ "\xab\xd4\x69\x8d\x8c\xf2\x8a\x59"
+ "\xbe\x48\x46\x50\x8c\x9a\xe8\xe3"
+ "\x31\x55\x0a\x06\xed\x4f\xf8\xb7"
+ "\x4f\xe3\x85\x17\x30\xbd\xd5\x20"
+ "\xe7\x5b\xb2\x32\xcf\x6b\x16\x44"
+ "\xd2\xf5\x7e\xd7\xd1\x2f\xee\x64"
+ "\x3e\x9d\x10\xef\x27\x35\x43\x64"
+ "\x67\xfb\x7a\x7b\xe0\x62\x31\x9a"
+ "\x4d\xdf\xa5\xab\xc0\x20\xbb\x01"
+ "\xe9\x7b\x54\xf1\xde\xb2\x79\x50"
+ "\x6c\x4b\x91\xdb\x7f\xbb\x50\xc1"
+ "\x55\x44\x38\x9a\xe0\x9f\xe8\x29"
+ "\x6f\x15\xf8\x4e\xa6\xec\xa0\x60",
+ .ksize = 1088,
+ .plaintext = "\x15\x68\x9e\x2f\xad\x15\x52\xdf"
+ "\xf0\x42\x62\x24\x2a\x2d\xea\xbf"
+ "\xc7\xf3\xb4\x1a\xf5\xed\xb2\x08"
+ "\x15\x60\x1c\x00\x77\xbf\x0b\x0e"
+ "\xb7\x2c\xcf\x32\x3a\xc7\x01\x77"
+ "\xef\xa6\x75\xd0\x29\xc7\x68\x20"
+ "\xb2\x92\x25\xbf\x12\x34\xe9\xa4"
+ "\xfd\x32\x7b\x3f\x7c\xbd\xa5\x02"
+ "\x38\x41\xde\xc9\xc1\x09\xd9\xfc"
+ "\x6e\x78\x22\x83\x18\xf7\x50\x8d"
+ "\x8f\x9c\x2d\x02\xa5\x30\xac\xff"
+ "\xea\x63\x2e\x80\x37\x83\xb0\x58"
+ "\xda\x2f\xef\x21\x55\xba\x7b\xb1"
+ "\xb6\xed\xf5\xd2\x4d\xaa\x8c\xa9"
+ "\xdd\xdb\x0f\xb4\xce\xc1\x9a\xb1"
+ "\xc1\xdc\xbd\xab\x86\xc2\xdf\x0b"
+ "\xe1\x2c\xf9\xbe\xf6\xd8\xda\x62"
+ "\x72\xdd\x98\x09\x52\xc0\xc4\xb6"
+ "\x7b\x17\x5c\xf5\xd8\x4b\x88\xd6"
+ "\x6b\xbf\x84\x4a\x3f\xf5\x4d\xd2"
+ "\x94\xe2\x9c\xff\xc7\x3c\xd9\xc8"
+ "\x37\x38\xbc\x8c\xf3\xe7\xb7\xd0"
+ "\x1d\x78\xc4\x39\x07\xc8\x5e\x79"
+ "\xb6\x5a\x90\x5b\x6e\x97\xc9\xd4"
+ "\x82\x9c\xf3\x83\x7a\xe7\x97\xfc"
+ "\x1d\xbb\xef\xdb\xce\xe0\x82\xad"
+ "\xca\x07\x6c\x54\x62\x6f\x81\xe6"
+ "\x7a\x5a\x96\x6e\x80\x3a\xa2\x37"
+ "\x6f\xc6\xa4\x29\xc3\x9e\x19\x94"
+ "\x9f\xb0\x3e\x38\xfb\x3c\x2b\x7d"
+ "\xaa\xb8\x74\xda\x54\x23\x51\x12"
+ "\x4b\x96\x36\x8f\x91\x4f\x19\x37"
+ "\x83\xc9\xdd\xc7\x1a\x32\x2d\xab"
+ "\xc7\x89\xe2\x07\x47\x6c\xe8\xa6"
+ "\x70\x6b\x8e\x0c\xda\x5c\x6a\x59"
+ "\x27\x33\x0e\xe1\xe1\x20\xe8\xc8"
+ "\xae\xdc\xd0\xe3\x6d\xa8\xa6\x06"
+ "\x41\xb4\xd4\xd4\xcf\x91\x3e\x06"
+ "\xb0\x9a\xf7\xf1\xaa\xa6\x23\x92"
+ "\x10\x86\xf0\x94\xd1\x7c\x2e\x07"
+ "\x30\xfb\xc5\xd8\xf3\x12\xa9\xe8"
+ "\x22\x1c\x97\x1a\xad\x96\xb0\xa1"
+ "\x72\x6a\x6b\xb4\xfd\xf7\xe8\xfa"
+ "\xe2\x74\xd8\x65\x8d\x35\x17\x4b"
+ "\x00\x23\x5c\x8c\x70\xad\x71\xa2"
+ "\xca\xc5\x6c\x59\xbf\xb4\xc0\x6d"
+ "\x86\x98\x3e\x19\x5a\x90\x92\xb1"
+ "\x66\x57\x6a\x91\x68\x7c\xbc\xf3"
+ "\xf1\xdb\x94\xf8\x48\xf1\x36\xd8"
+ "\x78\xac\x1c\xa9\xcc\xd6\x27\xba"
+ "\x91\x54\x22\xf5\xe6\x05\x3f\xcc"
+ "\xc2\x8f\x2c\x3b\x2b\xc3\x2b\x2b"
+ "\x3b\xb8\xb6\x29\xb7\x2f\x94\xb6"
+ "\x7b\xfc\x94\x3e\xd0\x7a\x41\x59"
+ "\x7b\x1f\x9a\x09\xa6\xed\x4a\x82"
+ "\x9d\x34\x1c\xbd\x4e\x1c\x3a\x66"
+ "\x80\x74\x0e\x9a\x4f\x55\x54\x47"
+ "\x16\xba\x2a\x0a\x03\x35\x99\xa3"
+ "\x5c\x63\x8d\xa2\x72\x8b\x17\x15"
+ "\x68\x39\x73\xeb\xec\xf2\xe8\xf5"
+ "\x95\x32\x27\xd6\xc4\xfe\xb0\x51"
+ "\xd5\x0c\x50\xc5\xcd\x6d\x16\xb3"
+ "\xa3\x1e\x95\x69\xad\x78\x95\x06"
+ "\xb9\x46\xf2\x6d\x24\x5a\x99\x76"
+ "\x73\x6a\x91\xa6\xac\x12\xe1\x28"
+ "\x79\xbc\x08\x4e\x97\x00\x98\x63"
+ "\x07\x1c\x4e\xd1\x68\xf3\xb3\x81"
+ "\xa8\xa6\x5f\xf1\x01\xc9\xc1\xaf"
+ "\x3a\x96\xf9\x9d\xb5\x5a\x5f\x8f"
+ "\x7e\xc1\x7e\x77\x0a\x40\xc8\x8e"
+ "\xfc\x0e\xed\xe1\x0d\xb0\xe5\x5e"
+ "\x5e\x6f\xf5\x7f\xab\x33\x7d\xcd"
+ "\xf0\x09\x4b\xb2\x11\x37\xdc\x65"
+ "\x97\x32\x62\x71\x3a\x29\x54\xb9"
+ "\xc7\xa4\xbf\x75\x0f\xf9\x40\xa9"
+ "\x8d\xd7\x8b\xa7\xe0\x9a\xbe\x15"
+ "\xc6\xda\xd8\x00\x14\x69\x1a\xaf"
+ "\x5f\x79\xc3\xf5\xbb\x6c\x2a\x9d"
+ "\xdd\x3c\x5f\x97\x21\xe1\x3a\x03"
+ "\x84\x6a\xe9\x76\x11\x1f\xd3\xd5"
+ "\xf0\x54\x20\x4d\xc2\x91\xc3\xa4"
+ "\x36\x25\xbe\x1b\x2a\x06\xb7\xf3"
+ "\xd1\xd0\x55\x29\x81\x4c\x83\xa3"
+ "\xa6\x84\x1e\x5c\xd1\xd0\x6c\x90"
+ "\xa4\x11\xf0\xd7\x63\x6a\x48\x05"
+ "\xbc\x48\x18\x53\xcd\xb0\x8d\xdb"
+ "\xdc\xfe\x55\x11\x5c\x51\xb3\xab"
+ "\xab\x63\x3e\x31\x5a\x8b\x93\x63"
+ "\x34\xa9\xba\x2b\x69\x1a\xc0\xe3"
+ "\xcb\x41\xbc\xd7\xf5\x7f\x82\x3e"
+ "\x01\xa3\x3c\x72\xf4\xfe\xdf\xbe"
+ "\xb1\x67\x17\x2b\x37\x60\x0d\xca"
+ "\x6f\xc3\x94\x2c\xd2\x92\x6d\x9d"
+ "\x75\x18\x77\xaa\x29\x38\x96\xed"
+ "\x0e\x20\x70\x92\xd5\xd0\xb4\x00"
+ "\xc0\x31\xf2\xc9\x43\x0e\x75\x1d"
+ "\x4b\x64\xf2\x1f\xf2\x29\x6c\x7b"
+ "\x7f\xec\x59\x7d\x8c\x0d\xd4\xd3"
+ "\xac\x53\x4c\xa3\xde\x42\x92\x95"
+ "\x6d\xa3\x4f\xd0\xe6\x3d\xe7\xec"
+ "\x7a\x4d\x68\xf1\xfe\x67\x66\x09"
+ "\x83\x22\xb1\x98\x43\x8c\xab\xb8"
+ "\x45\xe6\x6d\xdf\x5e\x50\x71\xce"
+ "\xf5\x4e\x40\x93\x2b\xfa\x86\x0e"
+ "\xe8\x30\xbd\x82\xcc\x1c\x9c\x5f"
+ "\xad\xfd\x08\x31\xbe\x52\xe7\xe6"
+ "\xf2\x06\x01\x62\x25\x15\x99\x74"
+ "\x33\x51\x52\x57\x3f\x57\x87\x61"
+ "\xb9\x7f\x29\x3d\xcd\x92\x5e\xa6"
+ "\x5c\x3b\xf1\xed\x5f\xeb\x82\xed"
+ "\x56\x7b\x61\xe7\xfd\x02\x47\x0e"
+ "\x2a\x15\xa4\xce\x43\x86\x9b\xe1"
+ "\x2b\x4c\x2a\xd9\x42\x97\xf7\x9a"
+ "\xe5\x47\x46\x48\xd3\x55\x6f\x4d"
+ "\xd9\xeb\x4b\xdd\x7b\x21\x2f\xb3"
+ "\xa8\x36\x28\xdf\xca\xf1\xf6\xd9"
+ "\x10\xf6\x1c\xfd\x2e\x0c\x27\xe0"
+ "\x01\xb3\xff\x6d\x47\x08\x4d\xd4"
+ "\x00\x25\xee\x55\x4a\xe9\xe8\x5b"
+ "\xd8\xf7\x56\x12\xd4\x50\xb2\xe5"
+ "\x51\x6f\x34\x63\x69\xd2\x4e\x96"
+ "\x4e\xbc\x79\xbf\x18\xae\xc6\x13"
+ "\x80\x92\x77\xb0\xb4\x0f\x29\x94"
+ "\x6f\x4c\xbb\x53\x11\x36\xc3\x9f"
+ "\x42\x8e\x96\x8a\x91\xc8\xe9\xfc"
+ "\xfe\xbf\x7c\x2d\x6f\xf9\xb8\x44"
+ "\x89\x1b\x09\x53\x0a\x2a\x92\xc3"
+ "\x54\x7a\x3a\xf9\xe2\xe4\x75\x87"
+ "\xa0\x5e\x4b\x03\x7a\x0d\x8a\xf4"
+ "\x55\x59\x94\x2b\x63\x96\x0e\xf5",
+ .psize = 1040,
+ .digest = "\xb5\xb9\x08\xb3\x24\x3e\x03\xf0"
+ "\xd6\x0b\x57\xbc\x0a\x6d\x89\x59",
+ }, {
+ .key = "\xf6\x34\x42\x71\x35\x52\x8b\x58"
+ "\x02\x3a\x8e\x4a\x8d\x41\x13\xe9"
+ "\x7f\xba\xb9\x55\x9d\x73\x4d\xf8"
+ "\x3f\x5d\x73\x15\xff\xd3\x9e\x7f"
+ "\x20\x2a\x6a\xa8\xd1\xf0\x8f\x12"
+ "\x6b\x02\xd8\x6c\xde\xba\x80\x22"
+ "\x19\x37\xc8\xd0\x4e\x89\x17\x7c"
+ "\x7c\xdd\x88\xfd\x41\xc0\x04\xb7"
+ "\x1d\xac\x19\xe3\x20\xc7\x16\xcf"
+ "\x58\xee\x1d\x7a\x61\x69\xa9\x12"
+ "\x4b\xef\x4f\xb6\x38\xdd\x78\xf8"
+ "\x28\xee\x70\x08\xc7\x7c\xcc\xc8"
+ "\x1e\x41\xf5\x80\x86\x70\xd0\xf0"
+ "\xa3\x87\x6b\x0a\x00\xd2\x41\x28"
+ "\x74\x26\xf1\x24\xf3\xd0\x28\x77"
+ "\xd7\xcd\xf6\x2d\x61\xf4\xa2\x13"
+ "\x77\xb4\x6f\xa0\xf4\xfb\xd6\xb5"
+ "\x38\x9d\x5a\x0c\x51\xaf\xad\x63"
+ "\x27\x67\x8c\x01\xea\x42\x1a\x66"
+ "\xda\x16\x7c\x3c\x30\x0c\x66\x53"
+ "\x1c\x88\xa4\x5c\xb2\xe3\x78\x0a"
+ "\x13\x05\x6d\xe2\xaf\xb3\xe4\x75"
+ "\x00\x99\x58\xee\x76\x09\x64\xaa"
+ "\xbb\x2e\xb1\x81\xec\xd8\x0e\xd3"
+ "\x0c\x33\x5d\xb7\x98\xef\x36\xb6"
+ "\xd2\x65\x69\x41\x70\x12\xdc\x25"
+ "\x41\x03\x99\x81\x41\x19\x62\x13"
+ "\xd1\x0a\x29\xc5\x8c\xe0\x4c\xf3"
+ "\xd6\xef\x4c\xf4\x1d\x83\x2e\x6d"
+ "\x8e\x14\x87\xed\x80\xe0\xaa\xd3"
+ "\x08\x04\x73\x1a\x84\x40\xf5\x64"
+ "\xbd\x61\x32\x65\x40\x42\xfb\xb0"
+ "\x40\xf6\x40\x8d\xc7\x7f\x14\xd0"
+ "\x83\x99\xaa\x36\x7e\x60\xc6\xbf"
+ "\x13\x8a\xf9\x21\xe4\x7e\x68\x87"
+ "\xf3\x33\x86\xb4\xe0\x23\x7e\x0a"
+ "\x21\xb1\xf5\xad\x67\x3c\x9c\x9d"
+ "\x09\xab\xaf\x5f\xba\xe0\xd0\x82"
+ "\x48\x22\x70\xb5\x6d\x53\xd6\x0e"
+ "\xde\x64\x92\x41\xb0\xd3\xfb\xda"
+ "\x21\xfe\xab\xea\x20\xc4\x03\x58"
+ "\x18\x2e\x7d\x2f\x03\xa9\x47\x66"
+ "\xdf\x7b\xa4\x6b\x34\x6b\x55\x9c"
+ "\x4f\xd7\x9c\x47\xfb\xa9\x42\xec"
+ "\x5a\x12\xfd\xfe\x76\xa0\x92\x9d"
+ "\xfe\x1e\x16\xdd\x24\x2a\xe4\x27"
+ "\xd5\xa9\xf2\x05\x4f\x83\xa2\xaf"
+ "\xfe\xee\x83\x7a\xad\xde\xdf\x9a"
+ "\x80\xd5\x81\x14\x93\x16\x7e\x46"
+ "\x47\xc2\x14\xef\x49\x6e\xb9\xdb"
+ "\x40\xe8\x06\x6f\x9c\x2a\xfd\x62"
+ "\x06\x46\xfd\x15\x1d\x36\x61\x6f"
+ "\x77\x77\x5e\x64\xce\x78\x1b\x85"
+ "\xbf\x50\x9a\xfd\x67\xa6\x1a\x65"
+ "\xad\x5b\x33\x30\xf1\x71\xaa\xd9"
+ "\x23\x0d\x92\x24\x5f\xae\x57\xb0"
+ "\x24\x37\x0a\x94\x12\xfb\xb5\xb1"
+ "\xd3\xb8\x1d\x12\x29\xb0\x80\x24"
+ "\x2d\x47\x9f\x96\x1f\x95\xf1\xb1"
+ "\xda\x35\xf6\x29\xe0\xe1\x23\x96"
+ "\xc7\xe8\x22\x9b\x7c\xac\xf9\x41"
+ "\x39\x01\xe5\x73\x15\x5e\x99\xec"
+ "\xb4\xc1\xf4\xe7\xa7\x97\x6a\xd5"
+ "\x90\x9a\xa0\x1d\xf3\x5a\x8b\x5f"
+ "\xdf\x01\x52\xa4\x93\x31\x97\xb0"
+ "\x93\x24\xb5\xbc\xb2\x14\x24\x98"
+ "\x4a\x8f\x19\x85\xc3\x2d\x0f\x74"
+ "\x9d\x16\x13\x80\x5e\x59\x62\x62"
+ "\x25\xe0\xd1\x2f\x64\xef\xba\xac"
+ "\xcd\x09\x07\x15\x8a\xcf\x73\xb5"
+ "\x8b\xc9\xd8\x24\xb0\x53\xd5\x6f"
+ "\xe1\x2b\x77\xb1\xc5\xe4\xa7\x0e"
+ "\x18\x45\xab\x36\x03\x59\xa8\xbd"
+ "\x43\xf0\xd8\x2c\x1a\x69\x96\xbb"
+ "\x13\xdf\x6c\x33\x77\xdf\x25\x34"
+ "\x5b\xa5\x5b\x8c\xf9\x51\x05\xd4"
+ "\x8b\x8b\x44\x87\x49\xfc\xa0\x8f"
+ "\x45\x15\x5b\x40\x42\xc4\x09\x92"
+ "\x98\x0c\x4d\xf4\x26\x37\x1b\x13"
+ "\x76\x01\x93\x8d\x4f\xe6\xed\x18"
+ "\xd0\x79\x7b\x3f\x44\x50\xcb\xee"
+ "\xf7\x4a\xc9\x9e\xe0\x96\x74\xa7"
+ "\xe6\x93\xb2\x53\xca\x55\xa8\xdc"
+ "\x1e\x68\x07\x87\xb7\x2e\xc1\x08"
+ "\xb2\xa4\x5b\xaf\xc6\xdb\x5c\x66"
+ "\x41\x1c\x51\xd9\xb0\x07\x00\x0d"
+ "\xf0\x4c\xdc\x93\xde\xa9\x1e\x8e"
+ "\xd3\x22\x62\xd8\x8b\x88\x2c\xea"
+ "\x5e\xf1\x6e\x14\x40\xc7\xbe\xaa"
+ "\x42\x28\xd0\x26\x30\x78\x01\x9b"
+ "\x83\x07\xbc\x94\xc7\x57\xa2\x9f"
+ "\x03\x07\xff\x16\xff\x3c\x6e\x48"
+ "\x0a\xd0\xdd\x4c\xf6\x64\x9a\xf1"
+ "\xcd\x30\x12\x82\x2c\x38\xd3\x26"
+ "\x83\xdb\xab\x3e\xc6\xf8\xe6\xfa"
+ "\x77\x0a\x78\x82\x75\xf8\x63\x51"
+ "\x59\xd0\x8d\x24\x9f\x25\xe6\xa3"
+ "\x4c\xbc\x34\xfc\xe3\x10\xc7\x62"
+ "\xd4\x23\xc8\x3d\xa7\xc6\xa6\x0a"
+ "\x4f\x7e\x29\x9d\x6d\xbe\xb5\xf1"
+ "\xdf\xa4\x53\xfa\xc0\x23\x0f\x37"
+ "\x84\x68\xd0\xb5\xc8\xc6\xae\xf8"
+ "\xb7\x8d\xb3\x16\xfe\x8f\x87\xad"
+ "\xd0\xc1\x08\xee\x12\x1c\x9b\x1d"
+ "\x90\xf8\xd1\x63\xa4\x92\x3c\xf0"
+ "\xc7\x34\xd8\xf1\x14\xed\xa3\xbc"
+ "\x17\x7e\xd4\x62\x42\x54\x57\x2c"
+ "\x3e\x7a\x35\x35\x17\x0f\x0b\x7f"
+ "\x81\xa1\x3f\xd0\xcd\xc8\x3b\x96"
+ "\xe9\xe0\x4a\x04\xe1\xb6\x3c\xa1"
+ "\xd6\xca\xc4\xbd\xb6\xb5\x95\x34"
+ "\x12\x9d\xc5\x96\xf2\xdf\xba\x54"
+ "\x76\xd1\xb2\x6b\x3b\x39\xe0\xb9"
+ "\x18\x62\xfb\xf7\xfc\x12\xf1\x5f"
+ "\x7e\xc7\xe3\x59\x4c\xa6\xc2\x3d"
+ "\x40\x15\xf9\xa3\x95\x64\x4c\x74"
+ "\x8b\x73\x77\x33\x07\xa7\x04\x1d"
+ "\x33\x5a\x7e\x8f\xbd\x86\x01\x4f"
+ "\x3e\xb9\x27\x6f\xe2\x41\xf7\x09"
+ "\x67\xfd\x29\x28\xc5\xe4\xf6\x18"
+ "\x4c\x1b\x49\xb2\x9c\x5b\xf6\x81"
+ "\x4f\xbb\x5c\xcc\x0b\xdf\x84\x23"
+ "\x58\xd6\x28\x34\x93\x3a\x25\x97"
+ "\xdf\xb2\xc3\x9e\x97\x38\x0b\x7d"
+ "\x10\xb3\x54\x35\x23\x8c\x64\xee"
+ "\xf0\xd8\x66\xff\x8b\x22\xd2\x5b"
+ "\x05\x16\x3c\x89\xf7\xb1\x75\xaf"
+ "\xc0\xae\x6a\x4f\x3f\xaf\x9a\xf4"
+ "\xf4\x9a\x24\xd9\x80\x82\xc0\x12"
+ "\xde\x96\xd1\xbe\x15\x0b\x8d\x6a"
+ "\xd7\x12\xe4\x85\x9f\x83\xc9\xc3"
+ "\xff\x0b\xb5\xaf\x3b\xd8\x6d\x67"
+ "\x81\x45\xe6\xac\xec\xc1\x7b\x16"
+ "\x18\x0a\xce\x4b\xc0\x2e\x76\xbc"
+ "\x1b\xfa\xb4\x34\xb8\xfc\x3e\xc8"
+ "\x5d\x90\x71\x6d\x7a\x79\xef\x06",
+ .ksize = 1088,
+ .plaintext = "\xaa\x5d\x54\xcb\xea\x1e\x46\x0f"
+ "\x45\x87\x70\x51\x8a\x66\x7a\x33"
+ "\xb4\x18\xff\xa9\x82\xf9\x45\x4b"
+ "\x93\xae\x2e\x7f\xab\x98\xfe\xbf"
+ "\x01\xee\xe5\xa0\x37\x8f\x57\xa6"
+ "\xb0\x76\x0d\xa4\xd6\x28\x2b\x5d"
+ "\xe1\x03\xd6\x1c\x6f\x34\x0d\xe7"
+ "\x61\x2d\x2e\xe5\xae\x5d\x47\xc7"
+ "\x80\x4b\x18\x8f\xa8\x99\xbc\x28"
+ "\xed\x1d\x9d\x86\x7d\xd7\x41\xd1"
+ "\xe0\x2b\xe1\x8c\x93\x2a\xa7\x80"
+ "\xe1\x07\xa0\xa9\x9f\x8c\x8d\x1a"
+ "\x55\xfc\x6b\x24\x7a\xbd\x3e\x51"
+ "\x68\x4b\x26\x59\xc8\xa7\x16\xd9"
+ "\xb9\x61\x13\xde\x8b\x63\x1c\xf6"
+ "\x60\x01\xfb\x08\xb3\x5b\x0a\xbf"
+ "\x34\x73\xda\x87\x87\x3d\x6f\x97"
+ "\x4a\x0c\xa3\x58\x20\xa2\xc0\x81"
+ "\x5b\x8c\xef\xa9\xc2\x01\x1e\x64"
+ "\x83\x8c\xbc\x03\xb6\xd0\x29\x9f"
+ "\x54\xe2\xce\x8b\xc2\x07\x85\x78"
+ "\x25\x38\x96\x4c\xb4\xbe\x17\x4a"
+ "\x65\xa6\xfa\x52\x9d\x66\x9d\x65"
+ "\x4a\xd1\x01\x01\xf0\xcb\x13\xcc"
+ "\xa5\x82\xf3\xf2\x66\xcd\x3f\x9d"
+ "\xd1\xaa\xe4\x67\xea\xf2\xad\x88"
+ "\x56\x76\xa7\x9b\x59\x3c\xb1\x5d"
+ "\x78\xfd\x69\x79\x74\x78\x43\x26"
+ "\x7b\xde\x3f\xf1\xf5\x4e\x14\xd9"
+ "\x15\xf5\x75\xb5\x2e\x19\xf3\x0c"
+ "\x48\x72\xd6\x71\x6d\x03\x6e\xaa"
+ "\xa7\x08\xf9\xaa\x70\xa3\x0f\x4d"
+ "\x12\x8a\xdd\xe3\x39\x73\x7e\xa7"
+ "\xea\x1f\x6d\x06\x26\x2a\xf2\xc5"
+ "\x52\xb4\xbf\xfd\x52\x0c\x06\x60"
+ "\x90\xd1\xb2\x7b\x56\xae\xac\x58"
+ "\x5a\x6b\x50\x2a\xf5\xe0\x30\x3c"
+ "\x2a\x98\x0f\x1b\x5b\x0a\x84\x6c"
+ "\x31\xae\x92\xe2\xd4\xbb\x7f\x59"
+ "\x26\x10\xb9\x89\x37\x68\x26\xbf"
+ "\x41\xc8\x49\xc4\x70\x35\x7d\xff"
+ "\x2d\x7f\xf6\x8a\x93\x68\x8c\x78"
+ "\x0d\x53\xce\x7d\xff\x7d\xfb\xae"
+ "\x13\x1b\x75\xc4\x78\xd7\x71\xd8"
+ "\xea\xd3\xf4\x9d\x95\x64\x8e\xb4"
+ "\xde\xb8\xe4\xa6\x68\xc8\xae\x73"
+ "\x58\xaf\xa8\xb0\x5a\x20\xde\x87"
+ "\x43\xb9\x0f\xe3\xad\x41\x4b\xd5"
+ "\xb7\xad\x16\x00\xa6\xff\xf6\x74"
+ "\xbf\x8c\x9f\xb3\x58\x1b\xb6\x55"
+ "\xa9\x90\x56\x28\xf0\xb5\x13\x4e"
+ "\x9e\xf7\x25\x86\xe0\x07\x7b\x98"
+ "\xd8\x60\x5d\x38\x95\x3c\xe4\x22"
+ "\x16\x2f\xb2\xa2\xaf\xe8\x90\x17"
+ "\xec\x11\x83\x1a\xf4\xa9\x26\xda"
+ "\x39\x72\xf5\x94\x61\x05\x51\xec"
+ "\xa8\x30\x8b\x2c\x13\xd0\x72\xac"
+ "\xb9\xd2\xa0\x4c\x4b\x78\xe8\x6e"
+ "\x04\x85\xe9\x04\x49\x82\x91\xff"
+ "\x89\xe5\xab\x4c\xaa\x37\x03\x12"
+ "\xca\x8b\x74\x10\xfd\x9e\xd9\x7b"
+ "\xcb\xdb\x82\x6e\xce\x2e\x33\x39"
+ "\xce\xd2\x84\x6e\x34\x71\x51\x6e"
+ "\x0d\xd6\x01\x87\xc7\xfa\x0a\xd3"
+ "\xad\x36\xf3\x4c\x9f\x96\x5e\x62"
+ "\x62\x54\xc3\x03\x78\xd6\xab\xdd"
+ "\x89\x73\x55\x25\x30\xf8\xa7\xe6"
+ "\x4f\x11\x0c\x7c\x0a\xa1\x2b\x7b"
+ "\x3d\x0d\xde\x81\xd4\x9d\x0b\xae"
+ "\xdf\x00\xf9\x4c\xb6\x90\x8e\x16"
+ "\xcb\x11\xc8\xd1\x2e\x73\x13\x75"
+ "\x75\x3e\xaa\xf5\xee\x02\xb3\x18"
+ "\xa6\x2d\xf5\x3b\x51\xd1\x1f\x47"
+ "\x6b\x2c\xdb\xc4\x10\xe0\xc8\xba"
+ "\x9d\xac\xb1\x9d\x75\xd5\x41\x0e"
+ "\x7e\xbe\x18\x5b\xa4\x1f\xf8\x22"
+ "\x4c\xc1\x68\xda\x6d\x51\x34\x6c"
+ "\x19\x59\xec\xb5\xb1\xec\xa7\x03"
+ "\xca\x54\x99\x63\x05\x6c\xb1\xac"
+ "\x9c\x31\xd6\xdb\xba\x7b\x14\x12"
+ "\x7a\xc3\x2f\xbf\x8d\xdc\x37\x46"
+ "\xdb\xd2\xbc\xd4\x2f\xab\x30\xd5"
+ "\xed\x34\x99\x8e\x83\x3e\xbe\x4c"
+ "\x86\x79\x58\xe0\x33\x8d\x9a\xb8"
+ "\xa9\xa6\x90\x46\xa2\x02\xb8\xdd"
+ "\xf5\xf9\x1a\x5c\x8c\x01\xaa\x6e"
+ "\xb4\x22\x12\xf5\x0c\x1b\x9b\x7a"
+ "\xc3\x80\xf3\x06\x00\x5f\x30\xd5"
+ "\x06\xdb\x7d\x82\xc2\xd4\x0b\x4c"
+ "\x5f\xe9\xc5\xf5\xdf\x97\x12\xbf"
+ "\x56\xaf\x9b\x69\xcd\xee\x30\xb4"
+ "\xa8\x71\xff\x3e\x7d\x73\x7a\xb4"
+ "\x0d\xa5\x46\x7a\xf3\xf4\x15\x87"
+ "\x5d\x93\x2b\x8c\x37\x64\xb5\xdd"
+ "\x48\xd1\xe5\x8c\xae\xd4\xf1\x76"
+ "\xda\xf4\xba\x9e\x25\x0e\xad\xa3"
+ "\x0d\x08\x7c\xa8\x82\x16\x8d\x90"
+ "\x56\x40\x16\x84\xe7\x22\x53\x3a"
+ "\x58\xbc\xb9\x8f\x33\xc8\xc2\x84"
+ "\x22\xe6\x0d\xe7\xb3\xdc\x5d\xdf"
+ "\xd7\x2a\x36\xe4\x16\x06\x07\xd2"
+ "\x97\x60\xb2\xf5\x5e\x14\xc9\xfd"
+ "\x8b\x05\xd1\xce\xee\x9a\x65\x99"
+ "\xb7\xae\x19\xb7\xc8\xbc\xd5\xa2"
+ "\x7b\x95\xe1\xcc\xba\x0d\xdc\x8a"
+ "\x1d\x59\x52\x50\xaa\x16\x02\x82"
+ "\xdf\x61\x33\x2e\x44\xce\x49\xc7"
+ "\xe5\xc6\x2e\x76\xcf\x80\x52\xf0"
+ "\x3d\x17\x34\x47\x3f\xd3\x80\x48"
+ "\xa2\xba\xd5\xc7\x7b\x02\x28\xdb"
+ "\xac\x44\xc7\x6e\x05\x5c\xc2\x79"
+ "\xb3\x7d\x6a\x47\x77\x66\xf1\x38"
+ "\xf0\xf5\x4f\x27\x1a\x31\xca\x6c"
+ "\x72\x95\x92\x8e\x3f\xb0\xec\x1d"
+ "\xc7\x2a\xff\x73\xee\xdf\x55\x80"
+ "\x93\xd2\xbd\x34\xd3\x9f\x00\x51"
+ "\xfb\x2e\x41\xba\x6c\x5a\x7c\x17"
+ "\x7f\xe6\x70\xac\x8d\x39\x3f\x77"
+ "\xe2\x23\xac\x8f\x72\x4e\xe4\x53"
+ "\xcc\xf1\x1b\xf1\x35\xfe\x52\xa4"
+ "\xd6\xb8\x40\x6b\xc1\xfd\xa0\xa1"
+ "\xf5\x46\x65\xc2\x50\xbb\x43\xe2"
+ "\xd1\x43\x28\x34\x74\xf5\x87\xa0"
+ "\xf2\x5e\x27\x3b\x59\x2b\x3e\x49"
+ "\xdf\x46\xee\xaf\x71\xd7\x32\x36"
+ "\xc7\x14\x0b\x58\x6e\x3e\x2d\x41"
+ "\xfa\x75\x66\x3a\x54\xe0\xb2\xb9"
+ "\xaf\xdd\x04\x80\x15\x19\x3f\x6f"
+ "\xce\x12\xb4\xd8\xe8\x89\x3c\x05"
+ "\x30\xeb\xf3\x3d\xcd\x27\xec\xdc"
+ "\x56\x70\x12\xcf\x78\x2b\x77\xbf"
+ "\x22\xf0\x1b\x17\x9c\xcc\xd6\x1b"
+ "\x2d\x3d\xa0\x3b\xd8\xc9\x70\xa4"
+ "\x7a\x3e\x07\xb9\x06\xc3\xfa\xb0"
+ "\x33\xee\xc1\xd8\xf6\xe0\xf0\xb2"
+ "\x61\x12\x69\xb0\x5f\x28\x99\xda"
+ "\xc3\x61\x48\xfa\x07\x16\x03\xc4"
+ "\xa8\xe1\x3c\xe8\x0e\x64\x15\x30"
+ "\xc1\x9d\x84\x2f\x73\x98\x0e\x3a"
+ "\xf2\x86\x21\xa4\x9e\x1d\xb5\x86"
+ "\x16\xdb\x2b\x9a\x06\x64\x8e\x79"
+ "\x8d\x76\x3e\xc3\xc2\x64\x44\xe3"
+ "\xda\xbc\x1a\x52\xd7\x61\x03\x65"
+ "\x54\x32\x77\x01\xed\x9d\x8a\x43"
+ "\x25\x24\xe3\xc1\xbe\xb8\x2f\xcb"
+ "\x89\x14\x64\xab\xf6\xa0\x6e\x02"
+ "\x57\xe4\x7d\xa9\x4e\x9a\x03\x36"
+ "\xad\xf1\xb1\xfc\x0b\xe6\x79\x51"
+ "\x9f\x81\x77\xc4\x14\x78\x9d\xbf"
+ "\xb6\xd6\xa3\x8c\xba\x0b\x26\xe7"
+ "\xc8\xb9\x5c\xcc\xe1\x5f\xd5\xc6"
+ "\xc4\xca\xc2\xa3\x45\xba\x94\x13"
+ "\xb2\x8f\xc3\x54\x01\x09\xe7\x8b"
+ "\xda\x2a\x0a\x11\x02\x43\xcb\x57"
+ "\xc9\xcc\xb5\x5c\xab\xc4\xec\x54"
+ "\x00\x06\x34\xe1\x6e\x03\x89\x7c"
+ "\xc6\xfb\x6a\xc7\x60\x43\xd6\xc5"
+ "\xb5\x68\x72\x89\x8f\x42\xc3\x74"
+ "\xbd\x25\xaa\x9f\x67\xb5\xdf\x26"
+ "\x20\xe8\xb7\x01\x3c\xe4\x77\xce"
+ "\xc4\x65\xa7\x23\x79\xea\x33\xc7"
+ "\x82\x14\x5c\x82\xf2\x4e\x3d\xf6"
+ "\xc6\x4a\x0e\x29\xbb\xec\x44\xcd"
+ "\x2f\xd1\x4f\x21\x71\xa9\xce\x0f"
+ "\x5c\xf2\x72\x5c\x08\x2e\x21\xd2"
+ "\xc3\x29\x13\xd8\xac\xc3\xda\x13"
+ "\x1a\x9d\xa7\x71\x1d\x27\x1d\x27"
+ "\x1d\xea\xab\x44\x79\xad\xe5\xeb"
+ "\xef\x1f\x22\x0a\x44\x4f\xcb\x87"
+ "\xa7\x58\x71\x0e\x66\xf8\x60\xbf"
+ "\x60\x74\x4a\xb4\xec\x2e\xfe\xd3"
+ "\xf5\xb8\xfe\x46\x08\x50\x99\x6c"
+ "\x66\xa5\xa8\x34\x44\xb5\xe5\xf0"
+ "\xdd\x2c\x67\x4e\x35\x96\x8e\x67"
+ "\x48\x3f\x5f\x37\x44\x60\x51\x2e"
+ "\x14\x91\x5e\x57\xc3\x0e\x79\x77"
+ "\x2f\x03\xf4\xe2\x1c\x72\xbf\x85"
+ "\x5d\xd3\x17\xdf\x6c\xc5\x70\x24"
+ "\x42\xdf\x51\x4e\x2a\xb2\xd2\x5b"
+ "\x9e\x69\x83\x41\x11\xfe\x73\x22"
+ "\xde\x8a\x9e\xd8\x8a\xfb\x20\x38"
+ "\xd8\x47\x6f\xd5\xed\x8f\x41\xfd"
+ "\x13\x7a\x18\x03\x7d\x0f\xcd\x7d"
+ "\xa6\x7d\x31\x9e\xf1\x8f\x30\xa3"
+ "\x8b\x4c\x24\xb7\xf5\x48\xd7\xd9"
+ "\x12\xe7\x84\x97\x5c\x31\x6d\xfb"
+ "\xdf\xf3\xd3\xd1\xd5\x0c\x30\x06"
+ "\x01\x6a\xbc\x6c\x78\x7b\xa6\x50"
+ "\xfa\x0f\x3c\x42\x2d\xa5\xa3\x3b"
+ "\xcf\x62\x50\xff\x71\x6d\xe7\xda"
+ "\x27\xab\xc6\x67\x16\x65\x68\x64"
+ "\xc7\xd5\x5f\x81\xa9\xf6\x65\xb3"
+ "\x5e\x43\x91\x16\xcd\x3d\x55\x37"
+ "\x55\xb3\xf0\x28\xc5\x54\x19\xc0"
+ "\xe0\xd6\x2a\x61\xd4\xc8\x72\x51"
+ "\xe9\xa1\x7b\x48\x21\xad\x44\x09"
+ "\xe4\x01\x61\x3c\x8a\x5b\xf9\xa1"
+ "\x6e\x1b\xdf\xc0\x04\xa8\x8b\xf2"
+ "\x21\xbe\x34\x7b\xfc\xa1\xcd\xc9"
+ "\xa9\x96\xf4\xa4\x4c\xf7\x4e\x8f"
+ "\x84\xcc\xd3\xa8\x92\x77\x8f\x36"
+ "\xe2\x2e\x8c\x33\xe8\x84\xa6\x0c"
+ "\x6c\x8a\xda\x14\x32\xc2\x96\xff"
+ "\xc6\x4a\xc2\x9b\x30\x7f\xd1\x29"
+ "\xc0\xd5\x78\x41\x00\x80\x80\x03"
+ "\x2a\xb1\xde\x26\x03\x48\x49\xee"
+ "\x57\x14\x76\x51\x3c\x36\x5d\x0a"
+ "\x5c\x9f\xe8\xd8\x53\xdb\x4f\xd4"
+ "\x38\xbf\x66\xc9\x75\x12\x18\x75"
+ "\x34\x2d\x93\x22\x96\x51\x24\x6e"
+ "\x4e\xd9\x30\xea\x67\xff\x92\x1c"
+ "\x16\x26\xe9\xb5\x33\xab\x8c\x22"
+ "\x47\xdb\xa0\x2c\x08\xf0\x12\x69"
+ "\x7e\x93\x52\xda\xa5\xe5\xca\xc1"
+ "\x0f\x55\x2a\xbd\x09\x30\x88\x1b"
+ "\x9c\xc6\x9f\xe6\xdb\xa6\x92\xeb"
+ "\xf4\xbd\x5c\xc4\xdb\xc6\x71\x09"
+ "\xab\x5e\x48\x0c\xed\x6f\xda\x8e"
+ "\x8d\x0c\x98\x71\x7d\x10\xd0\x9c"
+ "\x20\x9b\x79\x53\x26\x5d\xb9\x85"
+ "\x8a\x31\xb8\xc5\x1c\x97\xde\x88"
+ "\x61\x55\x7f\x7c\x21\x06\xea\xc4"
+ "\x5f\xaf\xf2\xf0\xd5\x5e\x7d\xb4"
+ "\x6e\xcf\xe9\xae\x1b\x0e\x11\x80"
+ "\xc1\x9a\x74\x7e\x52\x6f\xa0\xb7"
+ "\x24\xcd\x8d\x0a\x11\x40\x63\x72"
+ "\xfa\xe2\xc5\xb3\x94\xef\x29\xa2"
+ "\x1a\x23\x43\x04\x37\x55\x0d\xe9"
+ "\x83\xb2\x29\x51\x49\x64\xa0\xbd"
+ "\xde\x73\xfd\xa5\x7c\x95\x70\x62"
+ "\x58\xdc\xe2\xd0\xbf\x98\xf5\x8a"
+ "\x6a\xfd\xce\xa8\x0e\x42\x2a\xeb"
+ "\xd2\xff\x83\x27\x53\x5c\xa0\x6e"
+ "\x93\xef\xe2\xb9\x5d\x35\xd6\x98"
+ "\xf6\x71\x19\x7a\x54\xa1\xa7\xe8"
+ "\x09\xfe\xf6\x9e\xc7\xbd\x3e\x29"
+ "\xbd\x6b\x17\xf4\xe7\x3e\x10\x5c"
+ "\xc1\xd2\x59\x4f\x4b\x12\x1a\x5b"
+ "\x50\x80\x59\xb9\xec\x13\x66\xa8"
+ "\xd2\x31\x7b\x6a\x61\x22\xdd\x7d"
+ "\x61\xee\x87\x16\x46\x9f\xf9\xc7"
+ "\x41\xee\x74\xf8\xd0\x96\x2c\x76"
+ "\x2a\xac\x7d\x6e\x9f\x0e\x7f\x95"
+ "\xfe\x50\x16\xb2\x23\xca\x62\xd5"
+ "\x68\xcf\x07\x3f\x3f\x97\x85\x2a"
+ "\x0c\x25\x45\xba\xdb\x32\xcb\x83"
+ "\x8c\x4f\xe0\x6d\x9a\x99\xf9\xc9"
+ "\xda\xd4\x19\x31\xc1\x7c\x6d\xd9"
+ "\x9c\x56\xd3\xec\xc1\x81\x4c\xed"
+ "\x28\x9d\x87\xeb\x19\xd7\x1a\x4f"
+ "\x04\x6a\xcb\x1f\xcf\x1f\xa2\x16"
+ "\xfc\x2a\x0d\xa1\x14\x2d\xfa\xc5"
+ "\x5a\xd2\xc5\xf9\x19\x7c\x20\x1f"
+ "\x2d\x10\xc0\x66\x7c\xd9\x2d\xe5"
+ "\x88\x70\x59\xa7\x85\xd5\x2e\x7c"
+ "\x5c\xe3\xb7\x12\xd6\x97\x3f\x29",
+ .psize = 2048,
+ .digest = "\x37\x90\x92\xc2\xeb\x01\x87\xd9"
+ "\x95\xc7\x91\xc3\x17\x8b\x38\x52",
+ }
+};
+
+
/*
* DES test vectors.
*/
@@ -11449,6 +12797,82 @@ static const struct cipher_testvec aes_cbc_tv_template[] = {
},
};
+static const struct cipher_testvec aes_cfb_tv_template[] = {
+ { /* From NIST SP800-38A */
+ .key = "\x2b\x7e\x15\x16\x28\xae\xd2\xa6"
+ "\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+ .klen = 16,
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ctext = "\x3b\x3f\xd9\x2e\xb7\x2d\xad\x20"
+ "\x33\x34\x49\xf8\xe8\x3c\xfb\x4a"
+ "\xc8\xa6\x45\x37\xa0\xb3\xa9\x3f"
+ "\xcd\xe3\xcd\xad\x9f\x1c\xe5\x8b"
+ "\x26\x75\x1f\x67\xa3\xcb\xb1\x40"
+ "\xb1\x80\x8c\xf1\x87\xa4\xf4\xdf"
+ "\xc0\x4b\x05\x35\x7c\x5d\x1c\x0e"
+ "\xea\xc4\xc6\x6f\x9f\xf7\xf2\xe6",
+ .len = 64,
+ }, {
+ .key = "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+ "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+ "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+ .klen = 24,
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ctext = "\xcd\xc8\x0d\x6f\xdd\xf1\x8c\xab"
+ "\x34\xc2\x59\x09\xc9\x9a\x41\x74"
+ "\x67\xce\x7f\x7f\x81\x17\x36\x21"
+ "\x96\x1a\x2b\x70\x17\x1d\x3d\x7a"
+ "\x2e\x1e\x8a\x1d\xd5\x9b\x88\xb1"
+ "\xc8\xe6\x0f\xed\x1e\xfa\xc4\xc9"
+ "\xc0\x5f\x9f\x9c\xa9\x83\x4f\xa0"
+ "\x42\xae\x8f\xba\x58\x4b\x09\xff",
+ .len = 64,
+ }, {
+ .key = "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+ "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+ "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+ "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+ .klen = 32,
+ .iv = "\x00\x01\x02\x03\x04\x05\x06\x07"
+ "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+ .ptext = "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+ "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+ "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+ "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+ "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+ "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+ "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+ "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+ .ctext = "\xdc\x7e\x84\xbf\xda\x79\x16\x4b"
+ "\x7e\xcd\x84\x86\x98\x5d\x38\x60"
+ "\x39\xff\xed\x14\x3b\x28\xb1\xc8"
+ "\x32\x11\x3c\x63\x31\xe5\x40\x7b"
+ "\xdf\x10\x13\x24\x15\xe5\x4b\x92"
+ "\xa1\x3e\xd0\xa8\x26\x7a\xe2\xf9"
+ "\x75\xa3\x85\x74\x1a\xb9\xce\xf8"
+ "\x20\x31\x62\x3d\x55\xb1\xe4\x71",
+ .len = 64,
+ },
+};
+
static const struct aead_testvec hmac_md5_ecb_cipher_null_enc_tv_template[] = {
{ /* Input data from RFC 2410 Case 1 */
#ifdef __LITTLE_ENDIAN
@@ -30802,6 +32226,1794 @@ static const struct cipher_testvec chacha20_tv_template[] = {
},
};
+static const struct cipher_testvec xchacha20_tv_template[] = {
+ { /* from libsodium test/default/xchacha20.c */
+ .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+ "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+ "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+ "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+ .klen = 32,
+ .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+ "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+ "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00",
+ .ctext = "\xc6\xe9\x75\x81\x60\x08\x3a\xc6"
+ "\x04\xef\x90\xe7\x12\xce\x6e\x75"
+ "\xd7\x79\x75\x90\x74\x4e\x0c\xf0"
+ "\x60\xf0\x13\x73\x9c",
+ .len = 29,
+ }, { /* from libsodium test/default/xchacha20.c */
+ .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+ "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+ "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+ "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+ .klen = 32,
+ .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+ "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+ "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00",
+ .ctext = "\xa2\x12\x09\x09\x65\x94\xde\x8c"
+ "\x56\x67\xb1\xd1\x3a\xd9\x3f\x74"
+ "\x41\x06\xd0\x54\xdf\x21\x0e\x47"
+ "\x82\xcd\x39\x6f\xec\x69\x2d\x35"
+ "\x15\xa2\x0b\xf3\x51\xee\xc0\x11"
+ "\xa9\x2c\x36\x78\x88\xbc\x46\x4c"
+ "\x32\xf0\x80\x7a\xcd\x6c\x20\x3a"
+ "\x24\x7e\x0d\xb8\x54\x14\x84\x68"
+ "\xe9\xf9\x6b\xee\x4c\xf7\x18\xd6"
+ "\x8d\x5f\x63\x7c\xbd\x5a\x37\x64"
+ "\x57\x78\x8e\x6f\xae\x90\xfc\x31"
+ "\x09\x7c\xfc",
+ .len = 91,
+ }, { /* Taken from the ChaCha20 test vectors, appended 12 random bytes
+ to the nonce, zero-padded the stream position from 4 to 8 bytes,
+ and recomputed the ciphertext using libsodium's XChaCha20 */
+ .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x67\xc6\x69\x73"
+ "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ctext = "\x9c\x49\x2a\xe7\x8a\x2f\x93\xc7"
+ "\xb3\x33\x6f\x82\x17\xd8\xc4\x1e"
+ "\xad\x80\x11\x11\x1d\x4c\x16\x18"
+ "\x07\x73\x9b\x4f\xdb\x7c\xcb\x47"
+ "\xfd\xef\x59\x74\xfa\x3f\xe5\x4c"
+ "\x9b\xd0\xea\xbc\xba\x56\xad\x32"
+ "\x03\xdc\xf8\x2b\xc1\xe1\x75\x67"
+ "\x23\x7b\xe6\xfc\xd4\x03\x86\x54",
+ .len = 64,
+ }, { /* Derived from a ChaCha20 test vector, via the process above */
+ .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x01",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+ "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+ "\x01\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+ "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+ "\x6f\x20\x74\x68\x65\x20\x49\x45"
+ "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+ "\x64\x65\x64\x20\x62\x79\x20\x74"
+ "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+ "\x69\x62\x75\x74\x6f\x72\x20\x66"
+ "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+ "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+ "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+ "\x20\x70\x61\x72\x74\x20\x6f\x66"
+ "\x20\x61\x6e\x20\x49\x45\x54\x46"
+ "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+ "\x74\x2d\x44\x72\x61\x66\x74\x20"
+ "\x6f\x72\x20\x52\x46\x43\x20\x61"
+ "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+ "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+ "\x20\x6d\x61\x64\x65\x20\x77\x69"
+ "\x74\x68\x69\x6e\x20\x74\x68\x65"
+ "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+ "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+ "\x45\x54\x46\x20\x61\x63\x74\x69"
+ "\x76\x69\x74\x79\x20\x69\x73\x20"
+ "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+ "\x65\x64\x20\x61\x6e\x20\x22\x49"
+ "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+ "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+ "\x22\x2e\x20\x53\x75\x63\x68\x20"
+ "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+ "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+ "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+ "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+ "\x74\x73\x20\x69\x6e\x20\x49\x45"
+ "\x54\x46\x20\x73\x65\x73\x73\x69"
+ "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+ "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+ "\x77\x72\x69\x74\x74\x65\x6e\x20"
+ "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+ "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+ "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+ "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+ "\x64\x65\x20\x61\x74\x20\x61\x6e"
+ "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+ "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+ "\x20\x77\x68\x69\x63\x68\x20\x61"
+ "\x72\x65\x20\x61\x64\x64\x72\x65"
+ "\x73\x73\x65\x64\x20\x74\x6f",
+ .ctext = "\xf9\xab\x7a\x4a\x60\xb8\x5f\xa0"
+ "\x50\xbb\x57\xce\xef\x8c\xc1\xd9"
+ "\x24\x15\xb3\x67\x5e\x7f\x01\xf6"
+ "\x1c\x22\xf6\xe5\x71\xb1\x43\x64"
+ "\x63\x05\xd5\xfc\x5c\x3d\xc0\x0e"
+ "\x23\xef\xd3\x3b\xd9\xdc\x7f\xa8"
+ "\x58\x26\xb3\xd0\xc2\xd5\x04\x3f"
+ "\x0a\x0e\x8f\x17\xe4\xcd\xf7\x2a"
+ "\xb4\x2c\x09\xe4\x47\xec\x8b\xfb"
+ "\x59\x37\x7a\xa1\xd0\x04\x7e\xaa"
+ "\xf1\x98\x5f\x24\x3d\x72\x9a\x43"
+ "\xa4\x36\x51\x92\x22\x87\xff\x26"
+ "\xce\x9d\xeb\x59\x78\x84\x5e\x74"
+ "\x97\x2e\x63\xc0\xef\x29\xf7\x8a"
+ "\xb9\xee\x35\x08\x77\x6a\x35\x9a"
+ "\x3e\xe6\x4f\x06\x03\x74\x1b\xc1"
+ "\x5b\xb3\x0b\x89\x11\x07\xd3\xb7"
+ "\x53\xd6\x25\x04\xd9\x35\xb4\x5d"
+ "\x4c\x33\x5a\xc2\x42\x4c\xe6\xa4"
+ "\x97\x6e\x0e\xd2\xb2\x8b\x2f\x7f"
+ "\x28\xe5\x9f\xac\x4b\x2e\x02\xab"
+ "\x85\xfa\xa9\x0d\x7c\x2d\x10\xe6"
+ "\x91\xab\x55\x63\xf0\xde\x3a\x94"
+ "\x25\x08\x10\x03\xc2\x68\xd1\xf4"
+ "\xaf\x7d\x9c\x99\xf7\x86\x96\x30"
+ "\x60\xfc\x0b\xe6\xa8\x80\x15\xb0"
+ "\x81\xb1\x0c\xbe\xb9\x12\x18\x25"
+ "\xe9\x0e\xb1\xe7\x23\xb2\xef\x4a"
+ "\x22\x8f\xc5\x61\x89\xd4\xe7\x0c"
+ "\x64\x36\x35\x61\xb6\x34\x60\xf7"
+ "\x7b\x61\x37\x37\x12\x10\xa2\xf6"
+ "\x7e\xdb\x7f\x39\x3f\xb6\x8e\x89"
+ "\x9e\xf3\xfe\x13\x98\xbb\x66\x5a"
+ "\xec\xea\xab\x3f\x9c\x87\xc4\x8c"
+ "\x8a\x04\x18\x49\xfc\x77\x11\x50"
+ "\x16\xe6\x71\x2b\xee\xc0\x9c\xb6"
+ "\x87\xfd\x80\xff\x0b\x1d\x73\x38"
+ "\xa4\x1d\x6f\xae\xe4\x12\xd7\x93"
+ "\x9d\xcd\x38\x26\x09\x40\x52\xcd"
+ "\x67\x01\x67\x26\xe0\x3e\x98\xa8"
+ "\xe8\x1a\x13\x41\xbb\x90\x4d\x87"
+ "\xbb\x42\x82\x39\xce\x3a\xd0\x18"
+ "\x6d\x7b\x71\x8f\xbb\x2c\x6a\xd1"
+ "\xbd\xf5\xc7\x8a\x7e\xe1\x1e\x0f"
+ "\x0d\x0d\x13\x7c\xd9\xd8\x3c\x91"
+ "\xab\xff\x1f\x12\xc3\xee\xe5\x65"
+ "\x12\x8d\x7b\x61\xe5\x1f\x98",
+ .len = 375,
+ .also_non_np = 1,
+ .np = 3,
+ .tap = { 375 - 20, 4, 16 },
+
+ }, { /* Derived from a ChaCha20 test vector, via the process above */
+ .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+ "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+ "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+ "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+ "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+ "\x2a\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x27\x54\x77\x61\x73\x20\x62\x72"
+ "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+ "\x6e\x64\x20\x74\x68\x65\x20\x73"
+ "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+ "\x76\x65\x73\x0a\x44\x69\x64\x20"
+ "\x67\x79\x72\x65\x20\x61\x6e\x64"
+ "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+ "\x69\x6e\x20\x74\x68\x65\x20\x77"
+ "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+ "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+ "\x65\x72\x65\x20\x74\x68\x65\x20"
+ "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+ "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+ "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+ "\x72\x61\x74\x68\x73\x20\x6f\x75"
+ "\x74\x67\x72\x61\x62\x65\x2e",
+ .ctext = "\x95\xb9\x51\xe7\x8f\xb4\xa4\x03"
+ "\xca\x37\xcc\xde\x60\x1d\x8c\xe2"
+ "\xf1\xbb\x8a\x13\x7f\x61\x85\xcc"
+ "\xad\xf4\xf0\xdc\x86\xa6\x1e\x10"
+ "\xbc\x8e\xcb\x38\x2b\xa5\xc8\x8f"
+ "\xaa\x03\x3d\x53\x4a\x42\xb1\x33"
+ "\xfc\xd3\xef\xf0\x8e\x7e\x10\x9c"
+ "\x6f\x12\x5e\xd4\x96\xfe\x5b\x08"
+ "\xb6\x48\xf0\x14\x74\x51\x18\x7c"
+ "\x07\x92\xfc\xac\x9d\xf1\x94\xc0"
+ "\xc1\x9d\xc5\x19\x43\x1f\x1d\xbb"
+ "\x07\xf0\x1b\x14\x25\x45\xbb\xcb"
+ "\x5c\xe2\x8b\x28\xf3\xcf\x47\x29"
+ "\x27\x79\x67\x24\xa6\x87\xc2\x11"
+ "\x65\x03\xfa\x45\xf7\x9e\x53\x7a"
+ "\x99\xf1\x82\x25\x4f\x8d\x07",
+ .len = 127,
+ }, { /* Derived from a ChaCha20 test vector, via the process above */
+ .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+ "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+ "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+ "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x01\x31\x58\xa3\x5a"
+ "\x25\x5d\x05\x17\x58\xe9\x5e\xd4"
+ "\x1c\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x49\xee\xe0\xdc\x24\x90\x40\xcd"
+ "\xc5\x40\x8f\x47\x05\xbc\xdd\x81"
+ "\x47\xc6\x8d\xe6\xb1\x8f\xd7\xcb"
+ "\x09\x0e\x6e\x22\x48\x1f\xbf\xb8"
+ "\x5c\xf7\x1e\x8a\xc1\x23\xf2\xd4"
+ "\x19\x4b\x01\x0f\x4e\xa4\x43\xce"
+ "\x01\xc6\x67\xda\x03\x91\x18\x90"
+ "\xa5\xa4\x8e\x45\x03\xb3\x2d\xac"
+ "\x74\x92\xd3\x53\x47\xc8\xdd\x25"
+ "\x53\x6c\x02\x03\x87\x0d\x11\x0c"
+ "\x58\xe3\x12\x18\xfd\x2a\x5b\x40"
+ "\x0c\x30\xf0\xb8\x3f\x43\xce\xae"
+ "\x65\x3a\x7d\x7c\xf4\x54\xaa\xcc"
+ "\x33\x97\xc3\x77\xba\xc5\x70\xde"
+ "\xd7\xd5\x13\xa5\x65\xc4\x5f\x0f"
+ "\x46\x1a\x0d\x97\xb5\xf3\xbb\x3c"
+ "\x84\x0f\x2b\xc5\xaa\xea\xf2\x6c"
+ "\xc9\xb5\x0c\xee\x15\xf3\x7d\xbe"
+ "\x9f\x7b\x5a\xa6\xae\x4f\x83\xb6"
+ "\x79\x49\x41\xf4\x58\x18\xcb\x86"
+ "\x7f\x30\x0e\xf8\x7d\x44\x36\xea"
+ "\x75\xeb\x88\x84\x40\x3c\xad\x4f"
+ "\x6f\x31\x6b\xaa\x5d\xe5\xa5\xc5"
+ "\x21\x66\xe9\xa7\xe3\xb2\x15\x88"
+ "\x78\xf6\x79\xa1\x59\x47\x12\x4e"
+ "\x9f\x9f\x64\x1a\xa0\x22\x5b\x08"
+ "\xbe\x7c\x36\xc2\x2b\x66\x33\x1b"
+ "\xdd\x60\x71\xf7\x47\x8c\x61\xc3"
+ "\xda\x8a\x78\x1e\x16\xfa\x1e\x86"
+ "\x81\xa6\x17\x2a\xa7\xb5\xc2\xe7"
+ "\xa4\xc7\x42\xf1\xcf\x6a\xca\xb4"
+ "\x45\xcf\xf3\x93\xf0\xe7\xea\xf6"
+ "\xf4\xe6\x33\x43\x84\x93\xa5\x67"
+ "\x9b\x16\x58\x58\x80\x0f\x2b\x5c"
+ "\x24\x74\x75\x7f\x95\x81\xb7\x30"
+ "\x7a\x33\xa7\xf7\x94\x87\x32\x27"
+ "\x10\x5d\x14\x4c\x43\x29\xdd\x26"
+ "\xbd\x3e\x3c\x0e\xfe\x0e\xa5\x10"
+ "\xea\x6b\x64\xfd\x73\xc6\xed\xec"
+ "\xa8\xc9\xbf\xb3\xba\x0b\x4d\x07"
+ "\x70\xfc\x16\xfd\x79\x1e\xd7\xc5"
+ "\x49\x4e\x1c\x8b\x8d\x79\x1b\xb1"
+ "\xec\xca\x60\x09\x4c\x6a\xd5\x09"
+ "\x49\x46\x00\x88\x22\x8d\xce\xea"
+ "\xb1\x17\x11\xde\x42\xd2\x23\xc1"
+ "\x72\x11\xf5\x50\x73\x04\x40\x47"
+ "\xf9\x5d\xe7\xa7\x26\xb1\x7e\xb0"
+ "\x3f\x58\xc1\x52\xab\x12\x67\x9d"
+ "\x3f\x43\x4b\x68\xd4\x9c\x68\x38"
+ "\x07\x8a\x2d\x3e\xf3\xaf\x6a\x4b"
+ "\xf9\xe5\x31\x69\x22\xf9\xa6\x69"
+ "\xc6\x9c\x96\x9a\x12\x35\x95\x1d"
+ "\x95\xd5\xdd\xbe\xbf\x93\x53\x24"
+ "\xfd\xeb\xc2\x0a\x64\xb0\x77\x00"
+ "\x6f\x88\xc4\x37\x18\x69\x7c\xd7"
+ "\x41\x92\x55\x4c\x03\xa1\x9a\x4b"
+ "\x15\xe5\xdf\x7f\x37\x33\x72\xc1"
+ "\x8b\x10\x67\xa3\x01\x57\x94\x25"
+ "\x7b\x38\x71\x7e\xdd\x1e\xcc\x73"
+ "\x55\xd2\x8e\xeb\x07\xdd\xf1\xda"
+ "\x58\xb1\x47\x90\xfe\x42\x21\x72"
+ "\xa3\x54\x7a\xa0\x40\xec\x9f\xdd"
+ "\xc6\x84\x6e\xca\xae\xe3\x68\xb4"
+ "\x9d\xe4\x78\xff\x57\xf2\xf8\x1b"
+ "\x03\xa1\x31\xd9\xde\x8d\xf5\x22"
+ "\x9c\xdd\x20\xa4\x1e\x27\xb1\x76"
+ "\x4f\x44\x55\xe2\x9b\xa1\x9c\xfe"
+ "\x54\xf7\x27\x1b\xf4\xde\x02\xf5"
+ "\x1b\x55\x48\x5c\xdc\x21\x4b\x9e"
+ "\x4b\x6e\xed\x46\x23\xdc\x65\xb2"
+ "\xcf\x79\x5f\x28\xe0\x9e\x8b\xe7"
+ "\x4c\x9d\x8a\xff\xc1\xa6\x28\xb8"
+ "\x65\x69\x8a\x45\x29\xef\x74\x85"
+ "\xde\x79\xc7\x08\xae\x30\xb0\xf4"
+ "\xa3\x1d\x51\x41\xab\xce\xcb\xf6"
+ "\xb5\xd8\x6d\xe0\x85\xe1\x98\xb3"
+ "\x43\xbb\x86\x83\x0a\xa0\xf5\xb7"
+ "\x04\x0b\xfa\x71\x1f\xb0\xf6\xd9"
+ "\x13\x00\x15\xf0\xc7\xeb\x0d\x5a"
+ "\x9f\xd7\xb9\x6c\x65\x14\x22\x45"
+ "\x6e\x45\x32\x3e\x7e\x60\x1a\x12"
+ "\x97\x82\x14\xfb\xaa\x04\x22\xfa"
+ "\xa0\xe5\x7e\x8c\x78\x02\x48\x5d"
+ "\x78\x33\x5a\x7c\xad\xdb\x29\xce"
+ "\xbb\x8b\x61\xa4\xb7\x42\xe2\xac"
+ "\x8b\x1a\xd9\x2f\x0b\x8b\x62\x21"
+ "\x83\x35\x7e\xad\x73\xc2\xb5\x6c"
+ "\x10\x26\x38\x07\xe5\xc7\x36\x80"
+ "\xe2\x23\x12\x61\xf5\x48\x4b\x2b"
+ "\xc5\xdf\x15\xd9\x87\x01\xaa\xac"
+ "\x1e\x7c\xad\x73\x78\x18\x63\xe0"
+ "\x8b\x9f\x81\xd8\x12\x6a\x28\x10"
+ "\xbe\x04\x68\x8a\x09\x7c\x1b\x1c"
+ "\x83\x66\x80\x47\x80\xe8\xfd\x35"
+ "\x1c\x97\x6f\xae\x49\x10\x66\xcc"
+ "\xc6\xd8\xcc\x3a\x84\x91\x20\x77"
+ "\x72\xe4\x24\xd2\x37\x9f\xc5\xc9"
+ "\x25\x94\x10\x5f\x40\x00\x64\x99"
+ "\xdc\xae\xd7\x21\x09\x78\x50\x15"
+ "\xac\x5f\xc6\x2c\xa2\x0b\xa9\x39"
+ "\x87\x6e\x6d\xab\xde\x08\x51\x16"
+ "\xc7\x13\xe9\xea\xed\x06\x8e\x2c"
+ "\xf8\x37\x8c\xf0\xa6\x96\x8d\x43"
+ "\xb6\x98\x37\xb2\x43\xed\xde\xdf"
+ "\x89\x1a\xe7\xeb\x9d\xa1\x7b\x0b"
+ "\x77\xb0\xe2\x75\xc0\xf1\x98\xd9"
+ "\x80\x55\xc9\x34\x91\xd1\x59\xe8"
+ "\x4b\x0f\xc1\xa9\x4b\x7a\x84\x06"
+ "\x20\xa8\x5d\xfa\xd1\xde\x70\x56"
+ "\x2f\x9e\x91\x9c\x20\xb3\x24\xd8"
+ "\x84\x3d\xe1\x8c\x7e\x62\x52\xe5"
+ "\x44\x4b\x9f\xc2\x93\x03\xea\x2b"
+ "\x59\xc5\xfa\x3f\x91\x2b\xbb\x23"
+ "\xf5\xb2\x7b\xf5\x38\xaf\xb3\xee"
+ "\x63\xdc\x7b\xd1\xff\xaa\x8b\xab"
+ "\x82\x6b\x37\x04\xeb\x74\xbe\x79"
+ "\xb9\x83\x90\xef\x20\x59\x46\xff"
+ "\xe9\x97\x3e\x2f\xee\xb6\x64\x18"
+ "\x38\x4c\x7a\x4a\xf9\x61\xe8\x9a"
+ "\xa1\xb5\x01\xa6\x47\xd3\x11\xd4"
+ "\xce\xd3\x91\x49\x88\xc7\xb8\x4d"
+ "\xb1\xb9\x07\x6d\x16\x72\xae\x46"
+ "\x5e\x03\xa1\x4b\xb6\x02\x30\xa8"
+ "\x3d\xa9\x07\x2a\x7c\x19\xe7\x62"
+ "\x87\xe3\x82\x2f\x6f\xe1\x09\xd9"
+ "\x94\x97\xea\xdd\x58\x9e\xae\x76"
+ "\x7e\x35\xe5\xb4\xda\x7e\xf4\xde"
+ "\xf7\x32\x87\xcd\x93\xbf\x11\x56"
+ "\x11\xbe\x08\x74\xe1\x69\xad\xe2"
+ "\xd7\xf8\x86\x75\x8a\x3c\xa4\xbe"
+ "\x70\xa7\x1b\xfc\x0b\x44\x2a\x76"
+ "\x35\xea\x5d\x85\x81\xaf\x85\xeb"
+ "\xa0\x1c\x61\xc2\xf7\x4f\xa5\xdc"
+ "\x02\x7f\xf6\x95\x40\x6e\x8a\x9a"
+ "\xf3\x5d\x25\x6e\x14\x3a\x22\xc9"
+ "\x37\x1c\xeb\x46\x54\x3f\xa5\x91"
+ "\xc2\xb5\x8c\xfe\x53\x08\x97\x32"
+ "\x1b\xb2\x30\x27\xfe\x25\x5d\xdc"
+ "\x08\x87\xd0\xe5\x94\x1a\xd4\xf1"
+ "\xfe\xd6\xb4\xa3\xe6\x74\x81\x3c"
+ "\x1b\xb7\x31\xa7\x22\xfd\xd4\xdd"
+ "\x20\x4e\x7c\x51\xb0\x60\x73\xb8"
+ "\x9c\xac\x91\x90\x7e\x01\xb0\xe1"
+ "\x8a\x2f\x75\x1c\x53\x2a\x98\x2a"
+ "\x06\x52\x95\x52\xb2\xe9\x25\x2e"
+ "\x4c\xe2\x5a\x00\xb2\x13\x81\x03"
+ "\x77\x66\x0d\xa5\x99\xda\x4e\x8c"
+ "\xac\xf3\x13\x53\x27\x45\xaf\x64"
+ "\x46\xdc\xea\x23\xda\x97\xd1\xab"
+ "\x7d\x6c\x30\x96\x1f\xbc\x06\x34"
+ "\x18\x0b\x5e\x21\x35\x11\x8d\x4c"
+ "\xe0\x2d\xe9\x50\x16\x74\x81\xa8"
+ "\xb4\x34\xb9\x72\x42\xa6\xcc\xbc"
+ "\xca\x34\x83\x27\x10\x5b\x68\x45"
+ "\x8f\x52\x22\x0c\x55\x3d\x29\x7c"
+ "\xe3\xc0\x66\x05\x42\x91\x5f\x58"
+ "\xfe\x4a\x62\xd9\x8c\xa9\x04\x19"
+ "\x04\xa9\x08\x4b\x57\xfc\x67\x53"
+ "\x08\x7c\xbc\x66\x8a\xb0\xb6\x9f"
+ "\x92\xd6\x41\x7c\x5b\x2a\x00\x79"
+ "\x72",
+ .ctext = "\x3a\x92\xee\x53\x31\xaf\x2b\x60"
+ "\x5f\x55\x8d\x00\x5d\xfc\x74\x97"
+ "\x28\x54\xf4\xa5\x75\xf1\x9b\x25"
+ "\x62\x1c\xc0\xe0\x13\xc8\x87\x53"
+ "\xd0\xf3\xa7\x97\x1f\x3b\x1e\xea"
+ "\xe0\xe5\x2a\xd1\xdd\xa4\x3b\x50"
+ "\x45\xa3\x0d\x7e\x1b\xc9\xa0\xad"
+ "\xb9\x2c\x54\xa6\xc7\x55\x16\xd0"
+ "\xc5\x2e\x02\x44\x35\xd0\x7e\x67"
+ "\xf2\xc4\x9b\xcd\x95\x10\xcc\x29"
+ "\x4b\xfa\x86\x87\xbe\x40\x36\xbe"
+ "\xe1\xa3\x52\x89\x55\x20\x9b\xc2"
+ "\xab\xf2\x31\x34\x16\xad\xc8\x17"
+ "\x65\x24\xc0\xff\x12\x37\xfe\x5a"
+ "\x62\x3b\x59\x47\x6c\x5f\x3a\x8e"
+ "\x3b\xd9\x30\xc8\x7f\x2f\x88\xda"
+ "\x80\xfd\x02\xda\x7f\x9a\x7a\x73"
+ "\x59\xc5\x34\x09\x9a\x11\xcb\xa7"
+ "\xfc\xf6\xa1\xa0\x60\xfb\x43\xbb"
+ "\xf1\xe9\xd7\xc6\x79\x27\x4e\xff"
+ "\x22\xb4\x24\xbf\x76\xee\x47\xb9"
+ "\x6d\x3f\x8b\xb0\x9c\x3c\x43\xdd"
+ "\xff\x25\x2e\x6d\xa4\x2b\xfb\x5d"
+ "\x1b\x97\x6c\x55\x0a\x82\x7a\x7b"
+ "\x94\x34\xc2\xdb\x2f\x1f\xc1\xea"
+ "\xd4\x4d\x17\x46\x3b\x51\x69\x09"
+ "\xe4\x99\x32\x25\xfd\x94\xaf\xfb"
+ "\x10\xf7\x4f\xdd\x0b\x3c\x8b\x41"
+ "\xb3\x6a\xb7\xd1\x33\xa8\x0c\x2f"
+ "\x62\x4c\x72\x11\xd7\x74\xe1\x3b"
+ "\x38\x43\x66\x7b\x6c\x36\x48\xe7"
+ "\xe3\xe7\x9d\xb9\x42\x73\x7a\x2a"
+ "\x89\x20\x1a\x41\x80\x03\xf7\x8f"
+ "\x61\x78\x13\xbf\xfe\x50\xf5\x04"
+ "\x52\xf9\xac\x47\xf8\x62\x4b\xb2"
+ "\x24\xa9\xbf\x64\xb0\x18\x69\xd2"
+ "\xf5\xe4\xce\xc8\xb1\x87\x75\xd6"
+ "\x2c\x24\x79\x00\x7d\x26\xfb\x44"
+ "\xe7\x45\x7a\xee\x58\xa5\x83\xc1"
+ "\xb4\x24\xab\x23\x2f\x4d\xd7\x4f"
+ "\x1c\xc7\xaa\xa9\x50\xf4\xa3\x07"
+ "\x12\x13\x89\x74\xdc\x31\x6a\xb2"
+ "\xf5\x0f\x13\x8b\xb9\xdb\x85\x1f"
+ "\xf5\xbc\x88\xd9\x95\xea\x31\x6c"
+ "\x36\x60\xb6\x49\xdc\xc4\xf7\x55"
+ "\x3f\x21\xc1\xb5\x92\x18\x5e\xbc"
+ "\x9f\x87\x7f\xe7\x79\x25\x40\x33"
+ "\xd6\xb9\x33\xd5\x50\xb3\xc7\x89"
+ "\x1b\x12\xa0\x46\xdd\xa7\xd8\x3e"
+ "\x71\xeb\x6f\x66\xa1\x26\x0c\x67"
+ "\xab\xb2\x38\x58\x17\xd8\x44\x3b"
+ "\x16\xf0\x8e\x62\x8d\x16\x10\x00"
+ "\x32\x8b\xef\xb9\x28\xd3\xc5\xad"
+ "\x0a\x19\xa2\xe4\x03\x27\x7d\x94"
+ "\x06\x18\xcd\xd6\x27\x00\xf9\x1f"
+ "\xb6\xb3\xfe\x96\x35\x5f\xc4\x1c"
+ "\x07\x62\x10\x79\x68\x50\xf1\x7e"
+ "\x29\xe7\xc4\xc4\xe7\xee\x54\xd6"
+ "\x58\x76\x84\x6d\x8d\xe4\x59\x31"
+ "\xe9\xf4\xdc\xa1\x1f\xe5\x1a\xd6"
+ "\xe6\x64\x46\xf5\x77\x9c\x60\x7a"
+ "\x5e\x62\xe3\x0a\xd4\x9f\x7a\x2d"
+ "\x7a\xa5\x0a\x7b\x29\x86\x7a\x74"
+ "\x74\x71\x6b\xca\x7d\x1d\xaa\xba"
+ "\x39\x84\x43\x76\x35\xfe\x4f\x9b"
+ "\xbb\xbb\xb5\x6a\x32\xb5\x5d\x41"
+ "\x51\xf0\x5b\x68\x03\x47\x4b\x8a"
+ "\xca\x88\xf6\x37\xbd\x73\x51\x70"
+ "\x66\xfe\x9e\x5f\x21\x9c\xf3\xdd"
+ "\xc3\xea\x27\xf9\x64\x94\xe1\x19"
+ "\xa0\xa9\xab\x60\xe0\x0e\xf7\x78"
+ "\x70\x86\xeb\xe0\xd1\x5c\x05\xd3"
+ "\xd7\xca\xe0\xc0\x47\x47\x34\xee"
+ "\x11\xa3\xa3\x54\x98\xb7\x49\x8e"
+ "\x84\x28\x70\x2c\x9e\xfb\x55\x54"
+ "\x4d\xf8\x86\xf7\x85\x7c\xbd\xf3"
+ "\x17\xd8\x47\xcb\xac\xf4\x20\x85"
+ "\x34\x66\xad\x37\x2d\x5e\x52\xda"
+ "\x8a\xfe\x98\x55\x30\xe7\x2d\x2b"
+ "\x19\x10\x8e\x7b\x66\x5e\xdc\xe0"
+ "\x45\x1f\x7b\xb4\x08\xfb\x8f\xf6"
+ "\x8c\x89\x21\x34\x55\x27\xb2\x76"
+ "\xb2\x07\xd9\xd6\x68\x9b\xea\x6b"
+ "\x2d\xb4\xc4\x35\xdd\xd2\x79\xae"
+ "\xc7\xd6\x26\x7f\x12\x01\x8c\xa7"
+ "\xe3\xdb\xa8\xf4\xf7\x2b\xec\x99"
+ "\x11\x00\xf1\x35\x8c\xcf\xd5\xc9"
+ "\xbd\x91\x36\x39\x70\xcf\x7d\x70"
+ "\x47\x1a\xfc\x6b\x56\xe0\x3f\x9c"
+ "\x60\x49\x01\x72\xa9\xaf\x2c\x9c"
+ "\xe8\xab\xda\x8c\x14\x19\xf3\x75"
+ "\x07\x17\x9d\x44\x67\x7a\x2e\xef"
+ "\xb7\x83\x35\x4a\xd1\x3d\x1c\x84"
+ "\x32\xdd\xaa\xea\xca\x1d\xdc\x72"
+ "\x2c\xcc\x43\xcd\x5d\xe3\x21\xa4"
+ "\xd0\x8a\x4b\x20\x12\xa3\xd5\x86"
+ "\x76\x96\xff\x5f\x04\x57\x0f\xe6"
+ "\xba\xe8\x76\x50\x0c\x64\x1d\x83"
+ "\x9c\x9b\x9a\x9a\x58\x97\x9c\x5c"
+ "\xb4\xa4\xa6\x3e\x19\xeb\x8f\x5a"
+ "\x61\xb2\x03\x7b\x35\x19\xbe\xa7"
+ "\x63\x0c\xfd\xdd\xf9\x90\x6c\x08"
+ "\x19\x11\xd3\x65\x4a\xf5\x96\x92"
+ "\x59\xaa\x9c\x61\x0c\x29\xa7\xf8"
+ "\x14\x39\x37\xbf\x3c\xf2\x16\x72"
+ "\x02\xfa\xa2\xf3\x18\x67\x5d\xcb"
+ "\xdc\x4d\xbb\x96\xff\x70\x08\x2d"
+ "\xc2\xa8\x52\xe1\x34\x5f\x72\xfe"
+ "\x64\xbf\xca\xa7\x74\x38\xfb\x74"
+ "\x55\x9c\xfa\x8a\xed\xfb\x98\xeb"
+ "\x58\x2e\x6c\xe1\x52\x76\x86\xd7"
+ "\xcf\xa1\xa4\xfc\xb2\x47\x41\x28"
+ "\xa3\xc1\xe5\xfd\x53\x19\x28\x2b"
+ "\x37\x04\x65\x96\x99\x7a\x28\x0f"
+ "\x07\x68\x4b\xc7\x52\x0a\x55\x35"
+ "\x40\x19\x95\x61\xe8\x59\x40\x1f"
+ "\x9d\xbf\x78\x7d\x8f\x84\xff\x6f"
+ "\xd0\xd5\x63\xd2\x22\xbd\xc8\x4e"
+ "\xfb\xe7\x9f\x06\xe6\xe7\x39\x6d"
+ "\x6a\x96\x9f\xf0\x74\x7e\xc9\x35"
+ "\xb7\x26\xb8\x1c\x0a\xa6\x27\x2c"
+ "\xa2\x2b\xfe\xbe\x0f\x07\x73\xae"
+ "\x7f\x7f\x54\xf5\x7c\x6a\x0a\x56"
+ "\x49\xd4\x81\xe5\x85\x53\x99\x1f"
+ "\x95\x05\x13\x58\x8d\x0e\x1b\x90"
+ "\xc3\x75\x48\x64\x58\x98\x67\x84"
+ "\xae\xe2\x21\xa2\x8a\x04\x0a\x0b"
+ "\x61\xaa\xb0\xd4\x28\x60\x7a\xf8"
+ "\xbc\x52\xfb\x24\x7f\xed\x0d\x2a"
+ "\x0a\xb2\xf9\xc6\x95\xb5\x11\xc9"
+ "\xf4\x0f\x26\x11\xcf\x2a\x57\x87"
+ "\x7a\xf3\xe7\x94\x65\xc2\xb5\xb3"
+ "\xab\x98\xe3\xc1\x2b\x59\x19\x7c"
+ "\xd6\xf3\xf9\xbf\xff\x6d\xc6\x82"
+ "\x13\x2f\x4a\x2e\xcd\x26\xfe\x2d"
+ "\x01\x70\xf4\xc2\x7f\x1f\x4c\xcb"
+ "\x47\x77\x0c\xa0\xa3\x03\xec\xda"
+ "\xa9\xbf\x0d\x2d\xae\xe4\xb8\x7b"
+ "\xa9\xbc\x08\xb4\x68\x2e\xc5\x60"
+ "\x8d\x87\x41\x2b\x0f\x69\xf0\xaf"
+ "\x5f\xba\x72\x20\x0f\x33\xcd\x6d"
+ "\x36\x7d\x7b\xd5\x05\xf1\x4b\x05"
+ "\xc4\xfc\x7f\x80\xb9\x4d\xbd\xf7"
+ "\x7c\x84\x07\x01\xc2\x40\x66\x5b"
+ "\x98\xc7\x2c\xe3\x97\xfa\xdf\x87"
+ "\xa0\x1f\xe9\x21\x42\x0f\x3b\xeb"
+ "\x89\x1c\x3b\xca\x83\x61\x77\x68"
+ "\x84\xbb\x60\x87\x38\x2e\x25\xd5"
+ "\x9e\x04\x41\x70\xac\xda\xc0\x9c"
+ "\x9c\x69\xea\x8d\x4e\x55\x2a\x29"
+ "\xed\x05\x4b\x7b\x73\x71\x90\x59"
+ "\x4d\xc8\xd8\x44\xf0\x4c\xe1\x5e"
+ "\x84\x47\x55\xcc\x32\x3f\xe7\x97"
+ "\x42\xc6\x32\xac\x40\xe5\xa5\xc7"
+ "\x8b\xed\xdb\xf7\x83\xd6\xb1\xc2"
+ "\x52\x5e\x34\xb7\xeb\x6e\xd9\xfc"
+ "\xe5\x93\x9a\x97\x3e\xb0\xdc\xd9"
+ "\xd7\x06\x10\xb6\x1d\x80\x59\xdd"
+ "\x0d\xfe\x64\x35\xcd\x5d\xec\xf0"
+ "\xba\xd0\x34\xc9\x2d\x91\xc5\x17"
+ "\x11",
+ .len = 1281,
+ .also_non_np = 1,
+ .np = 3,
+ .tap = { 1200, 1, 80 },
+ }, { /* test vector from https://tools.ietf.org/html/draft-arciszewski-xchacha-02#appendix-A.3.2 */
+ .key = "\x80\x81\x82\x83\x84\x85\x86\x87"
+ "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+ "\x90\x91\x92\x93\x94\x95\x96\x97"
+ "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f",
+ .klen = 32,
+ .iv = "\x40\x41\x42\x43\x44\x45\x46\x47"
+ "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+ "\x50\x51\x52\x53\x54\x55\x56\x58"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x54\x68\x65\x20\x64\x68\x6f\x6c"
+ "\x65\x20\x28\x70\x72\x6f\x6e\x6f"
+ "\x75\x6e\x63\x65\x64\x20\x22\x64"
+ "\x6f\x6c\x65\x22\x29\x20\x69\x73"
+ "\x20\x61\x6c\x73\x6f\x20\x6b\x6e"
+ "\x6f\x77\x6e\x20\x61\x73\x20\x74"
+ "\x68\x65\x20\x41\x73\x69\x61\x74"
+ "\x69\x63\x20\x77\x69\x6c\x64\x20"
+ "\x64\x6f\x67\x2c\x20\x72\x65\x64"
+ "\x20\x64\x6f\x67\x2c\x20\x61\x6e"
+ "\x64\x20\x77\x68\x69\x73\x74\x6c"
+ "\x69\x6e\x67\x20\x64\x6f\x67\x2e"
+ "\x20\x49\x74\x20\x69\x73\x20\x61"
+ "\x62\x6f\x75\x74\x20\x74\x68\x65"
+ "\x20\x73\x69\x7a\x65\x20\x6f\x66"
+ "\x20\x61\x20\x47\x65\x72\x6d\x61"
+ "\x6e\x20\x73\x68\x65\x70\x68\x65"
+ "\x72\x64\x20\x62\x75\x74\x20\x6c"
+ "\x6f\x6f\x6b\x73\x20\x6d\x6f\x72"
+ "\x65\x20\x6c\x69\x6b\x65\x20\x61"
+ "\x20\x6c\x6f\x6e\x67\x2d\x6c\x65"
+ "\x67\x67\x65\x64\x20\x66\x6f\x78"
+ "\x2e\x20\x54\x68\x69\x73\x20\x68"
+ "\x69\x67\x68\x6c\x79\x20\x65\x6c"
+ "\x75\x73\x69\x76\x65\x20\x61\x6e"
+ "\x64\x20\x73\x6b\x69\x6c\x6c\x65"
+ "\x64\x20\x6a\x75\x6d\x70\x65\x72"
+ "\x20\x69\x73\x20\x63\x6c\x61\x73"
+ "\x73\x69\x66\x69\x65\x64\x20\x77"
+ "\x69\x74\x68\x20\x77\x6f\x6c\x76"
+ "\x65\x73\x2c\x20\x63\x6f\x79\x6f"
+ "\x74\x65\x73\x2c\x20\x6a\x61\x63"
+ "\x6b\x61\x6c\x73\x2c\x20\x61\x6e"
+ "\x64\x20\x66\x6f\x78\x65\x73\x20"
+ "\x69\x6e\x20\x74\x68\x65\x20\x74"
+ "\x61\x78\x6f\x6e\x6f\x6d\x69\x63"
+ "\x20\x66\x61\x6d\x69\x6c\x79\x20"
+ "\x43\x61\x6e\x69\x64\x61\x65\x2e",
+ .ctext = "\x45\x59\xab\xba\x4e\x48\xc1\x61"
+ "\x02\xe8\xbb\x2c\x05\xe6\x94\x7f"
+ "\x50\xa7\x86\xde\x16\x2f\x9b\x0b"
+ "\x7e\x59\x2a\x9b\x53\xd0\xd4\xe9"
+ "\x8d\x8d\x64\x10\xd5\x40\xa1\xa6"
+ "\x37\x5b\x26\xd8\x0d\xac\xe4\xfa"
+ "\xb5\x23\x84\xc7\x31\xac\xbf\x16"
+ "\xa5\x92\x3c\x0c\x48\xd3\x57\x5d"
+ "\x4d\x0d\x2c\x67\x3b\x66\x6f\xaa"
+ "\x73\x10\x61\x27\x77\x01\x09\x3a"
+ "\x6b\xf7\xa1\x58\xa8\x86\x42\x92"
+ "\xa4\x1c\x48\xe3\xa9\xb4\xc0\xda"
+ "\xec\xe0\xf8\xd9\x8d\x0d\x7e\x05"
+ "\xb3\x7a\x30\x7b\xbb\x66\x33\x31"
+ "\x64\xec\x9e\x1b\x24\xea\x0d\x6c"
+ "\x3f\xfd\xdc\xec\x4f\x68\xe7\x44"
+ "\x30\x56\x19\x3a\x03\xc8\x10\xe1"
+ "\x13\x44\xca\x06\xd8\xed\x8a\x2b"
+ "\xfb\x1e\x8d\x48\xcf\xa6\xbc\x0e"
+ "\xb4\xe2\x46\x4b\x74\x81\x42\x40"
+ "\x7c\x9f\x43\x1a\xee\x76\x99\x60"
+ "\xe1\x5b\xa8\xb9\x68\x90\x46\x6e"
+ "\xf2\x45\x75\x99\x85\x23\x85\xc6"
+ "\x61\xf7\x52\xce\x20\xf9\xda\x0c"
+ "\x09\xab\x6b\x19\xdf\x74\xe7\x6a"
+ "\x95\x96\x74\x46\xf8\xd0\xfd\x41"
+ "\x5e\x7b\xee\x2a\x12\xa1\x14\xc2"
+ "\x0e\xb5\x29\x2a\xe7\xa3\x49\xae"
+ "\x57\x78\x20\xd5\x52\x0a\x1f\x3f"
+ "\xb6\x2a\x17\xce\x6a\x7e\x68\xfa"
+ "\x7c\x79\x11\x1d\x88\x60\x92\x0b"
+ "\xc0\x48\xef\x43\xfe\x84\x48\x6c"
+ "\xcb\x87\xc2\x5f\x0a\xe0\x45\xf0"
+ "\xcc\xe1\xe7\x98\x9a\x9a\xa2\x20"
+ "\xa2\x8b\xdd\x48\x27\xe7\x51\xa2"
+ "\x4a\x6d\x5c\x62\xd7\x90\xa6\x63"
+ "\x93\xb9\x31\x11\xc1\xa5\x5d\xd7"
+ "\x42\x1a\x10\x18\x49\x74\xc7\xc5",
+ .len = 304,
+ }
+};
+
+/*
+ * Same as XChaCha20 test vectors above, but recomputed the ciphertext with
+ * XChaCha12, using a modified libsodium.
+ */
+static const struct cipher_testvec xchacha12_tv_template[] = {
+ {
+ .key = "\x79\xc9\x97\x98\xac\x67\x30\x0b"
+ "\xbb\x27\x04\xc9\x5c\x34\x1e\x32"
+ "\x45\xf3\xdc\xb2\x17\x61\xb9\x8e"
+ "\x52\xff\x45\xb2\x4f\x30\x4f\xc4",
+ .klen = 32,
+ .iv = "\xb3\x3f\xfd\x30\x96\x47\x9b\xcf"
+ "\xbc\x9a\xee\x49\x41\x76\x88\xa0"
+ "\xa2\x55\x4f\x8d\x95\x38\x94\x19"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00",
+ .ctext = "\x1b\x78\x7f\xd7\xa1\x41\x68\xab"
+ "\x3d\x3f\xd1\x7b\x69\x56\xb2\xd5"
+ "\x43\xce\xeb\xaf\x36\xf0\x29\x9d"
+ "\x3a\xfb\x18\xae\x1b",
+ .len = 29,
+ }, {
+ .key = "\x9d\x23\xbd\x41\x49\xcb\x97\x9c"
+ "\xcf\x3c\x5c\x94\xdd\x21\x7e\x98"
+ "\x08\xcb\x0e\x50\xcd\x0f\x67\x81"
+ "\x22\x35\xea\xaf\x60\x1d\x62\x32",
+ .klen = 32,
+ .iv = "\xc0\x47\x54\x82\x66\xb7\xc3\x70"
+ "\xd3\x35\x66\xa2\x42\x5c\xbf\x30"
+ "\xd8\x2d\x1e\xaf\x52\x94\x10\x9e"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00",
+ .ctext = "\xfb\x32\x09\x1d\x83\x05\xae\x4c"
+ "\x13\x1f\x12\x71\xf2\xca\xb2\xeb"
+ "\x5b\x83\x14\x7d\x83\xf6\x57\x77"
+ "\x2e\x40\x1f\x92\x2c\xf9\xec\x35"
+ "\x34\x1f\x93\xdf\xfb\x30\xd7\x35"
+ "\x03\x05\x78\xc1\x20\x3b\x7a\xe3"
+ "\x62\xa3\x89\xdc\x11\x11\x45\xa8"
+ "\x82\x89\xa0\xf1\x4e\xc7\x0f\x11"
+ "\x69\xdd\x0c\x84\x2b\x89\x5c\xdc"
+ "\xf0\xde\x01\xef\xc5\x65\x79\x23"
+ "\x87\x67\xd6\x50\xd9\x8d\xd9\x92"
+ "\x54\x5b\x0e",
+ .len = 91,
+ }, {
+ .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x67\xc6\x69\x73"
+ "\x51\xff\x4a\xec\x29\xcd\xba\xab"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00",
+ .ctext = "\xdf\x2d\xc6\x21\x2a\x9d\xa1\xbb"
+ "\xc2\x77\x66\x0c\x5c\x46\xef\xa7"
+ "\x79\x1b\xb9\xdf\x55\xe2\xf9\x61"
+ "\x4c\x7b\xa4\x52\x24\xaf\xa2\xda"
+ "\xd1\x8f\x8f\xa2\x9e\x53\x4d\xc4"
+ "\xb8\x55\x98\x08\x7c\x08\xd4\x18"
+ "\x67\x8f\xef\x50\xb1\x5f\xa5\x77"
+ "\x4c\x25\xe7\x86\x26\x42\xca\x44",
+ .len = 64,
+ }, {
+ .key = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x00\x00\x00\x00\x01",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x02\xf2\xfb\xe3\x46"
+ "\x7c\xc2\x54\xf8\x1b\xe8\xe7\x8d"
+ "\x01\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x41\x6e\x79\x20\x73\x75\x62\x6d"
+ "\x69\x73\x73\x69\x6f\x6e\x20\x74"
+ "\x6f\x20\x74\x68\x65\x20\x49\x45"
+ "\x54\x46\x20\x69\x6e\x74\x65\x6e"
+ "\x64\x65\x64\x20\x62\x79\x20\x74"
+ "\x68\x65\x20\x43\x6f\x6e\x74\x72"
+ "\x69\x62\x75\x74\x6f\x72\x20\x66"
+ "\x6f\x72\x20\x70\x75\x62\x6c\x69"
+ "\x63\x61\x74\x69\x6f\x6e\x20\x61"
+ "\x73\x20\x61\x6c\x6c\x20\x6f\x72"
+ "\x20\x70\x61\x72\x74\x20\x6f\x66"
+ "\x20\x61\x6e\x20\x49\x45\x54\x46"
+ "\x20\x49\x6e\x74\x65\x72\x6e\x65"
+ "\x74\x2d\x44\x72\x61\x66\x74\x20"
+ "\x6f\x72\x20\x52\x46\x43\x20\x61"
+ "\x6e\x64\x20\x61\x6e\x79\x20\x73"
+ "\x74\x61\x74\x65\x6d\x65\x6e\x74"
+ "\x20\x6d\x61\x64\x65\x20\x77\x69"
+ "\x74\x68\x69\x6e\x20\x74\x68\x65"
+ "\x20\x63\x6f\x6e\x74\x65\x78\x74"
+ "\x20\x6f\x66\x20\x61\x6e\x20\x49"
+ "\x45\x54\x46\x20\x61\x63\x74\x69"
+ "\x76\x69\x74\x79\x20\x69\x73\x20"
+ "\x63\x6f\x6e\x73\x69\x64\x65\x72"
+ "\x65\x64\x20\x61\x6e\x20\x22\x49"
+ "\x45\x54\x46\x20\x43\x6f\x6e\x74"
+ "\x72\x69\x62\x75\x74\x69\x6f\x6e"
+ "\x22\x2e\x20\x53\x75\x63\x68\x20"
+ "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+ "\x74\x73\x20\x69\x6e\x63\x6c\x75"
+ "\x64\x65\x20\x6f\x72\x61\x6c\x20"
+ "\x73\x74\x61\x74\x65\x6d\x65\x6e"
+ "\x74\x73\x20\x69\x6e\x20\x49\x45"
+ "\x54\x46\x20\x73\x65\x73\x73\x69"
+ "\x6f\x6e\x73\x2c\x20\x61\x73\x20"
+ "\x77\x65\x6c\x6c\x20\x61\x73\x20"
+ "\x77\x72\x69\x74\x74\x65\x6e\x20"
+ "\x61\x6e\x64\x20\x65\x6c\x65\x63"
+ "\x74\x72\x6f\x6e\x69\x63\x20\x63"
+ "\x6f\x6d\x6d\x75\x6e\x69\x63\x61"
+ "\x74\x69\x6f\x6e\x73\x20\x6d\x61"
+ "\x64\x65\x20\x61\x74\x20\x61\x6e"
+ "\x79\x20\x74\x69\x6d\x65\x20\x6f"
+ "\x72\x20\x70\x6c\x61\x63\x65\x2c"
+ "\x20\x77\x68\x69\x63\x68\x20\x61"
+ "\x72\x65\x20\x61\x64\x64\x72\x65"
+ "\x73\x73\x65\x64\x20\x74\x6f",
+ .ctext = "\xe4\xa6\xc8\x30\xc4\x23\x13\xd6"
+ "\x08\x4d\xc9\xb7\xa5\x64\x7c\xb9"
+ "\x71\xe2\xab\x3e\xa8\x30\x8a\x1c"
+ "\x4a\x94\x6d\x9b\xe0\xb3\x6f\xf1"
+ "\xdc\xe3\x1b\xb3\xa9\x6d\x0d\xd6"
+ "\xd0\xca\x12\xef\xe7\x5f\xd8\x61"
+ "\x3c\x82\xd3\x99\x86\x3c\x6f\x66"
+ "\x02\x06\xdc\x55\xf9\xed\xdf\x38"
+ "\xb4\xa6\x17\x00\x7f\xef\xbf\x4f"
+ "\xf8\x36\xf1\x60\x7e\x47\xaf\xdb"
+ "\x55\x9b\x12\xcb\x56\x44\xa7\x1f"
+ "\xd3\x1a\x07\x3b\x00\xec\xe6\x4c"
+ "\xa2\x43\x27\xdf\x86\x19\x4f\x16"
+ "\xed\xf9\x4a\xf3\x63\x6f\xfa\x7f"
+ "\x78\x11\xf6\x7d\x97\x6f\xec\x6f"
+ "\x85\x0f\x5c\x36\x13\x8d\x87\xe0"
+ "\x80\xb1\x69\x0b\x98\x89\x9c\x4e"
+ "\xf8\xdd\xee\x5c\x0a\x85\xce\xd4"
+ "\xea\x1b\x48\xbe\x08\xf8\xe2\xa8"
+ "\xa5\xb0\x3c\x79\xb1\x15\xb4\xb9"
+ "\x75\x10\x95\x35\x81\x7e\x26\xe6"
+ "\x78\xa4\x88\xcf\xdb\x91\x34\x18"
+ "\xad\xd7\x8e\x07\x7d\xab\x39\xf9"
+ "\xa3\x9e\xa5\x1d\xbb\xed\x61\xfd"
+ "\xdc\xb7\x5a\x27\xfc\xb5\xc9\x10"
+ "\xa8\xcc\x52\x7f\x14\x76\x90\xe7"
+ "\x1b\x29\x60\x74\xc0\x98\x77\xbb"
+ "\xe0\x54\xbb\x27\x49\x59\x1e\x62"
+ "\x3d\xaf\x74\x06\xa4\x42\x6f\xc6"
+ "\x52\x97\xc4\x1d\xc4\x9f\xe2\xe5"
+ "\x38\x57\x91\xd1\xa2\x28\xcc\x40"
+ "\xcc\x70\x59\x37\xfc\x9f\x4b\xda"
+ "\xa0\xeb\x97\x9a\x7d\xed\x14\x5c"
+ "\x9c\xb7\x93\x26\x41\xa8\x66\xdd"
+ "\x87\x6a\xc0\xd3\xc2\xa9\x3e\xae"
+ "\xe9\x72\xfe\xd1\xb3\xac\x38\xea"
+ "\x4d\x15\xa9\xd5\x36\x61\xe9\x96"
+ "\x6c\x23\xf8\x43\xe4\x92\x29\xd9"
+ "\x8b\x78\xf7\x0a\x52\xe0\x19\x5b"
+ "\x59\x69\x5b\x5d\xa1\x53\xc4\x68"
+ "\xe1\xbb\xac\x89\x14\xe2\xe2\x85"
+ "\x41\x18\xf5\xb3\xd1\xfa\x68\x19"
+ "\x44\x78\xdc\xcf\xe7\x88\x2d\x52"
+ "\x5f\x40\xb5\x7e\xf8\x88\xa2\xae"
+ "\x4a\xb2\x07\x35\x9d\x9b\x07\x88"
+ "\xb7\x00\xd0\x0c\xb6\xa0\x47\x59"
+ "\xda\x4e\xc9\xab\x9b\x8a\x7b",
+
+ .len = 375,
+ .also_non_np = 1,
+ .np = 3,
+ .tap = { 375 - 20, 4, 16 },
+
+ }, {
+ .key = "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a"
+ "\xf3\x33\x88\x86\x04\xf6\xb5\xf0"
+ "\x47\x39\x17\xc1\x40\x2b\x80\x09"
+ "\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
+ .klen = 32,
+ .iv = "\x00\x00\x00\x00\x00\x00\x00\x00"
+ "\x00\x00\x00\x02\x76\x5a\x2e\x63"
+ "\x33\x9f\xc9\x9a\x66\x32\x0d\xb7"
+ "\x2a\x00\x00\x00\x00\x00\x00\x00",
+ .ptext = "\x27\x54\x77\x61\x73\x20\x62\x72"
+ "\x69\x6c\x6c\x69\x67\x2c\x20\x61"
+ "\x6e\x64\x20\x74\x68\x65\x20\x73"
+ "\x6c\x69\x74\x68\x79\x20\x74\x6f"
+ "\x76\x65\x73\x0a\x44\x69\x64\x20"
+ "\x67\x79\x72\x65\x20\x61\x6e\x64"
+ "\x20\x67\x69\x6d\x62\x6c\x65\x20"
+ "\x69\x6e\x20\x74\x68\x65\x20\x77"
+ "\x61\x62\x65\x3a\x0a\x41\x6c\x6c"
+ "\x20\x6d\x69\x6d\x73\x79\x20\x77"
+ "\x65\x72\x65\x20\x74\x68\x65\x20"
+ "\x62\x6f\x72\x6f\x67\x6f\x76\x65"
+ "\x73\x2c\x0a\x41\x6e\x64\x20\x74"
+ "\x68\x65\x20\x6d\x6f\x6d\x65\x20"
+ "\x72\x61\x74\x68\x73\x20\x6f\x75"
+ "\x74\x67\x72\x61\x62\x65\x2e",
+ .ctext = "\xb9\x68\xbc\x6a\x24\xbc\xcc\xd8"
+ "\x9b\x2a\x8d\x5b\x96\xaf\x56\xe3"
+ "\x11\x61\xe7\xa7\x9b\xce\x4e\x7d"
+ "\x60\x02\x48\xac\xeb\xd5\x3a\x26"</