[go] crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey

222 views
Skip to first unread message

Nick Craig-Wood (Gerrit)

unread,
Mar 22, 2017, 4:34:08 PM3/22/17
to Ian Lance Taylor, golang-co...@googlegroups.com

Nick Craig-Wood has uploaded this change for review.

View Change

crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey

ARM assembly for AES crypto adapted from openssl giving an
encrypt/decrypt speed up of 1.6-2.7x and a key scheduling speedup of
2.3-4.5x.

Raspberry Pi 3 BCM2709 ARMv7 Processor rev 5 (v7l)

name       old time/op    new time/op     delta
Encrypt-4    3.12µs ± 1%     1.13µs ± 2%   -63.86%  (p=0.000 n=20+20)
Decrypt-4    3.10µs ± 1%     1.21µs ± 2%   -60.91%  (p=0.000 n=20+20)
Expand-4     11.3µs ± 1%      2.5µs ± 1%   -78.11%  (p=0.000 n=16+16)

name       old speed      new speed       delta
Encrypt-4  5.13MB/s ± 2%  14.18MB/s ± 2%  +176.58%  (p=0.000 n=20+20)
Decrypt-4  5.16MB/s ± 1%  13.19MB/s ± 2%  +155.78%  (p=0.000 n=20+20)

Chromebook Samsung Exynos5 ARMv7 Processor rev 4 (v7l)

name       old time/op    new time/op    delta
Encrypt-2     342ns ± 1%     210ns ± 1%  -38.80%  (p=0.000 n=16+20)
Decrypt-2     343ns ± 6%     209ns ± 2%  -39.16%  (p=0.000 n=17+18)
Expand-2     1.64µs ± 5%    0.70µs ± 1%  -57.29%  (p=0.000 n=17+19)

name       old speed      new speed      delta
Encrypt-2  46.7MB/s ± 1%  76.2MB/s ± 1%  +63.35%  (p=0.000 n=16+20)
Decrypt-2  46.4MB/s ± 7%  76.4MB/s ± 2%  +64.75%  (p=0.000 n=18+18)

Issue #4299

Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
---
A src/crypto/aes/asm_arm.s
M src/crypto/aes/block.go
A src/crypto/aes/cipher_arm.go
M src/crypto/aes/cipher_generic.go
4 files changed, 946 insertions(+), 1 deletion(-)

diff --git a/src/crypto/aes/asm_arm.s b/src/crypto/aes/asm_arm.s
new file mode 100644
index 0000000..8b24e16
--- /dev/null
+++ b/src/crypto/aes/asm_arm.s
@@ -0,0 +1,842 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a derived work from OpenSSL of AES using assembly optimizations. The
+// original code was written by Andy Polyakov <ap...@openssl.org> and it's dual
+// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
+// it. For further details see http://www.openssl.org/~appro/cryptogams/.
+
+// Original code can be found at the link below:
+// https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aes-armv4.pl
+
+// This code is based on crypto/aes/asm/aes-armv4.pl version
+// 6aa36e8e5a062e31543e7796f0351ff9628832ce from 21 May 2017
+
+// Apart from assembler syntax and calling convention changes, the
+// major change needed was to spill one register to the stack, as Go
+// assembly can't use R10.
+
+// AES for ARMv4
+
+// January 2007.
+//
+// Code uses single 1K S-box and is >2 times faster than code generated
+// by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
+// allows to merge logical or arithmetic operation with shift or rotate
+// in one instruction and emit combined result every cycle. The module
+// is endian-neutral. The performance is ~42 cycles/byte for 128-bit
+// key [on single-issue Xscale PXA250 core].
+
+// May 2007.
+//
+// AES_set_[en|de]crypt_key is added.
+
+// July 2010.
+//
+// Rescheduling for dual-issue pipeline resulted in 12% improvement on
+// Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
+
+// February 2011.
+//
+// Profiler-assisted and platform-specific optimization resulted in 16%
+// improvement on Cortex A8 core and ~21.5 cycles per byte.
+
+#include "textflag.h"
+
+#define s0 R0
+#define s1 R1
+#define s2 R2
+#define s3 R3
+#define t1 R4
+#define t2 R5
+#define t3 R6
+#define i1 R7
+#define i2 R8
+#define i3 R9
+#define mask80 i1
+#define mask1b i2
+#define mask7f i3
+
+#define tbl R11
+#define key R12
+#define t4 R14
+
+// #define ARM_ARCH_7 1
+
+// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
+//
+// Encrypts the 16-byte block at src into the 16 bytes at dst using the
+// expanded key at xk. The first four key words are XORed into the state,
+// then enc_loop performs one table-driven round per pass, decrementing nr
+// and repeating while it is still positive; a final round built from byte
+// loads and a last round-key XOR follows. The nr argument slot on the stack
+// doubles as the loop counter and is overwritten.
+//
+// Register roles: s0-s3 hold the state words, t1-t3 and i1-i3 are scratch,
+// tbl points at the lookup table, key walks the round keys, and t4 holds
+// the src pointer, then the 0xff byte mask, then the dst pointer.
+TEXT	·encryptBlockAsm(SB), NOSPLIT, $4-16
+	MOVW	src+12(FP), t4
+	MOVW	$·te0(SB), tbl
+	MOVW	xk+4(FP), key
+
+#ifndef ARM_ARCH_7
+	MOVBU	3(t4),s0	// load input data in endian-neutral
+	MOVBU	2(t4),t1	// manner...
+	MOVBU	1(t4),t2
+	MOVBU	0(t4),t3
+	ORR	t1<<8,s0,s0
+	MOVBU	7(t4),s1
+	ORR	t2<<16,s0,s0
+	MOVBU	6(t4),t1
+	ORR	t3<<24,s0,s0
+	MOVBU	5(t4),t2
+	MOVBU	4(t4),t3
+	ORR	t1<<8,s1,s1
+	MOVBU	11(t4),s2
+	ORR	t2<<16,s1,s1
+	MOVBU	10(t4),t1
+	ORR	t3<<24,s1,s1
+	MOVBU	9(t4),t2
+	MOVBU	8(t4),t3
+	ORR	t1<<8,s2,s2
+	MOVBU	15(t4),s3
+	ORR	t2<<16,s2,s2
+	MOVBU	14(t4),t1
+	ORR	t3<<24,s2,s2
+	MOVBU	13(t4),t2
+	MOVBU	12(t4),t3
+	ORR	t1<<8,s3,s3
+	ORR	t2<<16,s3,s3
+	ORR	t3<<24,s3,s3
+#else
+	MOVW	0(t4),s0
+	MOVW	4(t4),s1
+	MOVW	8(t4),s2
+	MOVW	12(t4),s3
+	REV	s0,s0
+	REV	s1,s1
+	REV	s2,s2
+	REV	s3,s3
+#endif
+
+	MOVM.IA.W	(key),[t1-i1]	// first four round-key words; key advances by 16
+	EOR	t1,s0,s0
+	EOR	t2,s1,s1
+	EOR	t3,s2,s2
+	EOR	i1,s3,s3
+	MOVW	$255,t4	// t4 is the byte mask from here on
+
+	AND	s0,t4,i1
+	AND	s0>>8,t4,i2
+	AND	s0>>16,t4,i3
+	MOVW	s0>>24,s0
+enc_loop:
+	MOVW	i1<<2(tbl),t1	// Te3[s0>>0]
+	AND	s1>>16,t4,i1	// i0
+	MOVW	i2<<2(tbl),t2	// Te2[s0>>8]
+	AND	s1,t4,i2
+	MOVW	i3<<2(tbl),t3	// Te1[s0>>16]
+	AND	s1>>8,t4,i3
+	MOVW	s0<<2(tbl),s0	// Te0[s0>>24]
+	MOVW	s1>>24,s1
+
+	MOVW	i1<<2(tbl),i1	// Te1[s1>>16]
+	MOVW	i2<<2(tbl),i2	// Te3[s1>>0]
+	MOVW	i3<<2(tbl),i3	// Te2[s1>>8]
+	EOR	i1@>8,s0,s0
+	MOVW	s1<<2(tbl),s1	// Te0[s1>>24]
+	AND	s2>>8,t4,i1	// i0
+	EOR	i2@>8,t2,t2
+	AND	s2>>16,t4,i2	// i1
+	EOR	i3@>8,t3,t3
+	AND	s2,t4,i3
+	MOVW	i1<<2(tbl),i1	// Te2[s2>>8]
+	EOR	t1@>24,s1,s1
+	MOVW	i2<<2(tbl),i2	// Te1[s2>>16]
+	MOVW	s2>>24,s2
+
+	MOVW	i3<<2(tbl),i3	// Te3[s2>>0]
+	EOR	i1@>16,s0,s0
+	MOVW	s2<<2(tbl),s2	// Te0[s2>>24]
+	AND	s3,t4,i1	// i0
+	EOR	i2@>8,s1,s1
+	AND	s3>>8,t4,i2	// i1
+	EOR	i3@>16,t3,t3
+	AND	s3>>16,t4,i3	// i2
+	MOVW	i1<<2(tbl),i1	// Te3[s3>>0]
+	EOR	t2@>16,s2,s2
+	MOVW	i2<<2(tbl),i2	// Te2[s3>>8]
+	MOVW	s3>>24,s3
+
+	MOVW	i3<<2(tbl),i3	// Te1[s3>>16]
+	EOR	i1@>24,s0,s0
+	MOVW.P	16(key),i1
+	EOR	i2@>16,s1,s1
+	MOVW	s3<<2(tbl),s3	// Te0[s3>>24]
+	EOR	i3@>8,s2,s2
+	MOVW	-12(key),t1
+	EOR	t3@>8,s3,s3
+
+	MOVW	-8(key),t2
+	EOR	i1,s0,s0
+	MOVW	-4(key),t3
+	AND	s0,t4,i1
+	EOR	t1,s1,s1
+	MOVW	nr+0(FP), t1	// reload the round counter spilled to the nr slot
+	AND	s0>>8,t4,i2
+	EOR	t2,s2,s2
+	AND	s0>>16,t4,i3
+	EOR	t3,s3,s3
+	SUB.S	$1,t1,t1	// sets the flags tested by BGT below
+	MOVW	s0>>24,s0
+
+	MOVW	t1, nr+0(FP)	// spill the decremented round counter
+	BGT	enc_loop
+
+	ADD	$2,tbl,tbl	// byte loads below read byte 2 of each 4-byte table entry
+
+	MOVBU	i1<<2(tbl),t1	// Te4[s0>>0]
+	AND	s1>>16,t4,i1	// i0
+	MOVBU	i2<<2(tbl),t2	// Te4[s0>>8]
+	AND	s1,t4,i2
+	MOVBU	i3<<2(tbl),t3	// Te4[s0>>16]
+	AND	s1>>8,t4,i3
+	MOVBU	s0<<2(tbl),s0	// Te4[s0>>24]
+	MOVW	s1>>24,s1
+
+	MOVBU	i1<<2(tbl),i1	// Te4[s1>>16]
+	MOVBU	i2<<2(tbl),i2	// Te4[s1>>0]
+	MOVBU	i3<<2(tbl),i3	// Te4[s1>>8]
+	EOR	s0<<8,i1,s0
+	MOVBU	s1<<2(tbl),s1	// Te4[s1>>24]
+	AND	s2>>8,t4,i1	// i0
+	EOR	t2<<8,i2,t2
+	AND	s2>>16,t4,i2	// i1
+	EOR	t3<<8,i3,t3
+	AND	s2,t4,i3
+	MOVBU	i1<<2(tbl),i1	// Te4[s2>>8]
+	EOR	s1<<24,t1,s1
+	MOVBU	i2<<2(tbl),i2	// Te4[s2>>16]
+	MOVW	s2>>24,s2
+
+	MOVBU	i3<<2(tbl),i3	// Te4[s2>>0]
+	EOR	s0<<8,i1,s0
+	MOVBU	s2<<2(tbl),s2	// Te4[s2>>24]
+	AND	s3,t4,i1	// i0
+	EOR	i2<<16,s1,s1
+	AND	s3>>8,t4,i2	// i1
+	EOR	t3<<8,i3,t3
+	AND	s3>>16,t4,i3	// i2
+	MOVBU	i1<<2(tbl),i1	// Te4[s3>>0]
+	EOR	s2<<24,t2,s2
+	MOVBU	i2<<2(tbl),i2	// Te4[s3>>8]
+	MOVW	s3>>24,s3
+
+	MOVBU	i3<<2(tbl),i3	// Te4[s3>>16]
+	EOR	s0<<8,i1,s0
+	MOVW	0(key),i1
+	MOVBU	s3<<2(tbl),s3	// Te4[s3>>24]
+	EOR	i2<<8,s1,s1
+	MOVW	4(key),t1
+	EOR	i3<<16,s2,s2
+	MOVW	8(key),t2
+	EOR	s3<<24,t3,s3
+	MOVW	12(key),t3
+
+	EOR	i1,s0,s0
+	EOR	t1,s1,s1
+	EOR	t2,s2,s2
+	EOR	t3,s3,s3
+
+	MOVW	dst+8(FP), t4
+
+#ifdef ARM_ARCH_7
+	REV	s0,s0
+	REV	s1,s1
+	REV	s2,s2
+	REV	s3,s3
+	MOVW	s0,0(t4)
+	MOVW	s1,4(t4)
+	MOVW	s2,8(t4)
+	MOVW	s3,12(t4)
+#else
+	MOVW	s0>>24,t1	// write output in endian-neutral
+	MOVW	s0>>16,t2	// manner...
+	MOVW	s0>>8,t3
+	MOVBU	t1,0(t4)
+	MOVBU	t2,1(t4)
+	MOVW	s1>>24,t1
+	MOVBU	t3,2(t4)
+	MOVW	s1>>16,t2
+	MOVBU	s0,3(t4)
+	MOVW	s1>>8,t3
+	MOVBU	t1,4(t4)
+	MOVBU	t2,5(t4)
+	MOVW	s2>>24,t1
+	MOVBU	t3,6(t4)
+	MOVW	s2>>16,t2
+	MOVBU	s1,7(t4)
+	MOVW	s2>>8,t3
+	MOVBU	t1,8(t4)
+	MOVBU	t2,9(t4)
+	MOVW	s3>>24,t1
+	MOVBU	t3,10(t4)
+	MOVW	s3>>16,t2
+	MOVBU	s2,11(t4)
+	MOVW	s3>>8,t3
+	MOVBU	t1,12(t4)
+	MOVBU	t2,13(t4)
+	MOVBU	t3,14(t4)
+	MOVBU	s3,15(t4)
+#endif
+	RET
+
+// func expandKeyEncAsm(nr int, userKey *byte, enc *uint32)
+//
+// Expands userKey into the encryption key schedule at enc. nr selects the
+// key size: 10 means a 16-byte key, 12 a 24-byte key, and any other value
+// is treated as a 32-byte key. The key bytes are assembled into big-endian
+// words and stored as the first words of enc; the remaining round keys are
+// produced by one of the three loops below using the sbox0 and rcon tables.
+// The nr argument slot on the stack doubles as the loop counter and is
+// overwritten.
+//
+// Register roles: s0-s3 hold the four most recent "column 0-3" key words,
+// i2/i3 carry the extra words of 24- and 32-byte keys, t4 is the userKey
+// pointer and later the 0xff byte mask, tbl holds nr and later the sbox0
+// address, t3 walks rcon, and key is the output pointer.
+TEXT	·expandKeyEncAsm(SB), NOSPLIT, $4-12
+	MOVW	userKey+4(FP), t4	// inp
+	MOVW	nr+0(FP), tbl		// rounds
+	MOVW	enc+8(FP), key
+
+#ifndef ARM_ARCH_7
+	MOVBU	3(t4),s0	// load input data in endian-neutral
+	MOVBU	2(t4),t1	// manner...
+	MOVBU	1(t4),t2
+	MOVBU	0(t4),t3
+	ORR	t1<<8,s0,s0
+	MOVBU	7(t4),s1
+	ORR	t2<<16,s0,s0
+	MOVBU	6(t4),t1
+	ORR	t3<<24,s0,s0
+	MOVBU	5(t4),t2
+	MOVBU	4(t4),t3
+	ORR	t1<<8,s1,s1
+	MOVBU	11(t4),s2
+	ORR	t2<<16,s1,s1
+	MOVBU	10(t4),t1
+	ORR	t3<<24,s1,s1
+	MOVBU	9(t4),t2
+	MOVBU	8(t4),t3
+	ORR	t1<<8,s2,s2
+	MOVBU	15(t4),s3
+	ORR	t2<<16,s2,s2
+	MOVBU	14(t4),t1
+	ORR	t3<<24,s2,s2
+	MOVBU	13(t4),t2
+	MOVBU	12(t4),t3
+	ORR	t1<<8,s3,s3
+	MOVW.P	s0,16(key)
+	ORR	t2<<16,s3,s3
+	MOVW	s1,-12(key)
+	ORR	t3<<24,s3,s3
+	MOVW	s2,-8(key)
+	MOVW	s3,-4(key)
+#else
+	MOVW	0(t4),s0
+	MOVW	4(t4),s1
+	MOVW	8(t4),s2
+	MOVW	12(t4),s3
+	REV	s0,s0
+	REV	s1,s1
+	REV	s2,s2
+	REV	s3,s3
+	MOVW.P	s0,16(key)
+	MOVW	s1,-12(key)
+	MOVW	s2,-8(key)
+	MOVW	s3,-4(key)
+#endif
+
+	TEQ	$10,tbl
+	BNE	ek_not128
+	MOVW	$·rcon(SB), t3
+	MOVW	$·sbox0(SB), tbl	// Te4
+	MOVW	$255,t4
+
+	// 16-byte key: the nr slot still holds 10, so this loop emits ten
+	// further round keys of four words each.
+ek_128_loop:
+	AND	s3>>24,t4,t2
+	AND	s3>>16,t4,i1
+	MOVBU	t2<<0(tbl),t2
+	AND	s3>>8,t4,i2
+	MOVBU	i1<<0(tbl),i1
+	AND	s3,t4,i3
+	MOVBU	i2<<0(tbl),i2
+	ORR	i1<<24,t2,t2
+	MOVBU	i3<<0(tbl),i3
+	ORR	i2<<16,t2,t2
+	MOVW.P	4(t3),t1	// rcon[i++]
+	ORR	i3<<8,t2,t2
+	EOR	t1,t2,t2
+	MOVW	nr+0(FP), t1
+	EOR	t2,s0,s0	// rk[4]=rk[0]^...
+	EOR	s0,s1,s1	// rk[5]=rk[1]^rk[4]
+	MOVW.P	s0,16(key)
+	EOR	s1,s2,s2	// rk[6]=rk[2]^rk[5]
+	MOVW	s1,-12(key)
+	EOR	s2,s3,s3	// rk[7]=rk[3]^rk[6]
+	SUB.S	$1,t1,t1
+	MOVW	s2,-8(key)
+	MOVW	t1, nr+0(FP)
+	MOVW	s3,-4(key)
+	BNE	ek_128_loop
+	// The OpenSSL original computed a return pointer in R2 here; this
+	// function returns nothing, so that instruction has been dropped.
+	B	ek_done
+
+ek_not128:
+	// Keys longer than 16 bytes: load key bytes 16-23 as two more words.
+#ifndef ARM_ARCH_7
+	MOVBU	19(t4),i2
+	MOVBU	18(t4),t1
+	MOVBU	17(t4),t2
+	MOVBU	16(t4),t3
+	ORR	t1<<8,i2,i2
+	MOVBU	23(t4),i3
+	ORR	t2<<16,i2,i2
+	MOVBU	22(t4),t1
+	ORR	t3<<24,i2,i2
+	MOVBU	21(t4),t2
+	MOVBU	20(t4),t3
+	ORR	t1<<8,i3,i3
+	ORR	t2<<16,i3,i3
+	MOVW.P	i2,8(key)
+	ORR	t3<<24,i3,i3
+	MOVW	i3,-4(key)
+#else
+	MOVW	16(t4),i2
+	MOVW	20(t4),i3
+	REV	i2,i2
+	REV	i3,i3
+	MOVW.P	i2,8(key)
+	MOVW	i3,-4(key)
+#endif
+
+	TEQ	$12,tbl
+	BNE	ek_not192
+	MOVW	$·sbox0(SB), tbl	// Te4
+	MOVW	$·rcon(SB), t3
+	MOVW	$8,t1
+	MOVW	$255,t4
+	MOVW	t1, nr+0(FP)	// 24-byte key: eight passes through ek_192_loop
+
+ek_192_loop:
+	AND	i3>>24,t4,t2
+	AND	i3>>16,t4,i1
+	MOVBU	t2<<0(tbl),t2
+	AND	i3>>8,t4,i2
+	MOVBU	i1<<0(tbl),i1
+	AND	i3,t4,i3
+	MOVBU	i2<<0(tbl),i2
+	ORR	i1<<24,t2,t2
+	MOVBU	i3<<0(tbl),i3
+	ORR	i2<<16,t2,t2
+	MOVW.P	4(t3),t1	// rcon[i++]
+	ORR	i3<<8,t2,t2
+	EOR	t1,t2,i3
+	MOVW	nr+0(FP), t1
+	EOR	i3,s0,s0	// rk[6]=rk[0]^...
+	EOR	s0,s1,s1	// rk[7]=rk[1]^rk[6]
+	MOVW.P	s0,24(key)
+	EOR	s1,s2,s2	// rk[8]=rk[2]^rk[7]
+	MOVW	s1,-20(key)
+	EOR	s2,s3,s3	// rk[9]=rk[3]^rk[8]
+	SUB.S	$1,t1,t1
+	MOVW	s2,-16(key)
+	MOVW	t1, nr+0(FP)
+	MOVW	s3,-12(key)
+	BEQ	ek_done	// the last pass stops after four words
+
+	MOVW	-32(key),i1
+	MOVW	-28(key),i2
+	EOR	s3,i1,i1	// rk[10]=rk[4]^rk[9]
+	EOR	i1,i2,i3	// rk[11]=rk[5]^rk[10]
+	MOVW	i1,-8(key)
+	MOVW	i3,-4(key)
+	B	ek_192_loop
+
+ek_not192:
+	// 32-byte key: load key bytes 24-31 as two more words.
+#ifndef ARM_ARCH_7
+	MOVBU	27(t4),i2
+	MOVBU	26(t4),t1
+	MOVBU	25(t4),t2
+	MOVBU	24(t4),t3
+	ORR	t1<<8,i2,i2
+	MOVBU	31(t4),i3
+	ORR	t2<<16,i2,i2
+	MOVBU	30(t4),t1
+	ORR	t3<<24,i2,i2
+	MOVBU	29(t4),t2
+	MOVBU	28(t4),t3
+	ORR	t1<<8,i3,i3
+	ORR	t2<<16,i3,i3
+	MOVW.P	i2,8(key)
+	ORR	t3<<24,i3,i3
+	MOVW	i3,-4(key)
+#else
+	MOVW	24(t4),i2
+	MOVW	28(t4),i3
+	REV	i2,i2
+	REV	i3,i3
+	MOVW.P	i2,8(key)
+	MOVW	i3,-4(key)
+#endif
+
+	MOVW	$·sbox0(SB), tbl	// Te4
+	MOVW	$·rcon(SB), t3
+	MOVW	$7,t1
+	MOVW	$255,t4
+	MOVW	t1, nr+0(FP)	// seven passes through ek_256_loop
+
+ek_256_loop:
+	AND	i3>>24,t4,t2
+	AND	i3>>16,t4,i1
+	MOVBU	t2<<0(tbl),t2
+	AND	i3>>8,t4,i2
+	MOVBU	i1<<0(tbl),i1
+	AND	i3,t4,i3
+	MOVBU	i2<<0(tbl),i2
+	ORR	i1<<24,t2,t2
+	MOVBU	i3<<0(tbl),i3
+	ORR	i2<<16,t2,t2
+	MOVW.P	4(t3),t1	// rcon[i++]
+	ORR	i3<<8,t2,t2
+	EOR	t1,t2,i3
+	MOVW	nr+0(FP), t1
+	EOR	i3,s0,s0	// rk[8]=rk[0]^...
+	EOR	s0,s1,s1	// rk[9]=rk[1]^rk[8]
+	MOVW.P	s0,32(key)
+	EOR	s1,s2,s2	// rk[10]=rk[2]^rk[9]
+	MOVW	s1,-28(key)
+	EOR	s2,s3,s3	// rk[11]=rk[3]^rk[10]
+	SUB.S	$1,t1,t1
+	MOVW	s2,-24(key)
+	MOVW	t1, nr+0(FP)
+	MOVW	s3,-20(key)
+	BEQ	ek_done	// the last pass stops after four words
+
+	// Second half of the pass: substitute the bytes of s3 without the
+	// rotation or rcon used above.
+	AND	s3,t4,t2
+	AND	s3>>8,t4,i1
+	MOVBU	t2<<0(tbl),t2
+	AND	s3>>16,t4,i2
+	MOVBU	i1<<0(tbl),i1
+	AND	s3>>24,t4,i3
+	MOVBU	i2<<0(tbl),i2
+	ORR	i1<<8,t2,t2
+	MOVBU	i3<<0(tbl),i3
+	ORR	i2<<16,t2,t2
+	MOVW	-48(key),t1
+	ORR	i3<<24,t2,t2
+
+	MOVW	-44(key),i1
+	MOVW	-40(key),i2
+	EOR	t2,t1,t1	// rk[12]=rk[4]^...
+	MOVW	-36(key),i3
+	EOR	t1,i1,i1	// rk[13]=rk[5]^rk[12]
+	MOVW	t1,-16(key)
+	EOR	i1,i2,i2	// rk[14]=rk[6]^rk[13]
+	MOVW	i1,-12(key)
+	EOR	i2,i3,i3	// rk[15]=rk[7]^rk[14]
+	MOVW	i2,-8(key)
+	MOVW	i3,-4(key)
+	B	ek_256_loop
+
+ek_done:
+	RET
+
+// func expandKeyDecAsm(nr int, enc *uint32, dec *uint32)
+//
+// Derives the decryption key schedule at dec from the encryption schedule
+// at enc. dk_inv copies the nr+1 four-word round keys from enc to dec in
+// reverse order, working inwards from both ends and copying the middle
+// round key last. dk_mix then rewrites, in place, the (nr-1)*4 words of dec
+// that lie between the first and last round key.
+// NOTE(review): dk_inv terminates when the two enc pointers meet, which
+// relies on nr being even and non-zero — callers in this change pass 10, 12
+// or 14.
+TEXT	·expandKeyDecAsm(SB), NOSPLIT, $4-12
+	MOVW	nr+0(FP), t4	// rounds
+	MOVW	enc+4(FP), i1	// input
+	ADD	t4<<4,i1,i2	// i2 = last round key of enc
+	MOVW	dec+8(FP), key	// output
+	ADD	t4<<4,key,tbl	// tbl = last round-key slot of dec
+
+dk_inv:	MOVW.P	16(i1),s0
+	MOVW	-12(i1),s1
+	MOVW	-8(i1),s2
+	MOVW	-4(i1),s3
+	MOVW.P	-16(i2),t1
+	MOVW	16+4(i2),t2
+	MOVW	16+8(i2),t3
+	MOVW	16+12(i2),i3
+	MOVW.P	s0,-16(tbl)
+	MOVW	s1,16+4(tbl)
+	MOVW	s2,16+8(tbl)
+	MOVW	s3,16+12(tbl)
+	MOVW.P	t1,16(key)
+	MOVW	t2,-12(key)
+	MOVW	t3,-8(key)
+	MOVW	i3,-4(key)
+	TEQ	i2,i1
+	BNE	dk_inv
+
+	MOVW	(i1),s0	// middle round key: same position in enc and dec
+	MOVW	4(i1),s1
+	MOVW	8(i1),s2
+	MOVW	12(i1),s3
+	MOVW	s0,(key)
+	MOVW	s1,4(key)
+	MOVW	s2,8(key)
+	MOVW	s3,12(key)
+	SUB	t4<<3,key,key	// rewind key to the start of dec
+
+	MOVW.W	16(key),s0	// prefetch tp1
+	MOVW	$0x80,mask80
+	MOVW	$0x1b,mask1b
+	ORR	$0x8000,mask80,mask80
+	ORR	$0x1b00,mask1b,mask1b
+	ORR	mask80<<16,mask80,mask80	// mask80 = 0x80808080
+	ORR	mask1b<<16,mask1b,mask1b	// mask1b = 0x1b1b1b1b
+	SUB	$1,t4,t4
+	MVN	mask80,mask7f	// mask7f = 0x7f7f7f7f
+	MOVW	t4<<2,t4	// (rounds-1)*4
+
+dk_mix:	AND	mask80,s0,t1
+	AND	mask7f,s0,s1
+	SUB	t1>>7,t1,t1
+	AND	mask1b,t1,t1
+	EOR	s1<<1,t1,s1	// tp2
+
+	AND	mask80,s1,t1
+	AND	mask7f,s1,s2
+	SUB	t1>>7,t1,t1
+	AND	mask1b,t1,t1
+	EOR	s2<<1,t1,s2	// tp4
+
+	AND	mask80,s2,t1
+	AND	mask7f,s2,s3
+	SUB	t1>>7,t1,t1
+	AND	mask1b,t1,t1
+	EOR	s3<<1,t1,s3	// tp8
+
+	EOR	s2,s1,t1
+	EOR	s3,s0,t2	// tp9
+	EOR	s3,t1,t1	// tpe
+	EOR	s1@>24,t1,t1
+	EOR	t2@>24,t1,t1	// ^= ROTATE(tpb=tp9^tp2,8)
+	EOR	s2@>16,t1,t1
+	EOR	t2@>16,t1,t1	// ^= ROTATE(tpd=tp9^tp4,16)
+	EOR	t2@>8,t1,t1	// ^= ROTATE(tp9,24)
+
+	MOVW	4(key),s0	// prefetch tp1
+	MOVW.P	t1,4(key)
+	SUB.S	$1,t4,t4
+	BNE	dk_mix
+
+	RET
+
+// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
+//
+// Decrypts the 16-byte block at src into the 16 bytes at dst using the
+// expanded decryption key at xk. The first four key words are XORed into
+// the state, then dec_loop performs one table-driven round per pass,
+// decrementing nr and repeating while it is still positive; a final round
+// built from sbox1 byte lookups and a last round-key XOR follows. The nr
+// argument slot on the stack doubles as the loop counter and is
+// overwritten.
+//
+// Register roles: s0-s3 hold the state words, t1-t3 and i1-i3 are scratch,
+// tbl points at td0 and later sbox1, key walks the round keys, and t4 holds
+// the src pointer, then the 0xff byte mask, then the dst pointer.
+TEXT	·decryptBlockAsm(SB), NOSPLIT, $4-16
+	MOVW	src+12(FP), t4
+	MOVW	$·td0(SB), tbl
+	MOVW	xk+4(FP), key
+
+#ifndef ARM_ARCH_7
+	MOVBU	3(t4),s0	// load input data in endian-neutral
+	MOVBU	2(t4),t1	// manner...
+	MOVBU	1(t4),t2
+	MOVBU	0(t4),t3
+	ORR	t1<<8,s0,s0
+	MOVBU	7(t4),s1
+	ORR	t2<<16,s0,s0
+	MOVBU	6(t4),t1
+	ORR	t3<<24,s0,s0
+	MOVBU	5(t4),t2
+	MOVBU	4(t4),t3
+	ORR	t1<<8,s1,s1
+	MOVBU	11(t4),s2
+	ORR	t2<<16,s1,s1
+	MOVBU	10(t4),t1
+	ORR	t3<<24,s1,s1
+	MOVBU	9(t4),t2
+	MOVBU	8(t4),t3
+	ORR	t1<<8,s2,s2
+	MOVBU	15(t4),s3
+	ORR	t2<<16,s2,s2
+	MOVBU	14(t4),t1
+	ORR	t3<<24,s2,s2
+	MOVBU	13(t4),t2
+	MOVBU	12(t4),t3
+	ORR	t1<<8,s3,s3
+	ORR	t2<<16,s3,s3
+	ORR	t3<<24,s3,s3
+#else
+	MOVW	0(t4),s0
+	MOVW	4(t4),s1
+	MOVW	8(t4),s2
+	MOVW	12(t4),s3
+	REV	s0,s0
+	REV	s1,s1
+	REV	s2,s2
+	REV	s3,s3
+#endif
+
+	// The round count comes from the nr argument, not from a field stored
+	// after the key schedule, so nothing is read at offset 240 of xk: the
+	// schedule is at most 240 bytes long and such a load would run past it.
+	MOVM.IA.W	(key),[t1-i1]	// first four round-key words; key advances by 16
+	EOR	t1,s0,s0
+	EOR	t2,s1,s1
+	EOR	t3,s2,s2
+	EOR	i1,s3,s3
+	MOVW	$255,t4	// t4 is the byte mask from here on
+
+	AND	s0>>16,t4,i1
+	AND	s0>>8,t4,i2
+	AND	s0,t4,i3
+	MOVW	s0>>24,s0
+dec_loop:
+	MOVW	i1<<2(tbl),t1	// Td1[s0>>16]
+	AND	s1,t4,i1	// i0
+	MOVW	i2<<2(tbl),t2	// Td2[s0>>8]
+	AND	s1>>16,t4,i2
+	MOVW	i3<<2(tbl),t3	// Td3[s0>>0]
+	AND	s1>>8,t4,i3
+	MOVW	s0<<2(tbl),s0	// Td0[s0>>24]
+	MOVW	s1>>24,s1
+
+	MOVW	i1<<2(tbl),i1	// Td3[s1>>0]
+	MOVW	i2<<2(tbl),i2	// Td1[s1>>16]
+	MOVW	i3<<2(tbl),i3	// Td2[s1>>8]
+	EOR	i1@>24,s0,s0
+	MOVW	s1<<2(tbl),s1	// Td0[s1>>24]
+	AND	s2>>8,t4,i1	// i0
+	EOR	t2@>8,i2,t2
+	AND	s2,t4,i2	// i1
+	EOR	t3@>8,i3,t3
+	AND	s2>>16,t4,i3
+	MOVW	i1<<2(tbl),i1	// Td2[s2>>8]
+	EOR	t1@>8,s1,s1
+	MOVW	i2<<2(tbl),i2	// Td3[s2>>0]
+	MOVW	s2>>24,s2
+
+	MOVW	i3<<2(tbl),i3	// Td1[s2>>16]
+	EOR	i1@>16,s0,s0
+	MOVW	s2<<2(tbl),s2	// Td0[s2>>24]
+	AND	s3>>16,t4,i1	// i0
+	EOR	i2@>24,s1,s1
+	AND	s3>>8,t4,i2	// i1
+	EOR	t3@>8,i3,t3
+	AND	s3,t4,i3	// i2
+	MOVW	i1<<2(tbl),i1	// Td1[s3>>16]
+	EOR	t2@>8,s2,s2
+	MOVW	i2<<2(tbl),i2	// Td2[s3>>8]
+	MOVW	s3>>24,s3
+
+	MOVW	i3<<2(tbl),i3	// Td3[s3>>0]
+	EOR	i1@>8,s0,s0
+	MOVW.P	16(key),i1
+	EOR	i2@>16,s1,s1
+	MOVW	s3<<2(tbl),s3	// Td0[s3>>24]
+	EOR	i3@>24,s2,s2
+
+	MOVW	-12(key),t1
+	EOR	i1,s0,s0
+	MOVW	-8(key),t2
+	EOR	t3@>8,s3,s3
+	MOVW	-4(key),t3
+	AND	s0>>16,t4,i1
+	EOR	t1,s1,s1
+	MOVW	nr+0(FP), t1	// reload the round counter spilled to the nr slot
+	AND	s0>>8,t4,i2
+	EOR	t2,s2,s2
+	AND	s0,t4,i3
+	EOR	t3,s3,s3
+	SUB.S	$1,t1,t1	// sets the flags tested by BGT below
+	MOVW	s0>>24,s0
+
+	MOVW	t1, nr+0(FP)	// spill the decremented round counter
+	BGT	dec_loop
+
+	MOVW	$·sbox1(SB),tbl
+
+	MOVW	0(tbl),t2	// prefetch Td4
+	MOVW	32(tbl),t3
+	MOVW	64(tbl),t1
+	MOVW	96(tbl),t2
+	MOVW	128(tbl),t3
+	MOVW	160(tbl),t1
+	MOVW	192(tbl),t2
+	MOVW	224(tbl),t3
+
+	MOVBU	s0<<0(tbl),s0	// Td4[s0>>24]
+	MOVBU	i1<<0(tbl),t1	// Td4[s0>>16]
+	AND	s1,t4,i1	// i0
+	MOVBU	i2<<0(tbl),t2	// Td4[s0>>8]
+	AND	s1>>16,t4,i2
+	MOVBU	i3<<0(tbl),t3	// Td4[s0>>0]
+	AND	s1>>8,t4,i3
+
+	ADD	s1>>24,tbl,s1
+	MOVBU	i1<<0(tbl),i1	// Td4[s1>>0]
+	MOVBU	(s1),s1		// Td4[s1>>24]
+	MOVBU	i2<<0(tbl),i2	// Td4[s1>>16]
+	EOR	s0<<24,i1,s0
+	MOVBU	i3<<0(tbl),i3	// Td4[s1>>8]
+	EOR	s1<<8,t1,s1
+	AND	s2>>8,t4,i1	// i0
+	EOR	i2<<8,t2,t2
+	AND	s2,t4,i2	// i1
+	MOVBU	i1<<0(tbl),i1	// Td4[s2>>8]
+	EOR	i3<<8,t3,t3
+	MOVBU	i2<<0(tbl),i2	// Td4[s2>>0]
+	AND	s2>>16,t4,i3
+
+	ADD	s2>>24,tbl,s2
+	MOVBU	(s2),s2		// Td4[s2>>24]
+	EOR	i1<<8,s0,s0
+	MOVBU	i3<<0(tbl),i3	// Td4[s2>>16]
+	EOR	s1<<16,i2,s1
+	AND	s3>>16,t4,i1	// i0
+	EOR	s2<<16,t2,s2
+	AND	s3>>8,t4,i2	// i1
+	MOVBU	i1<<0(tbl),i1	// Td4[s3>>16]
+	EOR	i3<<16,t3,t3
+	MOVBU	i2<<0(tbl),i2	// Td4[s3>>8]
+	AND	s3,t4,i3	// i2
+
+	ADD	s3>>24,tbl,s3
+	MOVBU	i3<<0(tbl),i3	// Td4[s3>>0]
+	MOVBU	(s3),s3		// Td4[s3>>24]
+	EOR	i1<<16,s0,s0
+	MOVW	0(key),i1
+	EOR	i2<<8,s1,s1
+	MOVW	4(key),t1
+	EOR	s2<<8,i3,s2
+	MOVW	8(key),t2
+	EOR	s3<<24,t3,s3
+	MOVW	12(key),t3
+
+	EOR	i1,s0,s0
+	EOR	t1,s1,s1
+	EOR	t2,s2,s2
+	EOR	t3,s3,s3
+
+	MOVW	dst+8(FP), t4
+
+#ifdef ARM_ARCH_7
+	REV	s0,s0
+	REV	s1,s1
+	REV	s2,s2
+	REV	s3,s3
+	MOVW	s0,0(t4)
+	MOVW	s1,4(t4)
+	MOVW	s2,8(t4)
+	MOVW	s3,12(t4)
+#else
+	MOVW	s0>>24,t1	// write output in endian-neutral
+	MOVW	s0>>16,t2	// manner...
+	MOVW	s0>>8,t3
+	MOVBU	t1,0(t4)
+	MOVBU	t2,1(t4)
+	MOVW	s1>>24,t1
+	MOVBU	t3,2(t4)
+	MOVW	s1>>16,t2
+	MOVBU	s0,3(t4)
+	MOVW	s1>>8,t3
+	MOVBU	t1,4(t4)
+	MOVBU	t2,5(t4)
+	MOVW	s2>>24,t1
+	MOVBU	t3,6(t4)
+	MOVW	s2>>16,t2
+	MOVBU	s1,7(t4)
+	MOVW	s2>>8,t3
+	MOVBU	t1,8(t4)
+	MOVBU	t2,9(t4)
+	MOVW	s3>>24,t1
+	MOVBU	t3,10(t4)
+	MOVW	s3>>16,t2
+	MOVBU	s2,11(t4)
+	MOVW	s3>>8,t3
+	MOVBU	t1,12(t4)
+	MOVBU	t2,13(t4)
+	MOVBU	t3,14(t4)
+	MOVBU	s3,15(t4)
+#endif
+	RET
diff --git a/src/crypto/aes/block.go b/src/crypto/aes/block.go
index 41ea9cf..e525081 100644
--- a/src/crypto/aes/block.go
+++ b/src/crypto/aes/block.go
@@ -36,6 +36,8 @@
 
 package aes
 
+import "fmt"
+
 // Encrypt one block from src into dst, using the expanded key xk.
 func encryptBlockGo(xk []uint32, dst, src []byte) {
 	var s0, s1, s2, s3, t0, t1, t2, t3 uint32
@@ -56,6 +58,7 @@
 	nr := len(xk)/4 - 2 // - 2: one above, one more below
 	k := 4
 	for r := 0; r < nr; r++ {
+		fmt.Printf("0x%08X 0x%08X 0x%08X 0x%08X\n", s0, s1, s2, s3)
 		t0 = xk[k+0] ^ te0[uint8(s0>>24)] ^ te1[uint8(s1>>16)] ^ te2[uint8(s2>>8)] ^ te3[uint8(s3)]
 		t1 = xk[k+1] ^ te0[uint8(s1>>24)] ^ te1[uint8(s2>>16)] ^ te2[uint8(s3>>8)] ^ te3[uint8(s0)]
 		t2 = xk[k+2] ^ te0[uint8(s2>>24)] ^ te1[uint8(s3>>16)] ^ te2[uint8(s0>>8)] ^ te3[uint8(s1)]
@@ -63,6 +66,7 @@
 		k += 4
 		s0, s1, s2, s3 = t0, t1, t2, t3
 	}
+	fmt.Printf("0x%08X 0x%08X 0x%08X 0x%08X\n", s0, s1, s2, s3)
 
 	// Last round uses s-box directly and XORs to produce output.
 	s0 = uint32(sbox0[t0>>24])<<24 | uint32(sbox0[t1>>16&0xff])<<16 | uint32(sbox0[t2>>8&0xff])<<8 | uint32(sbox0[t3&0xff])
diff --git a/src/crypto/aes/cipher_arm.go b/src/crypto/aes/cipher_arm.go
new file mode 100644
index 0000000..d09f404
--- /dev/null
+++ b/src/crypto/aes/cipher_arm.go
@@ -0,0 +1,99 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package aes
+
+import (
+	"crypto/cipher"
+	"fmt"
+)
+
+// defined in asm_arm.s
+func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
+func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
+func expandKeyEncAsm(nr int, userKey *byte, enc *uint32)
+func expandKeyDecAsm(nr int, enc *uint32, dec *uint32)
+
+// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
+
+// aesCipherAsm embeds aesCipher; its Encrypt and Decrypt methods hand the
+// enc and dec key schedules to the routines defined in asm_arm.s.
+type aesCipherAsm struct {
+	aesCipher
+}
+
+func newCipher(key []byte) (cipher.Block, error) {
+	n := len(key) + 28
+	c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
+	rounds := 10
+	switch len(key) {
+	case 128 / 8:
+		rounds = 10
+	case 192 / 8:
+		rounds = 12
+	case 256 / 8:
+		rounds = 14
+	}
+	//expandKeyAsm(rounds, &key[0], &c.enc[0], &c.dec[0])
+	//expandKeyAsm(key, c.enc, c.dec)
+	expandKeyEncAsm(rounds, &key[0], &c.enc[0])
+	expandKeyDecAsm(rounds, &c.enc[0], &c.dec[0])
+
+	// FIXME
+	// if hasGCMAsm() {
+	// 	return &aesCipherGCM{c}, nil
+	// }
+
+	return &c, nil
+}
+
+// BlockSize reports the cipher's block size, the package constant BlockSize.
+func (c *aesCipherAsm) BlockSize() int {
+	return BlockSize
+}
+
+// Encrypt encrypts the first block of src into dst using the assembly
+// implementation. It panics if either slice is shorter than BlockSize.
+func (c *aesCipherAsm) Encrypt(dst, src []byte) {
+	switch {
+	case len(src) < BlockSize:
+		panic("crypto/aes: input not full block")
+	case len(dst) < BlockSize:
+		panic("crypto/aes: output not full block")
+	}
+	// The assembly loop runs once per round except the last, which it
+	// handles separately; the schedule holds four words per round key.
+	loopRounds := len(c.enc)/4 - 2
+	encryptBlockAsm(loopRounds, &c.enc[0], &dst[0], &src[0])
+}
+
+// Decrypt decrypts the first block of src into dst using the assembly
+// implementation. It panics if either slice is shorter than BlockSize.
+func (c *aesCipherAsm) Decrypt(dst, src []byte) {
+	switch {
+	case len(src) < BlockSize:
+		panic("crypto/aes: input not full block")
+	case len(dst) < BlockSize:
+		panic("crypto/aes: output not full block")
+	}
+	// The assembly loop runs once per round except the last, which it
+	// handles separately; the schedule holds four words per round key.
+	loopRounds := len(c.dec)/4 - 2
+	decryptBlockAsm(loopRounds, &c.dec[0], &dst[0], &src[0])
+}
+
+// expandKey is used by BenchmarkExpand to ensure that the asm implementation
+// of key expansion is used for the benchmark when it is available.
+func expandKey(key []byte, enc, dec []uint32) {
+	rounds := 10 // rounds needed for AES128
+	switch len(key) {
+	case 192 / 8:
+		rounds = 12
+	case 256 / 8:
+		rounds = 14
+	}
+	//expandKeyAsm(rounds, &key[0], &enc[0], &dec[0])
+	expandKeyEncAsm(rounds, &key[0], &enc[0])
+	if dec != nil {
+		expandKeyDecAsm(rounds, &enc[0], &dec[0])
+	}
+}
+
+// printUint32 prints x to standard output as a 0x-prefixed, zero-padded,
+// eight-digit upper-case hexadecimal value followed by a newline.
+// NOTE(review): debugging helper; it is the only user of the fmt import in
+// this file, so consider dropping both before this change is submitted.
+func printUint32(x uint32) {
+	fmt.Printf("0x%08X\n", x)
+}
+
+// rcon holds the round constants that asm_arm.s reads during key
+// expansion. Only the first ten entries are set; the remaining six keep
+// their zero value.
+var rcon = [16]uint32{
+	0x01000000,
+	0x02000000,
+	0x04000000,
+	0x08000000,
+	0x10000000,
+	0x20000000,
+	0x40000000,
+	0x80000000,
+	0x1B000000,
+	0x36000000,
+}
diff --git a/src/crypto/aes/cipher_generic.go b/src/crypto/aes/cipher_generic.go
index ca74aa8..411adc8 100644
--- a/src/crypto/aes/cipher_generic.go
+++ b/src/crypto/aes/cipher_generic.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build !amd64,!s390x,!ppc64le
+// +build !amd64,!s390x,!ppc64le,!arm
 
 package aes
 

To view, visit change 38366. To unsubscribe, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-MessageType: newchange
Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
Gerrit-Change-Number: 38366
Gerrit-PatchSet: 1
Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>

Brad Fitzpatrick (Gerrit)

unread,
Mar 22, 2017, 4:41:47 PM3/22/17
to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

Brad Fitzpatrick posted comments on this change.

View Change

Patch set 1:

RELNOTE=yes

    To view, visit change 38366. To unsubscribe, visit settings.

    Gerrit-Project: go
    Gerrit-Branch: master
    Gerrit-MessageType: comment
    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
    Gerrit-Change-Number: 38366
    Gerrit-PatchSet: 1
    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
    Gerrit-CC: Brad Fitzpatrick <brad...@golang.org>
    Gerrit-Comment-Date: Wed, 22 Mar 2017 20:41:45 +0000
    Gerrit-HasComments: No

    Brad Fitzpatrick (Gerrit)

    unread,
    Mar 22, 2017, 4:42:07 PM3/22/17
    to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

    Brad Fitzpatrick posted comments on this change.

    View Change

    Patch set 1:Run-TryBot +1

      To view, visit change 38366. To unsubscribe, visit settings.

      Gerrit-Project: go
      Gerrit-Branch: master
      Gerrit-MessageType: comment
      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
      Gerrit-Change-Number: 38366
      Gerrit-PatchSet: 1
      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
      Gerrit-Comment-Date: Wed, 22 Mar 2017 20:42:05 +0000
      Gerrit-HasComments: No

      Gobot Gobot (Gerrit)

      unread,
      Mar 22, 2017, 4:42:19 PM3/22/17
      to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

      Gobot Gobot posted comments on this change.

      View Change

      Patch set 1:

      TryBots beginning. Status page: http://farmer.golang.org/try?commit=e8a35f90

        To view, visit change 38366. To unsubscribe, visit settings.

        Gerrit-Project: go
        Gerrit-Branch: master
        Gerrit-MessageType: comment
        Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
        Gerrit-Change-Number: 38366
        Gerrit-PatchSet: 1
        Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
        Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
        Gerrit-CC: Gobot Gobot <go...@golang.org>
        Gerrit-Comment-Date: Wed, 22 Mar 2017 20:42:17 +0000
        Gerrit-HasComments: No

        Gobot Gobot (Gerrit)

        unread,
        Mar 22, 2017, 4:43:38 PM3/22/17
        to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

        Gobot Gobot posted comments on this change.

        View Change

        Patch set 1:

        Build is still in progress... This change failed on darwin-amd64-10_11: See https://storage.googleapis.com/go-build-log/e8a35f90/darwin-amd64-10_11_4bb365ef.log

        Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.

          To view, visit change 38366. To unsubscribe, visit settings.

          Gerrit-Project: go
          Gerrit-Branch: master
          Gerrit-MessageType: comment
          Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
          Gerrit-Change-Number: 38366
          Gerrit-PatchSet: 1
          Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
          Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
          Gerrit-CC: Gobot Gobot <go...@golang.org>
          Gerrit-Comment-Date: Wed, 22 Mar 2017 20:43:36 +0000
          Gerrit-HasComments: No

          Gobot Gobot (Gerrit)

          unread,
          Mar 22, 2017, 4:47:57 PM3/22/17
          to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

          Gobot Gobot posted comments on this change.

          View Change

          Patch set 1:TryBot-Result -1

          12 of 17 TryBots failed: Failed on darwin-amd64-10_11: https://storage.googleapis.com/go-build-log/e8a35f90/darwin-amd64-10_11_4bb365ef.log Failed on nacl-amd64p32: https://storage.googleapis.com/go-build-log/e8a35f90/nacl-amd64p32_6826e765.log Failed on nacl-386: https://storage.googleapis.com/go-build-log/e8a35f90/nacl-386_cddf2da9.log Failed on linux-amd64: https://storage.googleapis.com/go-build-log/e8a35f90/linux-amd64_f482b80e.log Failed on linux-386: https://storage.googleapis.com/go-build-log/e8a35f90/linux-386_6bb42d4f.log Failed on freebsd-amd64-gce101: https://storage.googleapis.com/go-build-log/e8a35f90/freebsd-amd64-gce101_f8b37538.log Failed on windows-386-gce: https://storage.googleapis.com/go-build-log/e8a35f90/windows-386-gce_5ab97d33.log Failed on windows-amd64-gce: https://storage.googleapis.com/go-build-log/e8a35f90/windows-amd64-gce_745a8d17.log Failed on linux-amd64-race: https://storage.googleapis.com/go-build-log/e8a35f90/linux-amd64-race_26b5bb52.log Failed on openbsd-amd64-60: https://storage.googleapis.com/go-build-log/e8a35f90/openbsd-amd64-60_3b8ccac0.log Failed on misc-compile: https://storage.googleapis.com/go-build-log/e8a35f90/misc-compile_dd1a0025.log Failed on linux-arm: https://storage.googleapis.com/go-build-log/e8a35f90/linux-arm_87fadba3.log

          Consult https://build.golang.org/ to see whether they are new failures.

            To view, visit change 38366. To unsubscribe, visit settings.

            Gerrit-Project: go
            Gerrit-Branch: master
            Gerrit-MessageType: comment
            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            Gerrit-Change-Number: 38366
            Gerrit-PatchSet: 1
            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
            Gerrit-Comment-Date: Wed, 22 Mar 2017 20:47:55 +0000
            Gerrit-HasComments: No

            Nick Craig-Wood (Gerrit)

            unread,
            Mar 22, 2017, 5:07:21 PM3/22/17
            to Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

            Nick Craig-Wood uploaded patch set #2 to this change.

            View Change

            crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey
            
            ARM assembly for AES crypto adapted from openssl giving an
            encrypt/decrypt speed up of 1.6-2.7x and a key scheduling speedup of
            2.3-4.5x.
            
            Raspberry Pi 3 BCM2709 ARMv7 Processor rev 5 (v7l)
            
            name       old time/op    new time/op     delta
            Encrypt-4    3.12µs ± 1%     1.13µs ± 2%   -63.86%  (p=0.000 n=20+20)
            Decrypt-4    3.10µs ± 1%     1.21µs ± 2%   -60.91%  (p=0.000 n=20+20)
            Expand-4     11.3µs ± 1%      2.5µs ± 1%   -78.11%  (p=0.000 n=16+16)
            
            name       old speed      new speed       delta
            Encrypt-4  5.13MB/s ± 2%  14.18MB/s ± 2%  +176.58%  (p=0.000 n=20+20)
            Decrypt-4  5.16MB/s ± 1%  13.19MB/s ± 2%  +155.78%  (p=0.000 n=20+20)
            
            Chromebook Samsung Exynos5 ARMv7 Processor rev 4 (v7l)
            
            name       old time/op    new time/op    delta
            Encrypt-2     342ns ± 1%     210ns ± 1%  -38.80%  (p=0.000 n=16+20)
            Decrypt-2     343ns ± 6%     209ns ± 2%  -39.16%  (p=0.000 n=17+18)
            Expand-2     1.64µs ± 5%    0.70µs ± 1%  -57.29%  (p=0.000 n=17+19)
            
            name       old speed      new speed      delta
            Encrypt-2  46.7MB/s ± 1%  76.2MB/s ± 1%  +63.35%  (p=0.000 n=16+20)
            Decrypt-2  46.4MB/s ± 7%  76.4MB/s ± 2%  +64.75%  (p=0.000 n=18+18)
            
            Issue #4299
            
            Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            ---
            A src/crypto/aes/asm_arm.s
            A src/crypto/aes/cipher_arm.go
            M src/crypto/aes/cipher_generic.go
            3 files changed, 925 insertions(+), 1 deletion(-)
            
            
            >8,s3,s3
            +	MOVW	-4(key),t3
            +	AND	s0>>16,t4,i1
            +	EOR	t1,s1,s1
            +	MOVW	nr+0(FP), t1
            +	AND	s0>>8,t4,i2
            +	EOR	t2,s2,s2
            +	AND	s0,t4,i3
            +	EOR	t3,s3,s3
            +	SUB.S	$1,t1,t1
            +	MOVW	s0>>24,s0
            +
            +	MOVW	t1, nr+0(FP)
            +	BGT	dec_loop
            +
            +	MOVW	$·sbox1(SB),tbl
            +
            +	MOVW	0(tbl),t2	// prefetch Td4
            +	MOVW	32(tbl),t3
            +	MOVW	64(tbl),t1
            +	MOVW	96(tbl),t2
            +	MOVW	128(tbl),t3
            +	MOVW	160(tbl),t1
            +	MOVW	192(tbl),t2
            +	MOVW	224(tbl),t3
            +
            +	MOVBU	s0<<0(tbl),s0	// Td4[s0>>24]
            +	MOVBU	i1<<0(tbl),t1	// Td4[s0>>16]
            +	AND	s1,t4,i1	// i0
            +	MOVBU	i2<<0(tbl),t2	// Td4[s0>>8]
            +	AND	s1>>16,t4,i2
            +	MOVBU	i3<<0(tbl),t3	// Td4[s0>>0]
            +	AND	s1>>8,t4,i3
            +
            +	ADD	s1>>24,tbl,s1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s1>>0]
            +	MOVBU	(s1),s1		// Td4[s1>>24]
            +	MOVBU	i2<<0(tbl),i2	// Td4[s1>>16]
            +	EOR	s0<<24,i1,s0
            +	MOVBU	i3<<0(tbl),i3	// Td4[s1>>8]
            +	EOR	s1<<8,t1,s1
            +	AND	s2>>8,t4,i1	// i0
            +	EOR	i2<<8,t2,t2
            +	AND	s2,t4,i2	// i1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s2>>8]
            +	EOR	i3<<8,t3,t3
            +	MOVBU	i2<<0(tbl),i2	// Td4[s2>>0]
            +	AND	s2>>16,t4,i3
            +
            +	ADD	s2>>24,tbl,s2
            +	MOVBU	(s2),s2		// Td4[s2>>24]
            +	EOR	i1<<8,s0,s0
            +	MOVBU	i3<<0(tbl),i3	// Td4[s2>>16]
            +	EOR	s1<<16,i2,s1
            +	AND	s3>>16,t4,i1	// i0
            +	EOR	s2<<16,t2,s2
            +	AND	s3>>8,t4,i2	// i1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s3>>16]
            +	EOR	i3<<16,t3,t3
            +	MOVBU	i2<<0(tbl),i2	// Td4[s3>>8]
            +	AND	s3,t4,i3	// i2
            +
            +	ADD	s3>>24,tbl,s3
            +	MOVBU	i3<<0(tbl),i3	// Td4[s3>>0]
            +	MOVBU	(s3),s3		// Td4[s3>>24]
            +	EOR	i1<<16,s0,s0
            +	MOVW	0(key),i1
            +	EOR	i2<<8,s1,s1
            +	MOVW	4(key),t1
            +	EOR	s2<<8,i3,s2
            +	MOVW	8(key),t2
            +	EOR	s3<<24,t3,s3
            +	MOVW	12(key),t3
            +
            +	EOR	i1,s0,s0
            +	EOR	t1,s1,s1
            +	EOR	t2,s2,s2
            +	EOR	t3,s3,s3
            +
            +	MOVW	dst+8(FP), t4
            +
            +#ifdef ARM_ARCH_7
            +	REV	s0,s0
            +	REV	s1,s1
            +	REV	s2,s2
            +	REV	s3,s3
            +	MOVW	s0,0(t4)
            +	MOVW	s1,4(t4)
            +	MOVW	s2,8(t4)
            +	MOVW	s3,12(t4)
            +#else
            +	MOVW	s0>>24,t1	// write output in endian-neutral
            +	MOVW	s0>>16,t2	// manner...
            +	MOVW	s0>>8,t3
            +	MOVBU	t1,0(t4)
            +	MOVBU	t2,1(t4)
            +	MOVW	s1>>24,t1
            +	MOVBU	t3,2(t4)
            +	MOVW	s1>>16,t2
            +	MOVBU	s0,3(t4)
            +	MOVW	s1>>8,t3
            +	MOVBU	t1,4(t4)
            +	MOVBU	t2,5(t4)
            +	MOVW	s2>>24,t1
            +	MOVBU	t3,6(t4)
            +	MOVW	s2>>16,t2
            +	MOVBU	s1,7(t4)
            +	MOVW	s2>>8,t3
            +	MOVBU	t1,8(t4)
            +	MOVBU	t2,9(t4)
            +	MOVW	s3>>24,t1
            +	MOVBU	t3,10(t4)
            +	MOVW	s3>>16,t2
            +	MOVBU	s2,11(t4)
            +	MOVW	s3>>8,t3
            +	MOVBU	t1,12(t4)
            +	MOVBU	t2,13(t4)
            +	MOVBU	t3,14(t4)
            +	MOVBU	s3,15(t4)
            +#endif
            +	RET
            diff --git a/src/crypto/aes/cipher_arm.go b/src/crypto/aes/cipher_arm.go
            new file mode 100644
            index 0000000..a0b3823
            --- /dev/null
            +++ b/src/crypto/aes/cipher_arm.go
            @@ -0,0 +1,82 @@
            +// Copyright 2012 The Go Authors. All rights reserved.
            +// Use of this source code is governed by a BSD-style
            +// license that can be found in the LICENSE file.
            +
            +package aes
            +
            +import (
            +	"crypto/cipher"
            +)
            +
            +// defined in asm_arm.s
            +func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
            +func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
            +func expandKeyEncAsm(nr int, userKey *byte, enc *uint32)
            +func expandKeyDecAsm(nr int, enc *uint32, dec *uint32)
            +
            +type aesCipherAsm struct {
            +	aesCipher
            +}
            +
            +func newCipher(key []byte) (cipher.Block, error) {
            +	n := len(key) + 28
            +	c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
            +	rounds := 10
            +	switch len(key) {
            +	case 128 / 8:
            +		rounds = 10
            +	case 192 / 8:
            +		rounds = 12
            +	case 256 / 8:
            +		rounds = 14
            +	}
            +	expandKeyEncAsm(rounds, &key[0], &c.enc[0])
            +	expandKeyDecAsm(rounds, &c.enc[0], &c.dec[0])
            +	return &c, nil
            +}
            +
            +func (c *aesCipherAsm) BlockSize() int { return BlockSize }
            +
            +func (c *aesCipherAsm) Encrypt(dst, src []byte) {
            +	if len(src) < BlockSize {
            +		panic("crypto/aes: input not full block")
            +	}
            +	if len(dst) < BlockSize {
            +		panic("crypto/aes: output not full block")
            +	}
            +	encryptBlockAsm(len(c.enc)/4-2, &c.enc[0], &dst[0], &src[0])
            +}
            +
            +func (c *aesCipherAsm) Decrypt(dst, src []byte) {
            +	if len(src) < BlockSize {
            +		panic("crypto/aes: input not full block")
            +	}
            +	if len(dst) < BlockSize {
            +		panic("crypto/aes: output not full block")
            +	}
            +	decryptBlockAsm(len(c.dec)/4-2, &c.dec[0], &dst[0], &src[0])
            +}
            +
            +// expandKey is used by BenchmarkExpand to ensure that the asm implementation
            +// of key expansion is used for the benchmark when it is available.
            +func expandKey(key []byte, enc, dec []uint32) {
            +	rounds := 10 // rounds needed for AES128
            +	switch len(key) {
            +	case 192 / 8:
            +		rounds = 12
            +	case 256 / 8:
            +		rounds = 14
            +	}
            +	expandKeyEncAsm(rounds, &key[0], &enc[0])
            +	if dec != nil {
            +		expandKeyDecAsm(rounds, &enc[0], &dec[0])
            +	}
            +}
            +
            +// rcon table used by asm_arm.s
            +var rcon = [16]uint32{
            +	0x01000000, 0x02000000, 0x04000000, 0x08000000,
            +	0x10000000, 0x20000000, 0x40000000, 0x80000000,
            +	0x1B000000, 0x36000000, 0, 0,
            +	0, 0, 0, 0,
            +}
            diff --git a/src/crypto/aes/cipher_generic.go b/src/crypto/aes/cipher_generic.go
            index ca74aa8..411adc8 100644
            --- a/src/crypto/aes/cipher_generic.go
            +++ b/src/crypto/aes/cipher_generic.go
            @@ -2,7 +2,7 @@
             // Use of this source code is governed by a BSD-style
             // license that can be found in the LICENSE file.
             
            -// +build !amd64,!s390x,!ppc64le
            +// +build !amd64,!s390x,!ppc64le,!arm
             
             package aes
             
            

            To view, visit change 38366. To unsubscribe, visit settings.

            Gerrit-Project: go
            Gerrit-Branch: master
            Gerrit-MessageType: newpatchset
            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            Gerrit-Change-Number: 38366
            Gerrit-PatchSet: 2

            Brad Fitzpatrick (Gerrit)

            unread,
            Mar 22, 2017, 5:08:31 PM3/22/17
            to Nick Craig-Wood, Brad Fitzpatrick, Gobot Gobot, golang-co...@googlegroups.com

            Brad Fitzpatrick posted comments on this change.

            View Change

            Patch set 2:

            (1 comment)

            To view, visit change 38366. To unsubscribe, visit settings.

            Gerrit-Project: go
            Gerrit-Branch: master
            Gerrit-MessageType: comment
            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            Gerrit-Change-Number: 38366
            Gerrit-PatchSet: 2
            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
            Gerrit-Comment-Date: Wed, 22 Mar 2017 21:08:29 +0000
            Gerrit-HasComments: Yes

            Nick Craig-Wood (Gerrit)

            unread,
            Mar 22, 2017, 5:27:24 PM3/22/17
            to Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

            Nick Craig-Wood uploaded patch set #3 to this change.

            View Change

            crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey
            
            ARM assembly for AES crypto adapted from openssl giving an
            encrypt/decrypt speed up of 1.6-2.7x and a key scheduling speedup of
            2.3-4.5x.
            
            Raspberry Pi 3 BCM2709 ARMv7 Processor rev 5 (v7l)
            
            name       old time/op    new time/op     delta
            Encrypt-4    3.12µs ± 1%     1.13µs ± 2%   -63.86%  (p=0.000 n=20+20)
            Decrypt-4    3.10µs ± 1%     1.21µs ± 2%   -60.91%  (p=0.000 n=20+20)
            Expand-4     11.3µs ± 1%      2.5µs ± 1%   -78.11%  (p=0.000 n=16+16)
            
            name       old speed      new speed       delta
            Encrypt-4  5.13MB/s ± 2%  14.18MB/s ± 2%  +176.58%  (p=0.000 n=20+20)
            Decrypt-4  5.16MB/s ± 1%  13.19MB/s ± 2%  +155.78%  (p=0.000 n=20+20)
            
            Chromebook Samsung Exynos5 ARMv7 Processor rev 4 (v7l)
            
            name       old time/op    new time/op    delta
            Encrypt-2     342ns ± 1%     210ns ± 1%  -38.80%  (p=0.000 n=16+20)
            Decrypt-2     343ns ± 6%     209ns ± 2%  -39.16%  (p=0.000 n=17+18)
            Expand-2     1.64µs ± 5%    0.70µs ± 1%  -57.29%  (p=0.000 n=17+19)
            
            name       old speed      new speed      delta
            Encrypt-2  46.7MB/s ± 1%  76.2MB/s ± 1%  +63.35%  (p=0.000 n=16+20)
            Decrypt-2  46.4MB/s ± 7%  76.4MB/s ± 2%  +64.75%  (p=0.000 n=18+18)
            
            Issue #4299
            
            Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            ---
            A src/crypto/aes/asm_arm.s
            A src/crypto/aes/cipher_arm.go
            M src/crypto/aes/cipher_generic.go
            3 files changed, 925 insertions(+), 1 deletion(-)
            
            
            >8,s3,s3
            +	MOVW	-4(key),t3
            +	AND	s0>>16,t4,i1
            +	EOR	t1,s1,s1
            +	MOVW	nr+0(FP), t1
            +	AND	s0>>8,t4,i2
            +	EOR	t2,s2,s2
            +	AND	s0,t4,i3
            +	EOR	t3,s3,s3
            +	SUB.S	$1,t1,t1
            +	MOVW	s0>>24,s0
            +
            +	MOVW	t1, nr+0(FP)
            +	BGT	dec_loop
            +
            +	MOVW	$·sbox1(SB),tbl
            +
            +	MOVW	0(tbl),t2	// prefetch Td4
            +	MOVW	32(tbl),t3
            +	MOVW	64(tbl),t1
            +	MOVW	96(tbl),t2
            +	MOVW	128(tbl),t3
            +	MOVW	160(tbl),t1
            +	MOVW	192(tbl),t2
            +	MOVW	224(tbl),t3
            +
            +	MOVBU	s0<<0(tbl),s0	// Td4[s0>>24]
            +	MOVBU	i1<<0(tbl),t1	// Td4[s0>>16]
            +	AND	s1,t4,i1	// i0
            +	MOVBU	i2<<0(tbl),t2	// Td4[s0>>8]
            +	AND	s1>>16,t4,i2
            +	MOVBU	i3<<0(tbl),t3	// Td4[s0>>0]
            +	AND	s1>>8,t4,i3
            +
            +	ADD	s1>>24,tbl,s1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s1>>0]
            +	MOVBU	(s1),s1		// Td4[s1>>24]
            +	MOVBU	i2<<0(tbl),i2	// Td4[s1>>16]
            +	EOR	s0<<24,i1,s0
            +	MOVBU	i3<<0(tbl),i3	// Td4[s1>>8]
            +	EOR	s1<<8,t1,s1
            +	AND	s2>>8,t4,i1	// i0
            +	EOR	i2<<8,t2,t2
            +	AND	s2,t4,i2	// i1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s2>>8]
            +	EOR	i3<<8,t3,t3
            +	MOVBU	i2<<0(tbl),i2	// Td4[s2>>0]
            +	AND	s2>>16,t4,i3
            +
            +	ADD	s2>>24,tbl,s2
            +	MOVBU	(s2),s2		// Td4[s2>>24]
            +	EOR	i1<<8,s0,s0
            +	MOVBU	i3<<0(tbl),i3	// Td4[s2>>16]
            +	EOR	s1<<16,i2,s1
            +	AND	s3>>16,t4,i1	// i0
            +	EOR	s2<<16,t2,s2
            +	AND	s3>>8,t4,i2	// i1
            +	MOVBU	i1<<0(tbl),i1	// Td4[s3>>16]
            +	EOR	i3<<16,t3,t3
            +	MOVBU	i2<<0(tbl),i2	// Td4[s3>>8]
            +	AND	s3,t4,i3	// i2
            +
            +	ADD	s3>>24,tbl,s3
            +	MOVBU	i3<<0(tbl),i3	// Td4[s3>>0]
            +	MOVBU	(s3),s3		// Td4[s3>>24]
            +	EOR	i1<<16,s0,s0
            +	MOVW	0(key),i1
            +	EOR	i2<<8,s1,s1
            +	MOVW	4(key),t1
            +	EOR	s2<<8,i3,s2
            +	MOVW	8(key),t2
            +	EOR	s3<<24,t3,s3
            +	MOVW	12(key),t3
            +
            +	EOR	i1,s0,s0
            +	EOR	t1,s1,s1
            +	EOR	t2,s2,s2
            +	EOR	t3,s3,s3
            +
            +	MOVW	dst+8(FP), t4
            +
            +#ifdef ARM_ARCH_7
            +	REV	s0,s0
            +	REV	s1,s1
            +	REV	s2,s2
            +	REV	s3,s3
            +	MOVW	s0,0(t4)
            +	MOVW	s1,4(t4)
            +	MOVW	s2,8(t4)
            +	MOVW	s3,12(t4)
            +#else
            +	MOVW	s0>>24,t1	// write output in endian-neutral
            +	MOVW	s0>>16,t2	// manner...
            +	MOVW	s0>>8,t3
            +	MOVBU	t1,0(t4)
            +	MOVBU	t2,1(t4)
            +	MOVW	s1>>24,t1
            +	MOVBU	t3,2(t4)
            +	MOVW	s1>>16,t2
            +	MOVBU	s0,3(t4)
            +	MOVW	s1>>8,t3
            +	MOVBU	t1,4(t4)
            +	MOVBU	t2,5(t4)
            +	MOVW	s2>>24,t1
            +	MOVBU	t3,6(t4)
            +	MOVW	s2>>16,t2
            +	MOVBU	s1,7(t4)
            +	MOVW	s2>>8,t3
            +	MOVBU	t1,8(t4)
            +	MOVBU	t2,9(t4)
            +	MOVW	s3>>24,t1
            +	MOVBU	t3,10(t4)
            +	MOVW	s3>>16,t2
            +	MOVBU	s2,11(t4)
            +	MOVW	s3>>8,t3
            +	MOVBU	t1,12(t4)
            +	MOVBU	t2,13(t4)
            +	MOVBU	t3,14(t4)
            +	MOVBU	s3,15(t4)
            +#endif
            +	RET
            diff --git a/src/crypto/aes/cipher_arm.go b/src/crypto/aes/cipher_arm.go
            new file mode 100644
            index 0000000..d501c10
            --- /dev/null
            +++ b/src/crypto/aes/cipher_arm.go
            @@ -0,0 +1,82 @@
            +// Copyright 2017 The Go Authors. All rights reserved.
            +// Use of this source code is governed by a BSD-style
            +// license that can be found in the LICENSE file.
            +
            +package aes
            +
            +import (
            +	"crypto/cipher"
            +)
            +
            +// defined in asm_arm.s
            +func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
            +func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
            +func expandKeyEncAsm(nr int, userKey *byte, enc *uint32)
            +func expandKeyDecAsm(nr int, enc *uint32, dec *uint32)
            +
            +type aesCipherAsm struct {
            +	aesCipher
            +}
            +
            +func newCipher(key []byte) (cipher.Block, error) {
            +	n := len(key) + 28
            +	c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}}
            +	rounds := 10
            +	switch len(key) {
            +	case 128 / 8:
            +		rounds = 10
            +	case 192 / 8:
            +		rounds = 12
            +	case 256 / 8:
            +		rounds = 14
            +	}
            +	expandKeyEncAsm(rounds, &key[0], &c.enc[0])
            +	expandKeyDecAsm(rounds, &c.enc[0], &c.dec[0])
            +	return &c, nil
            +}
            +
            +func (c *aesCipherAsm) BlockSize() int { return BlockSize }
            +
            +func (c *aesCipherAsm) Encrypt(dst, src []byte) {
            +	if len(src) < BlockSize {
            +		panic("crypto/aes: input not full block")
            +	}
            +	if len(dst) < BlockSize {
            +		panic("crypto/aes: output not full block")
            +	}
            +	encryptBlockAsm(len(c.enc)/4-2, &c.enc[0], &dst[0], &src[0])
            +}
            +
            +func (c *aesCipherAsm) Decrypt(dst, src []byte) {
            +	if len(src) < BlockSize {
            +		panic("crypto/aes: input not full block")
            +	}
            +	if len(dst) < BlockSize {
            +		panic("crypto/aes: output not full block")
            +	}
            +	decryptBlockAsm(len(c.dec)/4-2, &c.dec[0], &dst[0], &src[0])
            +}
            +
            +// expandKey is used by BenchmarkExpand to ensure that the asm implementation
            +// of key expansion is used for the benchmark when it is available.
            +func expandKey(key []byte, enc, dec []uint32) {
            +	rounds := 10 // rounds needed for AES128
            +	switch len(key) {
            +	case 192 / 8:
            +		rounds = 12
            +	case 256 / 8:
            +		rounds = 14
            +	}
            +	expandKeyEncAsm(rounds, &key[0], &enc[0])
            +	if dec != nil {
            +		expandKeyDecAsm(rounds, &enc[0], &dec[0])
            +	}
            +}
            +
            +// rcon table used by asm_arm.s
            +var rcon = [16]uint32{
            +	0x01000000, 0x02000000, 0x04000000, 0x08000000,
            +	0x10000000, 0x20000000, 0x40000000, 0x80000000,
            +	0x1B000000, 0x36000000, 0, 0,
            +	0, 0, 0, 0,
            +}
            diff --git a/src/crypto/aes/cipher_generic.go b/src/crypto/aes/cipher_generic.go
            index ca74aa8..411adc8 100644
            --- a/src/crypto/aes/cipher_generic.go
            +++ b/src/crypto/aes/cipher_generic.go
            @@ -2,7 +2,7 @@
             // Use of this source code is governed by a BSD-style
             // license that can be found in the LICENSE file.
             
            -// +build !amd64,!s390x,!ppc64le
            +// +build !amd64,!s390x,!ppc64le,!arm
             
             package aes
             
            

            To view, visit change 38366. To unsubscribe, visit settings.

            Gerrit-Project: go
            Gerrit-Branch: master
            Gerrit-MessageType: newpatchset
            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
            Gerrit-Change-Number: 38366
            Gerrit-PatchSet: 3

            Brad Fitzpatrick (Gerrit)

            unread,
            Mar 22, 2017, 5:47:16 PM3/22/17
            to Nick Craig-Wood, Brad Fitzpatrick, Gobot Gobot, golang-co...@googlegroups.com

            Brad Fitzpatrick posted comments on this change.

            View Change

            Patch set 3:Run-TryBot +1

              To view, visit change 38366. To unsubscribe, visit settings.

              Gerrit-Project: go
              Gerrit-Branch: master
              Gerrit-MessageType: comment
              Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
              Gerrit-Change-Number: 38366
              Gerrit-PatchSet: 3
              Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
              Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
              Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
              Gerrit-Comment-Date: Wed, 22 Mar 2017 21:47:14 +0000
              Gerrit-HasComments: No

              Gobot Gobot (Gerrit)

              unread,
              Mar 22, 2017, 5:47:28 PM3/22/17
              to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

              Gobot Gobot posted comments on this change.

              View Change

              Patch set 3:

              TryBots beginning. Status page: http://farmer.golang.org/try?commit=24efb630

                To view, visit change 38366. To unsubscribe, visit settings.

                Gerrit-Project: go
                Gerrit-Branch: master
                Gerrit-MessageType: comment
                Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                Gerrit-Change-Number: 38366
                Gerrit-PatchSet: 3
                Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                Gerrit-Comment-Date: Wed, 22 Mar 2017 21:47:26 +0000
                Gerrit-HasComments: No

                Gobot Gobot (Gerrit)

                unread,
                Mar 22, 2017, 5:50:54 PM3/22/17
                to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

                Gobot Gobot posted comments on this change.

                View Change

                Patch set 3:

                Build is still in progress... This change failed on misc-compile: See https://storage.googleapis.com/go-build-log/24efb630/misc-compile_f8882959.log

                Consult https://build.golang.org/ to see whether it's a new failure. Other builds still in progress; subsequent failure notices suppressed until final report.

                  To view, visit change 38366. To unsubscribe, visit settings.

                  Gerrit-Project: go
                  Gerrit-Branch: master
                  Gerrit-MessageType: comment
                  Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                  Gerrit-Change-Number: 38366
                  Gerrit-PatchSet: 3
                  Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                  Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                  Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                  Gerrit-Comment-Date: Wed, 22 Mar 2017 21:50:51 +0000
                  Gerrit-HasComments: No

                  Gobot Gobot (Gerrit)

                  unread,
                  Mar 22, 2017, 5:55:04 PM3/22/17
                  to Nick Craig-Wood, Brad Fitzpatrick, golang-co...@googlegroups.com

                  Gobot Gobot posted comments on this change.

                  View Change

                  Patch set 3:TryBot-Result -1

                  1 of 17 TryBots failed: Failed on misc-compile: https://storage.googleapis.com/go-build-log/24efb630/misc-compile_f8882959.log

                  Consult https://build.golang.org/ to see whether they are new failures.

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-Comment-Date: Wed, 22 Mar 2017 21:55:02 +0000
                    Gerrit-HasComments: No

                    Cherry Zhang (Gerrit)

                    unread,
                    Mar 22, 2017, 6:17:09 PM3/22/17
                    to Nick Craig-Wood, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Cherry Zhang posted comments on this change.

                    View Change

                    Patch set 3:

                    (3 comments)

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-CC: Cherry Zhang <cher...@google.com>
                    Gerrit-Comment-Date: Wed, 22 Mar 2017 22:17:07 +0000
                    Gerrit-HasComments: Yes

                    Josselin Costanzi (Gerrit)

                    unread,
                    Mar 22, 2017, 6:57:59 PM3/22/17
                    to Nick Craig-Wood, Cherry Zhang, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Josselin Costanzi posted comments on this change.

                    View Change

                    Patch set 3:

                    (1 comment)

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-CC: Cherry Zhang <cher...@google.com>
                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                    Gerrit-Comment-Date: Wed, 22 Mar 2017 22:57:56 +0000
                    Gerrit-HasComments: Yes

                    Nick Craig-Wood (Gerrit)

                    unread,
                    Mar 23, 2017, 5:59:51 AM3/23/17
                    to Josselin Costanzi, Cherry Zhang, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Nick Craig-Wood posted comments on this change.

                    View Change

                    Patch set 3:

                    (4 comments)

                      • In the inner loops nr+0(FP) is saved to and from the stack

                      • When dynamic linking, access of global variable will use R9

                      • The only globals that are in use are the fetching of the table addresses. These are all addresses from within the same package.

                        MOVW $·te0(SB), tbl

                        Would that clobber R9 when dynamic linking?

                        I can probably work around that with a bit of care.

                      • Also R9 is reserved on NaCl

                      • This is a definite problem - I either need to

                        • rewrite to not use R9
                        • not support NaCl

                        Rewriting to not use R9 would mean spilling another register to the stack. That diverges further from the carefully tuned OpenSSL assembler code but perhaps obeys the ARM assembler contract better...

                        What is your opinion?

                      • Ack

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-CC: Cherry Zhang <cher...@google.com>
                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                    Gerrit-Comment-Date: Thu, 23 Mar 2017 09:59:46 +0000
                    Gerrit-HasComments: Yes

                    Cherry Zhang (Gerrit)

                    unread,
                    Mar 23, 2017, 9:35:25 AM3/23/17
                    to Nick Craig-Wood, Josselin Costanzi, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Cherry Zhang posted comments on this change.

                    View Change

                    Patch set 3:

                    (2 comments)

                      • In the inner loops nr+0(FP) is saved to and from the stack

                      • Oh, it is spill to argument slot, not local slot. Thanks.

                      • Only loading the address is probably ok for R9. In this case R11 may be used as a temporary register and get clobbered though. Since your target register is also R11, it is probably ok. You can try it by passing -shared to the assembler.

                        Falling back to pure Go version on NaCl sounds good to me.

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-CC: Cherry Zhang <cher...@google.com>
                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                    Gerrit-Comment-Date: Thu, 23 Mar 2017 13:35:23 +0000
                    Gerrit-HasComments: Yes

                    Nick Craig-Wood (Gerrit)

                    unread,
                    Mar 23, 2017, 5:57:20 PM3/23/17
                    to Josselin Costanzi, Cherry Zhang, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Nick Craig-Wood posted comments on this change.

                    View Change

                    Patch set 3:

                    (4 comments)

                      • Well spotted - that should be 2016 rather than some time into the mysteriou

                      • Done

                      • Oh, it is spill to argument slot, not local slot. Thanks.

                        Done

                      • Only loading the address is probably ok for R9. In this case R11 may be use

                        I have excluded nacl from the build.

                        I've also looked at the assembly generated by go tool asm with and without -shared.

                        The linker appears to do the sensible thing to make it relocatable, converting

                             6b0:	e59f6268 	ldr	r6, [pc, #616]	; 0x920
                             6b4:	e59fb268 	ldr	r11, [pc, #616]	; 0x924

                        into

                             6b4:	e59f6280 	ldr	r6, [pc, #640]	; 0x93c
                             6b8:	e08f6006 	add	r6, r15, r6
                             6bc:	e59fb27c 	ldr	r11, [pc, #636]	; 0x940
                             6c0:	e08fb00b 	add	r11, r15, r11

                        So no use of any extra registers!

                      • Ack

                        Done

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: comment
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 3
                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                    Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                    Gerrit-CC: Cherry Zhang <cher...@google.com>
                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                    Gerrit-Comment-Date: Thu, 23 Mar 2017 21:57:16 +0000
                    Gerrit-HasComments: Yes

                    Nick Craig-Wood (Gerrit)

                    unread,
                    Mar 23, 2017, 5:58:44 PM3/23/17
                    to Gobot Gobot, Brad Fitzpatrick, Josselin Costanzi, Cherry Zhang, golang-co...@googlegroups.com

                    Nick Craig-Wood uploaded patch set #4 to this change.

                    View Change

                    crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey
                    
                    ARM assembly for AES crypto adapted from openssl giving an
                    encrypt/decrypt speed up of 1.6-2.7x and a key scheduling speedup of
                    2.3-4.5x.
                    
                    Raspberry Pi 3 BCM2709 ARMv7 Processor rev 5 (v7l)
                    
                    name       old time/op    new time/op     delta
                    Encrypt-4    3.12µs ± 1%     1.13µs ± 2%   -63.86%  (p=0.000 n=20+20)
                    Decrypt-4    3.10µs ± 1%     1.21µs ± 2%   -60.91%  (p=0.000 n=20+20)
                    Expand-4     11.3µs ± 1%      2.5µs ± 1%   -78.11%  (p=0.000 n=16+16)
                    
                    name       old speed      new speed       delta
                    Encrypt-4  5.13MB/s ± 2%  14.18MB/s ± 2%  +176.58%  (p=0.000 n=20+20)
                    Decrypt-4  5.16MB/s ± 1%  13.19MB/s ± 2%  +155.78%  (p=0.000 n=20+20)
                    
                    Chromebook Samsung Exynos5 ARMv7 Processor rev 4 (v7l)
                    
                    name       old time/op    new time/op    delta
                    Encrypt-2     342ns ± 1%     210ns ± 1%  -38.80%  (p=0.000 n=16+20)
                    Decrypt-2     343ns ± 6%     209ns ± 2%  -39.16%  (p=0.000 n=17+18)
                    Expand-2     1.64µs ± 5%    0.70µs ± 1%  -57.29%  (p=0.000 n=17+19)
                    
                    name       old speed      new speed      delta
                    Encrypt-2  46.7MB/s ± 1%  76.2MB/s ± 1%  +63.35%  (p=0.000 n=16+20)
                    Decrypt-2  46.4MB/s ± 7%  76.4MB/s ± 2%  +64.75%  (p=0.000 n=18+18)
                    
                    Issue #4299
                    
                    Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    ---
                    A src/crypto/aes/asm_arm.s
                    A src/crypto/aes/cipher_arm.go
                    M src/crypto/aes/cipher_generic.go
                    3 files changed, 931 insertions(+), 0 deletions(-)
                    
                    
                    diff --git a/src/crypto/aes/asm_arm.s b/src/crypto/aes/asm_arm.s
                    new file mode 100644
                    index 0000000..3fcbae4
                    --- /dev/null
                    +++ b/src/crypto/aes/asm_arm.s
                    @@ -0,0 +1,846 @@
                    +// Copyright 2017 The Go Authors. All rights reserved.
                    +// Use of this source code is governed by a BSD-style
                    +// license that can be found in the LICENSE file.
                    +
                    +// +build arm,!nacl
                    +
                    +// This is a derived work from OpenSSL of AES using assembly optimizations. The
                    +// original code was written by Andy Polyakov <ap...@openssl.org> and it's dual
                    +// licensed under OpenSSL and CRYPTOGAMS licenses depending on where you obtain
                    +// it. For further details see http://www.openssl.org/~appro/cryptogams/.
                    +
                    +// Original code can be found at the link below:
                    +// https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aes-armv4.pl
                    +
                    +// This code is based on crypto/aes/asm/aes-armv4.pl version
                    +// 6aa36e8e5a062e31543e7796f0351ff9628832ce from 21 May 2016
                    +
                    +// Apart from assembler syntax and calling convention changes, the
                    +// major change needed was to spill one register to the stack as go
                    +// can't use R10
                    +
                    +// Note that we don't build for nacl since it needs R9
                    +
                    +// AES for ARMv4
                    +
                    +// January 2007.
                    +//
                    +// Code uses single 1K S-box and is >2 times faster than code generated
                    +// by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
                    +// allows to merge logical or arithmetic operation with shift or rotate
                    +// in one instruction and emit combined result every cycle. The module
                    +// is endian-neutral. The performance is ~42 cycles/byte for 128-bit
                    +// key [on single-issue Xscale PXA250 core].
                    +
                    +// May 2007.
                    +//
                    +// AES_set_[en|de]crypt_key is added.
                    +
                    +// July 2010.
                    +//
                    +// Rescheduling for dual-issue pipeline resulted in 12% improvement on
                    +// Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
                    +
                    +// February 2011.
                    +//
                    +// Profiler-assisted and platform-specific optimization resulted in 16%
                    +// improvement on Cortex A8 core and ~21.5 cycles per byte.
                    +
                    +#include "textflag.h"
                    +
                    +#define s0 R0
                    +#define s1 R1
                    +#define s2 R2
                    +#define s3 R3
                    +#define t1 R4
                    +#define t2 R5
                    +#define t3 R6
                    +#define i1 R7
                    +#define i2 R8
                    +#define i3 R9		// forbidden on nacl, check usage with -shared
                    +#define mask80 i1
                    +#define mask1b i2
                    +#define mask7f i3
                    +// mask80/mask1b/mask7f are aliases of i1/i2/i3; they are referenced by expandKeyDecAsm.
                    +#define tbl R11		// can be used by the linker to synthesise instructions
                    +#define key R12
                    +#define t4 R14
                    +// With ARM_ARCH_7 undefined the byte-wise MOVBU load/store paths are assembled; defining it selects the MOVW+REV paths.
                    +// #define ARM_ARCH_7 1
                    +
                    +// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte) - encrypts the 16-byte block at src into dst using the round keys at xk; the table loop runs nr times (at least once) and is followed by one final round.
                    +TEXT	·encryptBlockAsm(SB), NOSPLIT, $4-16	// $4 here to save LR
                    +	MOVW	src+12(FP), t4	// t4 = src
                    +	MOVW	$·te0(SB), tbl	// tbl = address of the te0 lookup table
                    +	MOVW	xk+4(FP), key	// key = xk
                    +// Load the 16 input bytes into s0-s3; within each word the lowest-addressed byte ends up in the top 8 bits.
                    +#ifndef ARM_ARCH_7
                    +	MOVBU	3(t4),s0	// load input data in endian-neutral
                    +	MOVBU	2(t4),t1	// manner...
                    +	MOVBU	1(t4),t2
                    +	MOVBU	0(t4),t3
                    +	ORR	t1<<8,s0,s0
                    +	MOVBU	7(t4),s1
                    +	ORR	t2<<16,s0,s0
                    +	MOVBU	6(t4),t1
                    +	ORR	t3<<24,s0,s0
                    +	MOVBU	5(t4),t2
                    +	MOVBU	4(t4),t3
                    +	ORR	t1<<8,s1,s1
                    +	MOVBU	11(t4),s2
                    +	ORR	t2<<16,s1,s1
                    +	MOVBU	10(t4),t1
                    +	ORR	t3<<24,s1,s1
                    +	MOVBU	9(t4),t2
                    +	MOVBU	8(t4),t3
                    +	ORR	t1<<8,s2,s2
                    +	MOVBU	15(t4),s3
                    +	ORR	t2<<16,s2,s2
                    +	MOVBU	14(t4),t1
                    +	ORR	t3<<24,s2,s2
                    +	MOVBU	13(t4),t2
                    +	MOVBU	12(t4),t3
                    +	ORR	t1<<8,s3,s3
                    +	ORR	t2<<16,s3,s3
                    +	ORR	t3<<24,s3,s3
                    +#else
                    +	MOVW	0(t4),s0
                    +	MOVW	4(t4),s1
                    +	MOVW	8(t4),s2
                    +	MOVW	12(t4),s3
                    +	REV	s0,s0
                    +	REV	s1,s1
                    +	REV	s2,s2
                    +	REV	s3,s3
                    +#endif
                    +// XOR the first four round-key words into the state; MOVM.IA.W loads t1,t2,t3,i1 (R4-R7) and advances key by 16 bytes.
                    +	MOVM.IA.W	(key),[t1-i1]
                    +	EOR	t1,s0,s0
                    +	EOR	t2,s1,s1
                    +	EOR	t3,s2,s2
                    +	EOR	i1,s3,s3
                    +	MOVW	$255,t4	// t4 = 0xff byte mask (src pointer no longer needed)
                    +// Split s0 into its four bytes (i1, i2, i3 and s0 itself) as table indices for the first loop pass.
                    +	AND	s0,t4,i1
                    +	AND	s0>>8,t4,i2
                    +	AND	s0>>16,t4,i3
                    +	MOVW	s0>>24,s0
                    +enc_loop:
                    +	MOVW	i1<<2(tbl),t1	// Te3[s0>>0]
                    +	AND	s1>>16,t4,i1	// i0
                    +	MOVW	i2<<2(tbl),t2	// Te2[s0>>8]
                    +	AND	s1,t4,i2
                    +	MOVW	i3<<2(tbl),t3	// Te1[s0>>16]
                    +	AND	s1>>8,t4,i3
                    +	MOVW	s0<<2(tbl),s0	// Te0[s0>>24]
                    +	MOVW	s1>>24,s1
                    +
                    +	MOVW	i1<<2(tbl),i1	// Te1[s1>>16]
                    +	MOVW	i2<<2(tbl),i2	// Te3[s1>>0]
                    +	MOVW	i3<<2(tbl),i3	// Te2[s1>>8]
                    +	EOR	i1@>8,s0,s0
                    +	MOVW	s1<<2(tbl),s1	// Te0[s1>>24]
                    +	AND	s2>>8,t4,i1	// i0
                    +	EOR	i2@>8,t2,t2
                    +	AND	s2>>16,t4,i2	// i1
                    +	EOR	i3@>8,t3,t3
                    +	AND	s2,t4,i3
                    +	MOVW	i1<<2(tbl),i1	// Te2[s2>>8]
                    +	EOR	t1@>24,s1,s1
                    +	MOVW	i2<<2(tbl),i2	// Te1[s2>>16]
                    +	MOVW	s2>>24,s2
                    +
                    +	MOVW	i3<<2(tbl),i3	// Te3[s2>>0]
                    +	EOR	i1@>16,s0,s0
                    +	MOVW	s2<<2(tbl),s2	// Te0[s2>>24]
                    +	AND	s3,t4,i1	// i0
                    +	EOR	i2@>8,s1,s1
                    +	AND	s3>>8,t4,i2	// i1
                    +	EOR	i3@>16,t3,t3
                    +	AND	s3>>16,t4,i3	// i2
                    +	MOVW	i1<<2(tbl),i1	// Te3[s3>>0]
                    +	EOR	t2@>16,s2,s2
                    +	MOVW	i2<<2(tbl),i2	// Te2[s3>>8]
                    +	MOVW	s3>>24,s3
                    +
                    +	MOVW	i3<<2(tbl),i3	// Te1[s3>>16]
                    +	EOR	i1@>24,s0,s0
                    +	MOVW.P	16(key),i1
                    +	EOR	i2@>16,s1,s1
                    +	MOVW	s3<<2(tbl),s3	// Te0[s3>>24]
                    +	EOR	i3@>8,s2,s2
                    +	MOVW	-12(key),t1
                    +	EOR	t3@>8,s3,s3
                    +// Round-key addition: word 0 came from the post-incremented MOVW.P above, words 1-3 are read at negative offsets from the advanced key.
                    +	MOVW	-8(key),t2
                    +	EOR	i1,s0,s0
                    +	MOVW	-4(key),t3
                    +	AND	s0,t4,i1
                    +	EOR	t1,s1,s1
                    +	MOVW	nr+0(FP), t1	// reload the loop counter, which lives in the nr argument slot
                    +	AND	s0>>8,t4,i2
                    +	EOR	t2,s2,s2
                    +	AND	s0>>16,t4,i3
                    +	EOR	t3,s3,s3
                    +	SUB.S	$1,t1,t1	// decrement; the flags set here feed BGT below
                    +	MOVW	s0>>24,s0
                    +
                    +	MOVW	t1, nr+0(FP)	// spill the counter back (overwrites the caller's nr argument)
                    +	BGT	enc_loop
                    +// Final round: single-byte loads from tbl+2 using the same index<<2 stride (the inline comments call this Te4).
                    +	ADD	$2,tbl,tbl
                    +
                    +	MOVBU	i1<<2(tbl),t1	// Te4[s0>>0]
                    +	AND	s1>>16,t4,i1	// i0
                    +	MOVBU	i2<<2(tbl),t2	// Te4[s0>>8]
                    +	AND	s1,t4,i2
                    +	MOVBU	i3<<2(tbl),t3	// Te4[s0>>16]
                    +	AND	s1>>8,t4,i3
                    +	MOVBU	s0<<2(tbl),s0	// Te4[s0>>24]
                    +	MOVW	s1>>24,s1
                    +
                    +	MOVBU	i1<<2(tbl),i1	// Te4[s1>>16]
                    +	MOVBU	i2<<2(tbl),i2	// Te4[s1>>0]
                    +	MOVBU	i3<<2(tbl),i3	// Te4[s1>>8]
                    +	EOR	s0<<8,i1,s0
                    +	MOVBU	s1<<2(tbl),s1	// Te4[s1>>24]
                    +	AND	s2>>8,t4,i1	// i0
                    +	EOR	t2<<8,i2,t2
                    +	AND	s2>>16,t4,i2	// i1
                    +	EOR	t3<<8,i3,t3
                    +	AND	s2,t4,i3
                    +	MOVBU	i1<<2(tbl),i1	// Te4[s2>>8]
                    +	EOR	s1<<24,t1,s1
                    +	MOVBU	i2<<2(tbl),i2	// Te4[s2>>16]
                    +	MOVW	s2>>24,s2
                    +
                    +	MOVBU	i3<<2(tbl),i3	// Te4[s2>>0]
                    +	EOR	s0<<8,i1,s0
                    +	MOVBU	s2<<2(tbl),s2	// Te4[s2>>24]
                    +	AND	s3,t4,i1	// i0
                    +	EOR	i2<<16,s1,s1
                    +	AND	s3>>8,t4,i2	// i1
                    +	EOR	t3<<8,i3,t3
                    +	AND	s3>>16,t4,i3	// i2
                    +	MOVBU	i1<<2(tbl),i1	// Te4[s3>>0]
                    +	EOR	s2<<24,t2,s2
                    +	MOVBU	i2<<2(tbl),i2	// Te4[s3>>8]
                    +	MOVW	s3>>24,s3
                    +
                    +	MOVBU	i3<<2(tbl),i3	// Te4[s3>>16]
                    +	EOR	s0<<8,i1,s0
                    +	MOVW	0(key),i1
                    +	MOVBU	s3<<2(tbl),s3	// Te4[s3>>24]
                    +	EOR	i2<<8,s1,s1
                    +	MOVW	4(key),t1
                    +	EOR	i3<<16,s2,s2
                    +	MOVW	8(key),t2
                    +	EOR	s3<<24,t3,s3
                    +	MOVW	12(key),t3
                    +// XOR in the last four round-key words (loaded above into i1, t1, t2, t3).
                    +	EOR	i1,s0,s0
                    +	EOR	t1,s1,s1
                    +	EOR	t2,s2,s2
                    +	EOR	t3,s3,s3
                    +
                    +	MOVW	dst+8(FP), t4	// t4 = dst
                    +// Store s0-s3 to dst, most significant byte of each word first.
                    +#ifdef ARM_ARCH_7
                    +	REV	s0,s0
                    +	REV	s1,s1
                    +	REV	s2,s2
                    +	REV	s3,s3
                    +	MOVW	s0,0(t4)
                    +	MOVW	s1,4(t4)
                    +	MOVW	s2,8(t4)
                    +	MOVW	s3,12(t4)
                    +#else
                    +	MOVW	s0>>24,t1	// write output in endian-neutral
                    +	MOVW	s0>>16,t2	// manner...
                    +	MOVW	s0>>8,t3
                    +	MOVBU	t1,0(t4)
                    +	MOVBU	t2,1(t4)
                    +	MOVW	s1>>24,t1
                    +	MOVBU	t3,2(t4)
                    +	MOVW	s1>>16,t2
                    +	MOVBU	s0,3(t4)
                    +	MOVW	s1>>8,t3
                    +	MOVBU	t1,4(t4)
                    +	MOVBU	t2,5(t4)
                    +	MOVW	s2>>24,t1
                    +	MOVBU	t3,6(t4)
                    +	MOVW	s2>>16,t2
                    +	MOVBU	s1,7(t4)
                    +	MOVW	s2>>8,t3
                    +	MOVBU	t1,8(t4)
                    +	MOVBU	t2,9(t4)
                    +	MOVW	s3>>24,t1
                    +	MOVBU	t3,10(t4)
                    +	MOVW	s3>>16,t2
                    +	MOVBU	s2,11(t4)
                    +	MOVW	s3>>8,t3
                    +	MOVBU	t1,12(t4)
                    +	MOVBU	t2,13(t4)
                    +	MOVBU	t3,14(t4)
                    +	MOVBU	s3,15(t4)
                    +#endif
                    +	RET
                    +
                    +// func expandKeyEncAsm(nr int, userKey *byte, enc *uint32) - expands the key bytes at userKey into round-key words written to enc; nr (10, 12, or anything else) selects the 128-, 192- or 256-bit path.
                    +TEXT	·expandKeyEncAsm(SB), NOSPLIT, $4-12	// $4 here to save LR
                    +	MOVW	userKey+4(FP), t4	// inp
                    +	MOVW	nr+0(FP), tbl		// rounds
                    +	MOVW	enc+8(FP), key
                    +// Load the first 16 key bytes into s0-s3 (lowest-addressed byte in the top 8 bits of each word) and store them as the first four words of enc.
                    +#ifndef ARM_ARCH_7
                    +	MOVBU	3(t4),s0	// load input data in endian-neutral
                    +	MOVBU	2(t4),t1	// manner...
                    +	MOVBU	1(t4),t2
                    +	MOVBU	0(t4),t3
                    +	ORR	t1<<8,s0,s0
                    +	MOVBU	7(t4),s1
                    +	ORR	t2<<16,s0,s0
                    +	MOVBU	6(t4),t1
                    +	ORR	t3<<24,s0,s0
                    +	MOVBU	5(t4),t2
                    +	MOVBU	4(t4),t3
                    +	ORR	t1<<8,s1,s1
                    +	MOVBU	11(t4),s2
                    +	ORR	t2<<16,s1,s1
                    +	MOVBU	10(t4),t1
                    +	ORR	t3<<24,s1,s1
                    +	MOVBU	9(t4),t2
                    +	MOVBU	8(t4),t3
                    +	ORR	t1<<8,s2,s2
                    +	MOVBU	15(t4),s3
                    +	ORR	t2<<16,s2,s2
                    +	MOVBU	14(t4),t1
                    +	ORR	t3<<24,s2,s2
                    +	MOVBU	13(t4),t2
                    +	MOVBU	12(t4),t3
                    +	ORR	t1<<8,s3,s3
                    +	MOVW.P	s0,16(key)
                    +	ORR	t2<<16,s3,s3
                    +	MOVW	s1,-12(key)
                    +	ORR	t3<<24,s3,s3
                    +	MOVW	s2,-8(key)
                    +	MOVW	s3,-4(key)
                    +#else
                    +	MOVW	0(t4),s0
                    +	MOVW	4(t4),s1
                    +	MOVW	8(t4),s2
                    +	MOVW	12(t4),s3
                    +	REV	s0,s0
                    +	REV	s1,s1
                    +	REV	s2,s2
                    +	REV	s3,s3
                    +	MOVW.P	s0,16(key)
                    +	MOVW	s1,-12(key)
                    +	MOVW	s2,-8(key)
                    +	MOVW	s3,-4(key)
                    +#endif
                    +// nr == 10 takes the 128-bit path below; any other value branches to ek_not128.
                    +	TEQ	$10,tbl
                    +	BNE	ek_not128
                    +	MOVW	$·rcon(SB), t3
                    +	MOVW	$·sbox0(SB), tbl	// Te4
                    +	MOVW	$255,t4
                    +// Each pass emits four words; the pass counter is the nr argument slot itself (10 on entry), decremented in place until zero.
                    +ek_128_loop:
                    +	AND	s3>>24,t4,t2
                    +	AND	s3>>16,t4,i1
                    +	MOVBU	t2<<0(tbl),t2
                    +	AND	s3>>8,t4,i2
                    +	MOVBU	i1<<0(tbl),i1
                    +	AND	s3,t4,i3
                    +	MOVBU	i2<<0(tbl),i2
                    +	ORR	i1<<24,t2,t2
                    +	MOVBU	i3<<0(tbl),i3
                    +	ORR	i2<<16,t2,t2
                    +	MOVW.P	4(t3),t1	// rcon[i++]
                    +	ORR	i3<<8,t2,t2
                    +	EOR	t1,t2,t2
                    +	MOVW	nr+0(FP), t1
                    +	EOR	t2,s0,s0	// rk[4]=rk[0]^...
                    +	EOR	s0,s1,s1	// rk[5]=rk[1]^rk[4]
                    +	MOVW.P	s0,16(key)
                    +	EOR	s1,s2,s2	// rk[6]=rk[2]^rk[5]
                    +	MOVW	s1,-12(key)
                    +	EOR	s2,s3,s3	// rk[7]=rk[3]^rk[6]
                    +	SUB.S	$1,t1,t1
                    +	MOVW	s2,-8(key)
                    +	MOVW	t1, nr+0(FP)
                    +	MOVW	s3,-4(key)
                    +	BNE	ek_128_loop
                    +	SUB	$176,key,R2	// NOTE(review): R2 is not read again before RET - looks like a leftover from the original, confirm it can go
                    +	B	ek_done
                    +
                    +ek_not128:
                    +#ifndef ARM_ARCH_7
                    +	MOVBU	19(t4),i2
                    +	MOVBU	18(t4),t1
                    +	MOVBU	17(t4),t2
                    +	MOVBU	16(t4),t3
                    +	ORR	t1<<8,i2,i2
                    +	MOVBU	23(t4),i3
                    +	ORR	t2<<16,i2,i2
                    +	MOVBU	22(t4),t1
                    +	ORR	t3<<24,i2,i2
                    +	MOVBU	21(t4),t2
                    +	MOVBU	20(t4),t3
                    +	ORR	t1<<8,i3,i3
                    +	ORR	t2<<16,i3,i3
                    +	MOVW.P	i2,8(key)
                    +	ORR	t3<<24,i3,i3
                    +	MOVW	i3,-4(key)
                    +#else
                    +	MOVW	16(t4),i2
                    +	MOVW	20(t4),i3
                    +	REV	i2,i2
                    +	REV	i3,i3
                    +	MOVW.P	i2,8(key)
                    +	MOVW	i3,-4(key)
                    +#endif
                    +// Key bytes 16-23 are now stored as two more words; nr == 12 takes the 192-bit path with a pass count of 8 written over the nr argument slot.
                    +	TEQ	$12,tbl
                    +	BNE	ek_not192
                    +	MOVW	$·sbox0(SB), tbl	// Te4
                    +	MOVW	$·rcon(SB), t3
                    +	MOVW	$8,t1
                    +	MOVW	$255,t4
                    +	MOVW	t1, nr+0(FP)
                    +
                    +ek_192_loop:
                    +	AND	i3>>24,t4,t2
                    +	AND	i3>>16,t4,i1
                    +	MOVBU	t2<<0(tbl),t2
                    +	AND	i3>>8,t4,i2
                    +	MOVBU	i1<<0(tbl),i1
                    +	AND	i3,t4,i3
                    +	MOVBU	i2<<0(tbl),i2
                    +	ORR	i1<<24,t2,t2
                    +	MOVBU	i3<<0(tbl),i3
                    +	ORR	i2<<16,t2,t2
                    +	MOVW.P	4(t3),t1	// rcon[i++]
                    +	ORR	i3<<8,t2,t2
                    +	EOR	t1,t2,i3
                    +	MOVW	nr+0(FP), t1
                    +	EOR	i3,s0,s0	// rk[6]=rk[0]^...
                    +	EOR	s0,s1,s1	// rk[7]=rk[1]^rk[6]
                    +	MOVW.P	s0,24(key)
                    +	EOR	s1,s2,s2	// rk[8]=rk[2]^rk[7]
                    +	MOVW	s1,-20(key)
                    +	EOR	s2,s3,s3	// rk[9]=rk[3]^rk[8]
                    +	SUB.S	$1,t1,t1
                    +	MOVW	s2,-16(key)
                    +	MOVW	t1, nr+0(FP)
                    +	MOVW	s3,-12(key)
                    +	BEQ	ek_done
                    +// Not the last pass: derive the two remaining words of this 6-word group; i3 carries the last one into the next pass.
                    +	MOVW	-32(key),i1
                    +	MOVW	-28(key),i2
                    +	EOR	s3,i1,i1	// rk[10]=rk[4]^rk[9]
                    +	EOR	i1,i2,i3	// rk[11]=rk[5]^rk[10]
                    +	MOVW	i1,-8(key)
                    +	MOVW	i3,-4(key)
                    +	B	ek_192_loop
                    +
                    +ek_not192:
                    +#ifndef ARM_ARCH_7
                    +	MOVBU	27(t4),i2
                    +	MOVBU	26(t4),t1
                    +	MOVBU	25(t4),t2
                    +	MOVBU	24(t4),t3
                    +	ORR	t1<<8,i2,i2
                    +	MOVBU	31(t4),i3
                    +	ORR	t2<<16,i2,i2
                    +	MOVBU	30(t4),t1
                    +	ORR	t3<<24,i2,i2
                    +	MOVBU	29(t4),t2
                    +	MOVBU	28(t4),t3
                    +	ORR	t1<<8,i3,i3
                    +	ORR	t2<<16,i3,i3
                    +	MOVW.P	i2,8(key)
                    +	ORR	t3<<24,i3,i3
                    +	MOVW	i3,-4(key)
                    +#else
                    +	MOVW	24(t4),i2
                    +	MOVW	28(t4),i3
                    +	REV	i2,i2
                    +	REV	i3,i3
                    +	MOVW.P	i2,8(key)
                    +	MOVW	i3,-4(key)
                    +#endif
                    +// 256-bit path (reached for any nr other than 10 or 12): pass count 7 is written over the nr argument slot.
                    +	MOVW	$·sbox0(SB), tbl	// Te4
                    +	MOVW	$·rcon(SB), t3
                    +	MOVW	$7,t1
                    +	MOVW	$255,t4
                    +	MOVW	t1, nr+0(FP)
                    +
                    +ek_256_loop:
                    +	AND	i3>>24,t4,t2
                    +	AND	i3>>16,t4,i1
                    +	MOVBU	t2<<0(tbl),t2
                    +	AND	i3>>8,t4,i2
                    +	MOVBU	i1<<0(tbl),i1
                    +	AND	i3,t4,i3
                    +	MOVBU	i2<<0(tbl),i2
                    +	ORR	i1<<24,t2,t2
                    +	MOVBU	i3<<0(tbl),i3
                    +	ORR	i2<<16,t2,t2
                    +	MOVW.P	4(t3),t1	// rcon[i++]
                    +	ORR	i3<<8,t2,t2
                    +	EOR	t1,t2,i3
                    +	MOVW	nr+0(FP), t1
                    +	EOR	i3,s0,s0	// rk[8]=rk[0]^...
                    +	EOR	s0,s1,s1	// rk[9]=rk[1]^rk[8]
                    +	MOVW.P	s0,32(key)
                    +	EOR	s1,s2,s2	// rk[10]=rk[2]^rk[9]
                    +	MOVW	s1,-28(key)
                    +	EOR	s2,s3,s3	// rk[11]=rk[3]^rk[10]
                    +	SUB.S	$1,t1,t1
                    +	MOVW	s2,-24(key)
                    +	MOVW	t1, nr+0(FP)
                    +	MOVW	s3,-20(key)
                    +	BEQ	ek_done
                    +// Second half of the pass: each byte of s3 goes through the tbl byte lookup and stays in its own position (no byte rotation, no rcon).
                    +	AND	s3,t4,t2
                    +	AND	s3>>8,t4,i1
                    +	MOVBU	t2<<0(tbl),t2
                    +	AND	s3>>16,t4,i2
                    +	MOVBU	i1<<0(tbl),i1
                    +	AND	s3>>24,t4,i3
                    +	MOVBU	i2<<0(tbl),i2
                    +	ORR	i1<<8,t2,t2
                    +	MOVBU	i3<<0(tbl),i3
                    +	ORR	i2<<16,t2,t2
                    +	MOVW	-48(key),t1
                    +	ORR	i3<<24,t2,t2
                    +
                    +	MOVW	-44(key),i1
                    +	MOVW	-40(key),i2
                    +	EOR	t2,t1,t1	// rk[12]=rk[4]^...
                    +	MOVW	-36(key),i3
                    +	EOR	t1,i1,i1	// rk[13]=rk[5]^rk[12]
                    +	MOVW	t1,-16(key)
                    +	EOR	i1,i2,i2	// rk[14]=rk[6]^rk[13]
                    +	MOVW	i1,-12(key)
                    +	EOR	i2,i3,i3	// rk[15]=rk[7]^rk[14]
                    +	MOVW	i2,-8(key)
                    +	MOVW	i3,-4(key)
                    +	B	ek_256_loop
                    +
                    +ek_done:
                    +	RET
                    +
                    +// func expandKeyDecAsm(nr int, enc *uint32, dec *uint32) - writes the round keys from enc into dec in reverse 16-byte-group order, then transforms (nr-1)*4 words of dec in place starting at its fifth word.
                    +TEXT	·expandKeyDecAsm(SB), NOSPLIT, $4-12	// $4 here to save LR
                    +	MOVW	nr+0(FP), t4	// rounds
                    +	MOVW	enc+4(FP), i1	// input
                    +	ADD	t4<<4,i1,i2
                    +	MOVW	dec+8(FP), key	// output
                    +	ADD	t4<<4,key,tbl
                    +// i1/i2 walk enc from its first and last 16-byte group towards the middle; key/tbl are the matching write pointers in dec, so groups land in swapped positions.
                    +dk_inv:	MOVW.P	16(i1),s0
                    +	MOVW	-12(i1),s1
                    +	MOVW	-8(i1),s2
                    +	MOVW	-4(i1),s3
                    +	MOVW.P	-16(i2),t1
                    +	MOVW	16+4(i2),t2
                    +	MOVW	16+8(i2),t3
                    +	MOVW	16+12(i2),i3
                    +	MOVW.P	s0,-16(tbl)
                    +	MOVW	s1,16+4(tbl)
                    +	MOVW	s2,16+8(tbl)
                    +	MOVW	s3,16+12(tbl)
                    +	MOVW.P	t1,16(key)
                    +	MOVW	t2,-12(key)
                    +	MOVW	t3,-8(key)
                    +	MOVW	i3,-4(key)
                    +	TEQ	i2,i1
                    +	BNE	dk_inv
                    +// The read pointers have met: copy the middle 16-byte group straight across.
                    +	MOVW	(i1),s0
                    +	MOVW	4(i1),s1
                    +	MOVW	8(i1),s2
                    +	MOVW	12(i1),s3
                    +	MOVW	s0,(key)
                    +	MOVW	s1,4(key)
                    +	MOVW	s2,8(key)
                    +	MOVW	s3,12(key)
                    +	SUB	t4<<3,key,key	// rewind key by nr*8 bytes
                    +// MOVW.W pre-increments key by 16 (skipping the first group) and loads the first word to transform; then build mask80=0x80808080, mask1b=0x1b1b1b1b, mask7f=0x7f7f7f7f.
                    +	MOVW.W	16(key),s0	// prefetch tp1
                    +	MOVW	$0x80,mask80
                    +	MOVW	$0x1b,mask1b
                    +	ORR	$0x8000,mask80,mask80
                    +	ORR	$0x1b00,mask1b,mask1b
                    +	ORR	mask80<<16,mask80,mask80
                    +	ORR	mask1b<<16,mask1b,mask1b
                    +	SUB	$1,t4,t4
                    +	MVN	mask80,mask7f
                    +	MOVW	t4<<2,t4	// (rounds-1)*4
                    +// One word per pass: tp2, tp4 and tp8 are each derived from the previous value with the same mask/shift/XOR step, then combined with rotations.
                    +dk_mix:	AND	mask80,s0,t1
                    +	AND	mask7f,s0,s1
                    +	SUB	t1>>7,t1,t1
                    +	AND	mask1b,t1,t1
                    +	EOR	s1<<1,t1,s1	// tp2
                    +
                    +	AND	mask80,s1,t1
                    +	AND	mask7f,s1,s2
                    +	SUB	t1>>7,t1,t1
                    +	AND	mask1b,t1,t1
                    +	EOR	s2<<1,t1,s2	// tp4
                    +
                    +	AND	mask80,s2,t1
                    +	AND	mask7f,s2,s3
                    +	SUB	t1>>7,t1,t1
                    +	AND	mask1b,t1,t1
                    +	EOR	s3<<1,t1,s3	// tp8
                    +
                    +	EOR	s2,s1,t1
                    +	EOR	s3,s0,t2	// tp9
                    +	EOR	s3,t1,t1	// tpe
                    +	EOR	s1@>24,t1,t1
                    +	EOR	t2@>24,t1,t1	// ^= ROTATE(tpb=tp9^tp2,8)
                    +	EOR	s2@>16,t1,t1
                    +	EOR	t2@>16,t1,t1	// ^= ROTATE(tpd=tp9^tp4,16)
                    +	EOR	t2@>8,t1,t1	// ^= ROTATE(tp9,24)
                    +// Load the next input word before the post-incremented store overwrites the current one and advances key by 4.
                    +	MOVW	4(key),s0	// prefetch tp1
                    +	MOVW.P	t1,4(key)
                    +	SUB.S	$1,t4,t4
                    +	BNE	dk_mix
                    +
                    +	RET
                    +
                    +// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
                    +TEXT	·decryptBlockAsm(SB), NOSPLIT, $4-16	// $4 here to save LR
                    +	MOVW	src+12(FP), t4
                    +	MOVW	$·td0(SB), tbl
                    +	MOVW	xk+4(FP), key
                    +
                    +#ifndef ARM_ARCH_7
                    +	MOVBU	3(t4),s0	// load input data in endian-neutral
                    +	MOVBU	2(t4),t1	// manner...
                    +	MOVBU	1(t4),t2
                    +	MOVBU	0(t4),t3
                    +	ORR	t1<<8,s0,s0
                    +	MOVBU	7(t4),s1
                    +	ORR	t2<<16,s0,s0
                    +	MOVBU	6(t4),t1
                    +	ORR	t3<<24,s0,s0
                    +	MOVBU	5(t4),t2
                    +	MOVBU	4(t4),t3
                    +	ORR	t1<<8,s1,s1
                    +	MOVBU	11(t4),s2
                    +	ORR	t2<<16,s1,s1
                    +	MOVBU	10(t4),t1
                    +	ORR	t3<<24,s1,s1
                    +	MOVBU	9(t4),t2
                    +	MOVBU	8(t4),t3
                    +	ORR	t1<<8,s2,s2
                    +	MOVBU	15(t4),s3
                    +	ORR	t2<<16,s2,s2
                    +	MOVBU	14(t4),t1
                    +	ORR	t3<<24,s2,s2
                    +	MOVBU	13(t4),t2
                    +	MOVBU	12(t4),t3
                    +	ORR	t1<<8,s3,s3
                    +	ORR	t2<<16,s3,s3
                    +	ORR	t3<<24,s3,s3
                    +#else
                    +	MOVW	0(t4),s0
                    +	MOVW	4(t4),s1
                    +	MOVW	8(t4),s2
                    +	MOVW	12(t4),s3
                    +	REV	s0,s0
                    +	REV	s1,s1
                    +	REV	s2,s2
                    +	REV	s3,s3
                    +#endif
                    +
                    +	MOVM.IA.W	(key),[t1-i1]
                    +	EOR	t1,s0,s0
                    +	MOVW	240-16(key),t4
                    +	EOR	t2,s1,s1
                    +	EOR	t3,s2,s2
                    +	EOR	i1,s3,s3
                    +	SUB	$1,t4,t4
                    +	MOVW	$255,t4
                    +
                    +	AND	s0>>16,t4,i1
                    +	AND	s0>>8,t4,i2
                    +	AND	s0,t4,i3
                    +	MOVW	s0>>24,s0
                    +dec_loop:
                    +	MOVW	i1<<2(tbl),t1	// Td1[s0>>16]
                    +	AND	s1,t4,i1	// i0
                    +	MOVW	i2<<2(tbl),t2	// Td2[s0>>8]
                    +	AND	s1>>16,t4,i2
                    +	MOVW	i3<<2(tbl),t3	// Td3[s0>>0]
                    +	AND	s1>>8,t4,i3
                    +	MOVW	s0<<2(tbl),s0	// Td0[s0>>24]
                    +	MOVW	s1>>24,s1
                    +
                    +	MOVW	i1<<2(tbl),i1	// Td3[s1>>0]
                    +	MOVW	i2<<2(tbl),i2	// Td1[s1>>16]
                    +	MOVW	i3<<2(tbl),i3	// Td2[s1>>8]
                    +	EOR	i1@>24,s0,s0
                    +	MOVW	s1<<2(tbl),s1	// Td0[s1>>24]
                    +	AND	s2>>8,t4,i1	// i0
                    +	EOR	t2@>8,i2,t2
                    +	AND	s2,t4,i2	// i1
                    +	EOR	t3@>8,i3,t3
                    +	AND	s2>>16,t4,i3
                    +	MOVW	i1<<2(tbl),i1	// Td2[s2>>8]
                    +	EOR	t1@>8,s1,s1
                    +	MOVW	i2<<2(tbl),i2	// Td3[s2>>0]
                    +	MOVW	s2>>24,s2
                    +
                    +	MOVW	i3<<2(tbl),i3	// Td1[s2>>16]
                    +	EOR	i1@>16,s0,s0
                    +	MOVW	s2<<2(tbl),s2	// Td0[s2>>24]
                    +	AND	s3>>16,t4,i1	// i0
                    +	EOR	i2@>24,s1,s1
                    +	AND	s3>>8,t4,i2	// i1
                    +	EOR	t3@>8,i3,t3
                    +	AND	s3,t4,i3	// i2
                    +	MOVW	i1<<2(tbl),i1	// Td1[s3>>16]
                    +	EOR	t2@>8,s2,s2
                    +	MOVW	i2<<2(tbl),i2	// Td2[s3>>8]
                    +	MOVW	s3>>24,s3
                    +
                    +	MOVW	i3<<2(tbl),i3	// Td3[s3>>0]
                    +	EOR	i1@>8,s0,s0
                    +	MOVW.P	16(key),i1
                    +	EOR	i2@>16,s1,s1
                    +	MOVW	s3<<2(tbl),s3	// Td0[s3>>24]
                    +	EOR	i3@>24,s2,s2
                    +
                    +	MOVW	-12(key),t1
                    +	EOR	i1,s0,s0
                    +	MOVW	-8(key),t2
                    +	EOR	t3@>8,s3,s3
                    +	MOVW	-4(key),t3
                    +	AND	s0>>16,t4,i1
                    +	EOR	t1,s1,s1
                    +	MOVW	nr+0(FP), t1
                    +	AND	s0>>8,t4,i2
                    +	EOR	t2,s2,s2
                    +	AND	s0,t4,i3
                    +	EOR	t3,s3,s3
                    +	SUB.S	$1,t1,t1
                    +	MOVW	s0>>24,s0
                    +
                    +	MOVW	t1, nr+0(FP)
                    +	BGT	dec_loop
                    +
                    +	MOVW	$·sbox1(SB),tbl
                    +
                    +	MOVW	0(tbl),t2	// prefetch Td4
                    +	MOVW	32(tbl),t3
                    +	MOVW	64(tbl),t1
                    +	MOVW	96(tbl),t2
                    +	MOVW	128(tbl),t3
                    +	MOVW	160(tbl),t1
                    +	MOVW	192(tbl),t2
                    +	MOVW	224(tbl),t3
                    +
                    +	MOVBU	s0<<0(tbl),s0	// Td4[s0>>24]
                    +	MOVBU	i1<<0(tbl),t1	// Td4[s0>>16]
                    +	AND	s1,t4,i1	// i0
                    +	MOVBU	i2<<0(tbl),t2	// Td4[s0>>8]
                    +	AND	s1>>16,t4,i2
                    +	MOVBU	i3<<0(tbl),t3	// Td4[s0>>0]
                    +	AND	s1>>8,t4,i3
                    +
                    +	ADD	s1>>24,tbl,s1
                    +	MOVBU	i1<<0(tbl),i1	// Td4[s1>>0]
                    +	MOVBU	(s1),s1		// Td4[s1>>24]
                    +	MOVBU	i2<<0(tbl),i2	// Td4[s1>>16]
                    +	EOR	s0<<24,i1,s0
                    +	MOVBU	i3<<0(tbl),i3	// Td4[s1>>8]
                    +	EOR	s1<<8,t1,s1
                    +	AND	s2>>8,t4,i1	// i0
                    +	EOR	i2<<8,t2,t2
                    +	AND	s2,t4,i2	// i1
                    +	MOVBU	i1<<0(tbl),i1	// Td4[s2>>8]
                    +	EOR	i3<<8,t3,t3
                    +	MOVBU	i2<<0(tbl),i2	// Td4[s2>>0]
                    +	AND	s2>>16,t4,i3
                    +
                    +	ADD	s2>>24,tbl,s2
                    +	MOVBU	(s2),s2		// Td4[s2>>24]
                    +	EOR	i1<<8,s0,s0
                    +	MOVBU	i3<<0(tbl),i3	// Td4[s2>>16]
                    +	EOR	s1<<16,i2,s1
                    +	AND	s3>>16,t4,i1	// i0
                    +	EOR	s2<<16,t2,s2
                    +	AND	s3>>8,t4,i2	// i1
                    +	MOVBU	i1<<0(tbl),i1	// Td4[s3>>16]
                    +	EOR	i3<<16,t3,t3
                    +	MOVBU	i2<<0(tbl),i2	// Td4[s3>>8]
                    +	AND	s3,t4,i3	// i2
                    +
                    +	ADD	s3>>24,tbl,s3
                    +	MOVBU	i3<<0(tbl),i3	// Td4[s3>>0]
                    +	MOVBU	(s3),s3		// Td4[s3>>24]
                    +	EOR	i1<<16,s0,s0
                    +	MOVW	0(key),i1
                    +	EOR	i2<<8,s1,s1
                    +	MOVW	4(key),t1
                    +	EOR	s2<<8,i3,s2
                    +	MOVW	8(key),t2
                    +	EOR	s3<<24,t3,s3
                    +	MOVW	12(key),t3
                    +
                    +	EOR	i1,s0,s0
                    +	EOR	t1,s1,s1
                    +	EOR	t2,s2,s2
                    +	EOR	t3,s3,s3
                    +
                    +	MOVW	dst+8(FP), t4
                    +
                    +#ifdef ARM_ARCH_7
                    +	REV	s0,s0
                    +	REV	s1,s1
                    +	REV	s2,s2
                    +	REV	s3,s3
                    +	MOVW	s0,0(t4)
                    +	MOVW	s1,4(t4)
                    +	MOVW	s2,8(t4)
                    +	MOVW	s3,12(t4)
                    +#else
                    +	MOVW	s0>>24,t1	// write output in endian-neutral
                    +	MOVW	s0>>16,t2	// manner...
                    +	MOVW	s0>>8,t3
                    +	MOVBU	t1,0(t4)
                    +	MOVBU	t2,1(t4)
                    +	MOVW	s1>>24,t1
                    +	MOVBU	t3,2(t4)
                    +	MOVW	s1>>16,t2
                    +	MOVBU	s0,3(t4)
                    +	MOVW	s1>>8,t3
                    +	MOVBU	t1,4(t4)
                    +	MOVBU	t2,5(t4)
                    +	MOVW	s2>>24,t1
                    +	MOVBU	t3,6(t4)
                    +	MOVW	s2>>16,t2
                    +	MOVBU	s1,7(t4)
                    +	MOVW	s2>>8,t3
                    +	MOVBU	t1,8(t4)
                    +	MOVBU	t2,9(t4)
                    +	MOVW	s3>>24,t1
                    +	MOVBU	t3,10(t4)
                    +	MOVW	s3>>16,t2
                    +	MOVBU	s2,11(t4)
                    +	MOVW	s3>>8,t3
                    +	MOVBU	t1,12(t4)
                    +	MOVBU	t2,13(t4)
                    +	MOVBU	t3,14(t4)
                    +	MOVBU	s3,15(t4)
                    +#endif
                    +	RET
                    diff --git a/src/crypto/aes/cipher_arm.go b/src/crypto/aes/cipher_arm.go
                    new file mode 100644
                    index 0000000..9cd161c
                    --- /dev/null
                    +++ b/src/crypto/aes/cipher_arm.go
                    @@ -0,0 +1,84 @@
                    +// Copyright 2017 The Go Authors. All rights reserved.
                    +// Use of this source code is governed by a BSD-style
                    +// license that can be found in the LICENSE file.
                    +
                    +// +build arm,!nacl
                    +
                    +package aes
                    +
                    +import (
                    +	"crypto/cipher"
                    +)
                    +
                    +// Body-less declarations; the implementations are defined in asm_arm.s
                    +func encryptBlockAsm(nr int, xk *uint32, dst, src *byte) // Encrypt passes nr = len(c.enc)/4-2 and pointers to the first element of c.enc, dst and src
                    +func decryptBlockAsm(nr int, xk *uint32, dst, src *byte) // Decrypt passes nr = len(c.dec)/4-2 and pointers to the first element of c.dec, dst and src
                    +func expandKeyEncAsm(nr int, userKey *byte, enc *uint32) // Go callers pass nr = 10, 12 or 14, chosen from len(key)
                    +func expandKeyDecAsm(nr int, enc *uint32, dec *uint32) // called right after expandKeyEncAsm with the same nr; presumably derives dec from enc - confirm in asm_arm.s
                    +
                    +type aesCipherAsm struct { // the type newCipher returns as a cipher.Block; its methods call the assembly routines
                    +	aesCipher // embedded; the enc and dec slices used by the methods are promoted from it
                    +}
                    +
                    +func newCipher(key []byte) (cipher.Block, error) { // builds an aesCipherAsm from key; the returned error is always nil
                    +	n := len(key) + 28 // schedule length in uint32 words: 44, 52 or 60 for 16-, 24- or 32-byte keys
                    +	c := aesCipherAsm{aesCipher{make([]uint32, n), make([]uint32, n)}} // two zeroed slices of n words each
                    +	rounds := 10 // also what any key length not matched below ends up with
                    +	switch len(key) {
                    +	case 128 / 8:
                    +		rounds = 10
                    +	case 192 / 8:
                    +		rounds = 12
                    +	case 256 / 8:
                    +		rounds = 14
                    +	}
                    +	expandKeyEncAsm(rounds, &key[0], &c.enc[0]) // &key[0] panics on an empty key. NOTE(review): no size validation here - presumably the caller rejects bad key lengths; confirm
                    +	expandKeyDecAsm(rounds, &c.enc[0], &c.dec[0]) // runs on the c.enc just written and is handed c.dec as its output argument
                    +	return &c, nil
                    +}
                    +
                    +func (c *aesCipherAsm) BlockSize() int { return BlockSize } // returns the package-level BlockSize value; c is not read
                    +
                    +func (c *aesCipherAsm) Encrypt(dst, src []byte) { // checks both buffers, then hands their start addresses to encryptBlockAsm
                    +	if len(src) < BlockSize { // reject short input before &src[0] is taken
                    +		panic("crypto/aes: input not full block")
                    +	}
                    +	if len(dst) < BlockSize { // same check for the output buffer
                    +		panic("crypto/aes: output not full block")
                    +	}
                    +	encryptBlockAsm(len(c.enc)/4-2, &c.enc[0], &dst[0], &src[0]) // nr is 9, 11 or 13 for schedules of 44, 52 or 60 words
                    +}
                    +
                    +func (c *aesCipherAsm) Decrypt(dst, src []byte) { // checks both buffers, then hands their start addresses to decryptBlockAsm
                    +	if len(src) < BlockSize { // reject short input before &src[0] is taken
                    +		panic("crypto/aes: input not full block")
                    +	}
                    +	if len(dst) < BlockSize { // same check for the output buffer
                    +		panic("crypto/aes: output not full block")
                    +	}
                    +	decryptBlockAsm(len(c.dec)/4-2, &c.dec[0], &dst[0], &src[0]) // nr is 9, 11 or 13 for schedules of 44, 52 or 60 words; uses the dec schedule, not enc
                    +}
                    +
                    +// expandKey is used by BenchmarkExpand to ensure that the asm implementation
                    +// of key expansion is used for the benchmark when it is available.
                    +func expandKey(key []byte, enc, dec []uint32) { // enc is always passed to expandKeyEncAsm; dec is only touched when non-nil
                    +	rounds := 10 // rounds needed for AES128; also the value kept for any key length not matched below
                    +	switch len(key) {
                    +	case 192 / 8:
                    +		rounds = 12
                    +	case 256 / 8:
                    +		rounds = 14
                    +	}
                    +	expandKeyEncAsm(rounds, &key[0], &enc[0]) // the index expressions panic if key or enc is empty
                    +	if dec != nil {
                    +		expandKeyDecAsm(rounds, &enc[0], &dec[0]) // a non-nil but zero-length dec would panic on &dec[0]
                    +	}
                    +}
                    +
                    +// rcon table used by asm_arm.s (presumably by the key-expansion routines - confirm there)
                    +var rcon = [16]uint32{
                    +	0x01000000, 0x02000000, 0x04000000, 0x08000000, // each non-zero entry has its value in the most significant byte
                    +	0x10000000, 0x20000000, 0x40000000, 0x80000000,
                    +	0x1B000000, 0x36000000, 0, 0, // ten non-zero entries; the remaining six slots are zero
                    +	0, 0, 0, 0,
                    +}
                    diff --git a/src/crypto/aes/cipher_generic.go b/src/crypto/aes/cipher_generic.go
                    index ca74aa8..bfe2da8 100644
                    --- a/src/crypto/aes/cipher_generic.go
                    +++ b/src/crypto/aes/cipher_generic.go
                    @@ -3,6 +3,7 @@
                     // license that can be found in the LICENSE file.
                     
                     // +build !amd64,!s390x,!ppc64le
                    +// +build !arm nacl
                     
                     package aes
                     
                    

                    To view, visit change 38366. To unsubscribe, visit settings.

                    Gerrit-Project: go
                    Gerrit-Branch: master
                    Gerrit-MessageType: newpatchset
                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                    Gerrit-Change-Number: 38366
                    Gerrit-PatchSet: 4

                    Cherry Zhang (Gerrit)

                    unread,
                    Mar 27, 2017, 3:23:04 PM3/27/17
                    to Nick Craig-Wood, Josselin Costanzi, Gobot Gobot, Brad Fitzpatrick, golang-co...@googlegroups.com

                    Cherry Zhang posted comments on this change.

                    View Change

                    Patch set 4: Run-TryBot +1, Code-Review +1

                    I only looked at the assembly from a general perspective (Go's calling convention, etc.), not the algorithm. Leave the actual review to someone who knows the algorithm.

                      To view, visit change 38366. To unsubscribe, visit settings.

                      Gerrit-Project: go
                      Gerrit-Branch: master
                      Gerrit-MessageType: comment
                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                      Gerrit-Change-Number: 38366
                      Gerrit-PatchSet: 4
                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                      Gerrit-Comment-Date: Mon, 27 Mar 2017 19:23:02 +0000
                      Gerrit-HasComments: No

                      Gobot Gobot (Gerrit)

                      unread,
                      Mar 27, 2017, 3:23:53 PM3/27/17
                      to Nick Craig-Wood, Cherry Zhang, Josselin Costanzi, Brad Fitzpatrick, golang-co...@googlegroups.com

                      Gobot Gobot posted comments on this change.

                      View Change

                      Patch set 4:

                      TryBots beginning. Status page: http://farmer.golang.org/try?commit=ced74308

                        To view, visit change 38366. To unsubscribe, visit settings.

                        Gerrit-Project: go
                        Gerrit-Branch: master
                        Gerrit-MessageType: comment
                        Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                        Gerrit-Change-Number: 38366
                        Gerrit-PatchSet: 4
                        Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                        Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                        Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                        Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                        Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                        Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                        Gerrit-Comment-Date: Mon, 27 Mar 2017 19:23:51 +0000
                        Gerrit-HasComments: No

                        Gobot Gobot (Gerrit)

                        unread,
                        Mar 27, 2017, 3:31:28 PM3/27/17
                        to Nick Craig-Wood, Cherry Zhang, Josselin Costanzi, Brad Fitzpatrick, golang-co...@googlegroups.com

                        Gobot Gobot posted comments on this change.

                        View Change

                        Patch set 4: TryBot-Result +1

                        TryBots are happy.

                          To view, visit change 38366. To unsubscribe, visit settings.

                          Gerrit-Project: go
                          Gerrit-Branch: master
                          Gerrit-MessageType: comment
                          Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                          Gerrit-Change-Number: 38366
                          Gerrit-PatchSet: 4
                          Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                          Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                          Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                          Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                          Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                          Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                          Gerrit-Comment-Date: Mon, 27 Mar 2017 19:31:26 +0000
                          Gerrit-HasComments: No

                          Nick Craig-Wood (Gerrit)

                          unread,
                          Mar 28, 2017, 4:53:42 AM3/28/17
                          to Gobot Gobot, Cherry Zhang, Josselin Costanzi, Brad Fitzpatrick, golang-co...@googlegroups.com

                          Nick Craig-Wood posted comments on this change.

                          View Change

                          Patch set 4:

                          > Patch Set 4: Run-TryBot +1, Code-Review +1
                          >
                          > I only looked at the assembly from a general perspective (Go's calling convention, etc.), not the algorithm. Leave the actual review to someone who knows the algorithm.

                          The review is probably more a question of looking at

                          https://git.openssl.org/?p=openssl.git;a=blob;f=crypto/aes/asm/aes-armv4.pl

                          and making sure the translation to asm_arm.s is faithful. I think we can assume that the OpenSSL team got the algorithm right, especially since the unit tests pass.

                            To view, visit change 38366. To unsubscribe, visit settings.

                            Gerrit-Project: go
                            Gerrit-Branch: master
                            Gerrit-MessageType: comment
                            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                            Gerrit-Change-Number: 38366
                            Gerrit-PatchSet: 4
                            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                            Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                            Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                            Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                            Gerrit-Comment-Date: Tue, 28 Mar 2017 08:53:37 +0000
                            Gerrit-HasComments: No

                            Nick Craig-Wood (Gerrit)

                            unread,
                            Jun 8, 2017, 12:41:50 PM6/8/17
                            to Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, Brad Fitzpatrick, golang-co...@googlegroups.com

                            Nick Craig-Wood posted comments on this change.

                            View Change

                            Patch set 4:

                            Please can you take a look at this? It has been languishing, unloved, probably because it has lots of ARM assembler in.

                            Thanks :-)

                              To view, visit change 38366. To unsubscribe, visit settings.

                              Gerrit-Project: go
                              Gerrit-Branch: master
                              Gerrit-MessageType: comment
                              Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                              Gerrit-Change-Number: 38366
                              Gerrit-PatchSet: 4
                              Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                              Gerrit-Reviewer: Adam Langley <a...@golang.org>
                              Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                              Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                              Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                              Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                              Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                              Gerrit-Comment-Date: Thu, 08 Jun 2017 16:41:46 +0000
                              Gerrit-HasComments: No
                              Gerrit-HasLabels: No

                              Brad Fitzpatrick (Gerrit)

                              unread,
                              Jun 8, 2017, 12:44:02 PM6/8/17
                              to Nick Craig-Wood, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                              Brad Fitzpatrick posted comments on this change.

                              View Change

                              Patch set 4:

                              Adam is busy with something else for a bit and has no time for Go work at the moment.

                              Perhaps somebody else who knows ARM + crypto stuff can review.

                                To view, visit change 38366. To unsubscribe, visit settings.

                                Gerrit-Project: go
                                Gerrit-Branch: master
                                Gerrit-MessageType: comment
                                Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                Gerrit-Change-Number: 38366
                                Gerrit-PatchSet: 4
                                Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                Gerrit-Comment-Date: Thu, 08 Jun 2017 16:43:59 +0000
                                Gerrit-HasComments: No
                                Gerrit-HasLabels: No

                                Nick Craig-Wood (Gerrit)

                                unread,
                                Jun 8, 2017, 12:49:05 PM6/8/17
                                to Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                Nick Craig-Wood posted comments on this change.

                                View Change

                                Patch set 4:

                                > Adam is busy with something else for a bit and has no time for Go work at the moment.
                                >
                                > Perhaps somebody else who knows ARM + crypto stuff can review.

                                Any suggestions?

                                Thanks

                                  To view, visit change 38366. To unsubscribe, visit settings.

                                  Gerrit-Project: go
                                  Gerrit-Branch: master
                                  Gerrit-MessageType: comment
                                  Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                  Gerrit-Change-Number: 38366
                                  Gerrit-PatchSet: 4
                                  Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                  Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                  Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                  Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                  Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                  Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                  Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                  Gerrit-Comment-Date: Thu, 08 Jun 2017 16:49:01 +0000
                                  Gerrit-HasComments: No
                                  Gerrit-HasLabels: No

                                  Brad Fitzpatrick (Gerrit)

                                  unread,
                                  Jun 8, 2017, 1:05:39 PM6/8/17
                                  to Nick Craig-Wood, Filippo Valsorda, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                  Brad Fitzpatrick posted comments on this change.

                                  View Change

                                  Patch set 4:

                                  Filippo, is this something you could review?

                                    To view, visit change 38366. To unsubscribe, visit settings.

                                    Gerrit-Project: go
                                    Gerrit-Branch: master
                                    Gerrit-MessageType: comment
                                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                    Gerrit-Change-Number: 38366
                                    Gerrit-PatchSet: 4
                                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                    Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                    Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                    Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                    Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                    Gerrit-Comment-Date: Thu, 08 Jun 2017 17:05:36 +0000
                                    Gerrit-HasComments: No
                                    Gerrit-HasLabels: No

                                    Emmanuel Odeke (Gerrit)

                                    unread,
                                    Jun 8, 2017, 5:44:22 PM6/8/17
                                    to Nick Craig-Wood, Andreas Auernhammer, Filippo Valsorda, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                    Emmanuel Odeke posted comments on this change.

                                    View Change

                                    Patch set 4:

                                    /cc @Andreas too for crypto and ARM

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Thu, 08 Jun 2017 21:44:19 +0000
                                      Gerrit-HasComments: No
                                      Gerrit-HasLabels: No

                                      Filippo Valsorda (Gerrit)

                                      unread,
                                      Jun 11, 2017, 7:31:55 AM6/11/17
                                      to Nick Craig-Wood, Andreas Auernhammer, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                      Filippo Valsorda posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      Some preliminary comments. (Most important one is the one about the CLA.)

                                      To review the assembly, I worked towards a minimal diff between this and the OpenSSL code. Here are the changes I made:

                                      • OpenSSL: __ARMEL__ defined (when ARM_ARCH_7 is defined)
                                      • OpenSSL: __thumb2__ and __APPLE__ not defined
                                      • OpenSSL: inlined armv4_AES_decrypt/encrypt
                                      • OpenSSL: regex'd most instructions
                                      • Go: removed needless Rx<<0
                                      • Go: inlined maskXX defines

                                      https://gist.github.com/FiloSottile/85eefcd4a7181678a28d68c8d354f28e#file-not-diff

                                      Then by aliasing both lr and $rounds to t4 in the OpenSSL code:

                                      https://gist.github.com/FiloSottile/85eefcd4a7181678a28d68c8d354f28e#file-aliasing-diff

                                      Still have to review the diff fully, but seems sane. Have to check that the lr+rounds aliasing is correct. Also have to check that the tables match.

                                      (I couldn't find confirmation anywhere that MOVM.W means MOVM with writeback. The Plan 9 assembly reference just calls it a special addressing mode bit.)

                                      (6 comments)

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Sun, 11 Jun 2017 11:31:48 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Andreas Auernhammer (Gerrit)

                                      unread,
                                      Jun 11, 2017, 9:17:15 PM6/11/17
                                      to Nick Craig-Wood, Filippo Valsorda, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                      Andreas Auernhammer posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      (3 comments)

                                        • Is this a magic Go #define, or are all #ifdef ARM_ARCH_7 dead code?

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Mon, 12 Jun 2017 01:17:11 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Filippo Valsorda (Gerrit)

                                      unread,
                                      Jun 12, 2017, 4:54:21 AM6/12/17
                                      to Nick Craig-Wood, Andreas Auernhammer, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Cherry Zhang, Josselin Costanzi, golang-co...@googlegroups.com

                                      Filippo Valsorda posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      (1 comment)

                                        • No, that would be

                                              (!amd64 AND !s390x AND !ppc64le AND !arm) OR nacl

                                          while the multi-line was

                                              (!amd64 AND !s390x AND !ppc64le) AND (!arm OR nacl)

                                          The latter is correct.

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Mon, 12 Jun 2017 08:54:16 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Cherry Zhang (Gerrit)

                                      unread,
                                      Jun 12, 2017, 8:16:21 AM6/12/17
                                      to Nick Craig-Wood, Filippo Valsorda, Andreas Auernhammer, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                      Cherry Zhang posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      (1 comment)

                                        • tbl is used as a stable register (but not across functions). I'd either add

                                        • R10 is the g (goroutine) register. It is needed at any safe point (preemption point). If there is no safe point in the function, it is OK to use the g register as long as the function restores it upon return.

                                          R11 is the temp register used in the assembler to synthesize instructions. It is possible to carefully write the code (e.g. not using synthesized instructions) so R11 is not clobbered.

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Mon, 12 Jun 2017 12:16:18 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Andreas Auernhammer (Gerrit)

                                      unread,
                                      Jun 12, 2017, 12:30:03 PM6/12/17
                                      to Nick Craig-Wood, Cherry Zhang, Filippo Valsorda, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                      Andreas Auernhammer posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      (1 comment)

                                        • No, that would be

                                          Oh, yes I see - sry, never mind! Thanks Filippo!

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Mon, 12 Jun 2017 16:30:00 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Nick Craig-Wood (Gerrit)

                                      unread,
                                      Jun 12, 2017, 5:01:32 PM6/12/17
                                      to Andreas Auernhammer, Cherry Zhang, Filippo Valsorda, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                      Nick Craig-Wood posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      Thanks for the thorough review Filippo

                                      I've replied in line to your comments.

                                      I'll upload a new version when we've decided on what to do about the ARM_ARCH_7 #ifdef.

                                      To review the assembly, I worked towards a minimal diff between this and the OpenSSL code. Here are the changes I did:

                                      • OpenSSL: __ARMEL__ defined (when ARM_ARCH_7 is defined)
                                      • OpenSSL: __thumb2__ and __APPLE__ not defined
                                      • OpenSSL: inlined armv4_AES_decrypt/encrypt
                                      • OpenSSL: regex'd most instructions
                                      • Go: removed needless Rx<<0
                                      • Go: inlined maskXX defines

                                      That looks correct.


                                      https://gist.github.com/FiloSottile/85eefcd4a7181678a28d68c8d354f28e#file-not-diff

                                      Then by aliasing both lr and $rounds to t4 in the OpenSSL code:

                                      https://gist.github.com/FiloSottile/85eefcd4a7181678a28d68c8d354f28e#file-aliasing-diff

                                      Still have to review the diff fully, but seems sane. Have to check that the lr+rounds aliasing is correct. Also have to check that the tables match.

                                      (Couldn't find confirmation anywhere that MOVM.W means MOVM with writeback. The Plan 9 assembly reference just calls it a special addressing mode bit.)

                                      Yes I think you'd probably have to look in the ARM architecture for this. ARM assembly notates this a different way usually.

                                      Here are the ARM docs: MOVM == STMIA & LDMIA, and the writeback bit "W" is written as "!" in ARM instructions.

                                      http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0553a/BABCAEDD.html

                                      FYI here is the perl program I used to convert ARM style assembler to Go style: https://gist.github.com/ncw/e2edab9aab09891f9d81a8ab2684137

                                      It isn't a general purpose converter, it does just enough to convert the AES code!

                                      (6 comments)

                                        • I assume the CLA and license concerns have been settled out of band?

                                        • I assumed this was OK since I borrowed this wording from src/crypto/aes/asm_ppc64le.s

                                        • Ack

                                        • Ack

                                        • Patch Set #4, Line 60: #define i3 R9 // forbidden on nacl, check usage with -shared

                                          Looks like you did check usage with -shared. If so, cut the comment.

                                        • Ack

                                        • R10 is the g (goroutine) register. It is needed at any safe point (preempti

                                          I have checked that R11 isn't in use in synthesized instructions so I've adjusted the comment slightly.

                                        • This does not seem to work - at least for cross-compilation...

                                          Those ARM_ARCH_7 defines do not work currently. As far as I know there is no compile time way of detecting which ARM version you are using in ARM assembler code. It would be really, really useful if there was!

                                          The #ifdef'd-out code would make this run much faster on ARMv7. The REV instruction is available in ARMv6 and above, and OpenSSL assumes that unaligned loads will definitely be available in ARMv7 and above.

                                          I'm happy to remove the #ifdefed code if that is the consensus, though I thought it merited discussion first.

                                      To view, visit change 38366. To unsubscribe, visit settings.

                                      Gerrit-Project: go
                                      Gerrit-Branch: master
                                      Gerrit-MessageType: comment
                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                      Gerrit-Change-Number: 38366
                                      Gerrit-PatchSet: 4
                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                      Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                      Gerrit-Comment-Date: Mon, 12 Jun 2017 21:01:25 +0000
                                      Gerrit-HasComments: Yes
                                      Gerrit-HasLabels: No

                                      Nick Craig-Wood (Gerrit)

                                      unread,
                                      Jun 12, 2017, 5:03:48 PM6/12/17
                                      to Andreas Auernhammer, Cherry Zhang, Filippo Valsorda, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                      Nick Craig-Wood posted comments on this change.

                                      View Change

                                      Patch set 4:

                                      FYI here is the perl program I used to convert ARM style assembler to Go style: https://gist.github.com/ncw/e2edab9aab09891f9d81a8ab2684137

                                      It isn't a general purpose converter, it does just enough to convert the AES code!

                                      Cut and paste fail - this is the URL

                                      https://gist.github.com/ncw/e2edab9aab09891f9d81a8ab26841373

                                        To view, visit change 38366. To unsubscribe, visit settings.

                                        Gerrit-Project: go
                                        Gerrit-Branch: master
                                        Gerrit-MessageType: comment
                                        Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                        Gerrit-Change-Number: 38366
                                        Gerrit-PatchSet: 4
                                        Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                        Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                        Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                        Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                        Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                        Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                        Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                        Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                        Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                        Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                        Gerrit-Comment-Date: Mon, 12 Jun 2017 21:03:45 +0000
                                        Gerrit-HasComments: No
                                        Gerrit-HasLabels: No

                                        Filippo Valsorda (Gerrit)

                                        unread,
                                        Jun 13, 2017, 10:47:24 AM6/13/17
                                        to Nick Craig-Wood, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Brad Fitzpatrick, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                        Filippo Valsorda posted comments on this change.

                                        View Change

                                        Patch set 4:

                                        (3 comments)

                                          • I have checked that R11 isn't in use in synthesized instructions so I've ad

                                            It looks like there is no CALL, so no preemption points? If that's the case, it would probably be nicer to store R10 and then use it, instead of aliasing lr+rounds. It would also make the review faster.

                                          • Those ARM_ARCH_7 defines do not work currently. As far as I know there is n

                                            I'm surprised there's no way of switching on GOARM. If that's the case, I'd open an issue to add a way, leave the ARM_ARCH_7 defines, and add a comment at the top mentioning ARM_ARCH_7, the fact that it's currently dead code, and linking to the issue.

                                        To view, visit change 38366. To unsubscribe, visit settings.

                                        Gerrit-Project: go
                                        Gerrit-Branch: master
                                        Gerrit-MessageType: comment
                                        Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                        Gerrit-Change-Number: 38366
                                        Gerrit-PatchSet: 4
                                        Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                        Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                        Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                        Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                        Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                        Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                        Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                        Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                        Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                        Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                        Gerrit-Comment-Date: Tue, 13 Jun 2017 14:47:14 +0000
                                        Gerrit-HasComments: Yes
                                        Gerrit-HasLabels: No

                                        Brad Fitzpatrick (Gerrit)

                                        unread,
                                        Jun 13, 2017, 4:00:08 PM6/13/17
                                        to Nick Craig-Wood, Brad Fitzpatrick, Filippo Valsorda, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                        Brad Fitzpatrick posted comments on this change.

                                        View Change

                                        Patch set 4:

                                        R=go1.10

                                        Nick, are you fine with this being in Go 1.10? That's what it looks like to me, but I haven't been following this CL closely.

                                        If this is for Go 1.9, let me know, but I'm not sure how much review+testing remains.

                                          To view, visit change 38366. To unsubscribe, visit settings.

                                          Gerrit-Project: go
                                          Gerrit-Branch: master
                                          Gerrit-MessageType: comment
                                          Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                          Gerrit-Change-Number: 38366
                                          Gerrit-PatchSet: 4
                                          Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                          Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                          Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                          Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                          Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                          Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                          Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                          Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                          Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                          Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                          Gerrit-Comment-Date: Tue, 13 Jun 2017 20:00:05 +0000
                                          Gerrit-HasComments: No
                                          Gerrit-HasLabels: No

                                          Nick Craig-Wood (Gerrit)

                                          unread,
                                          Jun 14, 2017, 4:13:32 AM6/14/17
                                          to Brad Fitzpatrick, Filippo Valsorda, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                          Nick Craig-Wood posted comments on this change.

                                          View Change

                                          Patch set 4:

                                          Patch Set 4:

                                          R=go1.10

                                          Nick, are you fine with this being in Go 1.10? That's what it looks like to me, but I haven't been following this CL closely.

                                          If this is for Go 1.9, let me know, but I'm not sure how much review+testing remains.

                                          I would have liked it for Go 1.9, but I didn't push the review process hard enough, so Go 1.10 seems sensible to me given where we are in the release cycle.

                                            To view, visit change 38366. To unsubscribe, visit settings.

                                            Gerrit-Project: go
                                            Gerrit-Branch: master
                                            Gerrit-MessageType: comment
                                            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                            Gerrit-Change-Number: 38366
                                            Gerrit-PatchSet: 4
                                            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                            Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                            Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                            Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                            Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                            Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                            Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                            Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                            Gerrit-Comment-Date: Wed, 14 Jun 2017 08:13:28 +0000
                                            Gerrit-HasComments: No
                                            Gerrit-HasLabels: No

                                            Nick Craig-Wood (Gerrit)

                                            unread,
                                            Jun 15, 2017, 1:05:25 PM6/15/17
                                            to Brad Fitzpatrick, Filippo Valsorda, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                            Nick Craig-Wood posted comments on this change.

                                            View Change

                                            Patch set 4:

                                            See inline comment!

                                            (1 comment)

                                            • File src/crypto/aes/asm_arm.s:

                                              • Patch Set #4, Line 65: #define tbl R11 // can be used by the linker to synthesise instructions

                                                It looks like there is no CALL, so no preemption points? If that's the case

                                                I did a quick code grep and found that there are other bits of ARM code which use g (R10)...

                                                So I'll change this to use R10, which will give a small performance improvement.

                                                It will take me a few days though as I've got lots of other stuff on.

                                            To view, visit change 38366. To unsubscribe, visit settings.

                                            Gerrit-Project: go
                                            Gerrit-Branch: master
                                            Gerrit-MessageType: comment
                                            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                            Gerrit-Change-Number: 38366
                                            Gerrit-PatchSet: 4
                                            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                            Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                            Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                            Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                            Gerrit-Reviewer: Filippo Valsorda <fil...@cloudflare.com>
                                            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                            Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                            Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                            Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                            Gerrit-Comment-Date: Thu, 15 Jun 2017 17:05:20 +0000
                                            Gerrit-HasComments: Yes
                                            Gerrit-HasLabels: No

                                            Nick Craig-Wood (Gerrit)

                                            unread,
                                            Oct 2, 2017, 11:14:56 AM10/2/17
                                            to Adam Langley, Cherry Zhang, Andreas Auernhammer, Gobot Gobot, Brad Fitzpatrick, goph...@pubsubhelper.golang.org, Josselin Costanzi, Emmanuel Odeke, golang-co...@googlegroups.com

                                            Nick Craig-Wood uploaded patch set #5 to this change.

                                            View Change

                                            crypto/aes: ARM assembly versions of encrypt, decrypt and expandKey

                                            ARM assembly for AES crypto adapted from openssl giving an
                                            encrypt/decrypt speed up of 1.6-2.7x and a key scheduling speedup of
                                            2.2-4.7x.


                                            Raspberry Pi 3 BCM2709 ARMv7 Processor rev 5 (v7l)

                                            name old time/op new time/op delta
                                            Encrypt-4    3.12µs ± 1%     1.12µs ± 1%   -63.96%  (p=0.000 n=20+20)
                                            Decrypt-4 3.10µs ± 1% 1.21µs ± 1% -61.10% (p=0.000 n=20+20)
                                            Expand-4 11.3µs ± 1% 2.4µs ± 1% -78.38% (p=0.000 n=16+20)


                                            name old speed new speed delta
                                            Encrypt-4  5.13MB/s ± 2%  14.22MB/s ± 1%  +177.32%  (p=0.000 n=20+20)
                                            Decrypt-4 5.16MB/s ± 1% 13.25MB/s ± 1% +157.06% (p=0.000 n=20+20)


                                            Chromebook Samsung Exynos5 ARMv7 Processor rev 4 (v7l)

                                            name old time/op new time/op delta
                                            Encrypt-2     342ns ± 1%     217ns ± 3%  -36.47%  (p=0.000 n=16+19)
                                            Decrypt-2 343ns ± 6% 221ns ± 7% -35.52% (p=0.000 n=17+18)
                                            Expand-2 1.64µs ± 5% 0.73µs ±10% -55.56% (p=0.000 n=17+20)


                                            name old speed new speed delta
                                            Encrypt-2  46.7MB/s ± 1%  73.2MB/s ± 7%  +56.86%  (p=0.000 n=16+20)
                                            Decrypt-2 46.4MB/s ± 7% 71.8MB/s ± 9% +54.90% (p=0.000 n=18+19)


                                            Issue #4299

                                            Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                            ---
                                            A src/crypto/aes/asm_arm.s
                                            A src/crypto/aes/cipher_arm.go
                                            M src/crypto/aes/cipher_generic.go
                                            3 files changed, 940 insertions(+), 0 deletions(-)

                                            To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                            Gerrit-Project: go
                                            Gerrit-Branch: master
                                            Gerrit-MessageType: newpatchset
                                            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                            Gerrit-Change-Number: 38366
                                            Gerrit-PatchSet: 5
                                            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                            Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                            Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                            Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                            Gerrit-Reviewer: Filippo Valsorda

                                            Nick Craig-Wood (Gerrit)

                                            unread,
                                            Oct 2, 2017, 11:18:11 AM10/2/17
                                            to goph...@pubsubhelper.golang.org, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                            Uploaded patch set 5.

                                            Here is the patch redone to use the g register, which means it is much closer to the openssl original code and hopefully much easier to review.

                                            I've also addressed all the remaining issues, I think.

                                            Filippo: Apologies for the delay in resubmitting - I finished this all bar one benchmark when I saw you at Gophercon and it kind of slipped off my radar since then!

                                            View Change

                                              To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                              Gerrit-Project: go
                                              Gerrit-Branch: master
                                              Gerrit-MessageType: comment
                                              Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                              Gerrit-Change-Number: 38366
                                              Gerrit-PatchSet: 5
                                              Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                              Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                              Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                              Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                              Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                              Gerrit-Reviewer: Filippo Valsorda
                                              Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                              Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                              Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                              Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                              Gerrit-Comment-Date: Mon, 02 Oct 2017 15:18:06 +0000
                                              Gerrit-HasComments: No
                                              Gerrit-HasLabels: No

                                              Nick Craig-Wood (Gerrit)

                                              unread,
                                              Nov 7, 2017, 9:33:57 AM11/7/17
                                              to goph...@pubsubhelper.golang.org, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                              Is there any interest in merging this for Go 1.10? I feel the release freeze approaching ;-)

                                              View Change

                                                To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                Gerrit-Project: go
                                                Gerrit-Branch: master
                                                Gerrit-MessageType: comment
                                                Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                Gerrit-Change-Number: 38366
                                                Gerrit-PatchSet: 5
                                                Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                Gerrit-Reviewer: Filippo Valsorda
                                                Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                Gerrit-Comment-Date: Tue, 07 Nov 2017 14:33:54 +0000
                                                Gerrit-HasComments: No
                                                Gerrit-HasLabels: No

                                                Ian Lance Taylor (Gerrit)

                                                unread,
                                                Nov 7, 2017, 10:27:21 AM11/7/17
                                                to Nick Craig-Wood, goph...@pubsubhelper.golang.org, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Adam Langley, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                Ping Filippo and Cherry, I guess.

                                                View Change

                                                  To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                  Gerrit-Project: go
                                                  Gerrit-Branch: master
                                                  Gerrit-MessageType: comment
                                                  Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                  Gerrit-Change-Number: 38366
                                                  Gerrit-PatchSet: 5
                                                  Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                  Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                  Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                  Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                  Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                  Gerrit-Reviewer: Filippo Valsorda
                                                  Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                  Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                  Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                  Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                  Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                  Gerrit-Comment-Date: Tue, 07 Nov 2017 15:27:18 +0000
                                                  Gerrit-HasComments: No
                                                  Gerrit-HasLabels: No

                                                  Adam Langley (Gerrit)

                                                  unread,
                                                  Nov 8, 2017, 6:38:59 PM11/8/17
                                                  to Nick Craig-Wood, goph...@pubsubhelper.golang.org, Ian Lance Taylor, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                  I'm afraid that we cannot derive work from OpenSSL's sources without respecting the OpenSSL license, but Go is 3-BSD licensed.

                                                  Patch set 5:Code-Review -2

                                                  View Change

                                                    To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                    Gerrit-Project: go
                                                    Gerrit-Branch: master
                                                    Gerrit-MessageType: comment
                                                    Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                    Gerrit-Change-Number: 38366
                                                    Gerrit-PatchSet: 5
                                                    Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                    Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                    Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                    Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                    Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                    Gerrit-Reviewer: Filippo Valsorda
                                                    Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                    Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                    Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                    Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                    Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                    Gerrit-Comment-Date: Wed, 08 Nov 2017 23:38:56 +0000
                                                    Gerrit-HasComments: No
                                                    Gerrit-HasLabels: Yes

                                                    Nick Craig-Wood (Gerrit)

                                                    unread,
                                                    Nov 9, 2017, 3:12:07 AM11/9/17
                                                    to goph...@pubsubhelper.golang.org, Adam Langley, Ian Lance Taylor, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                    Patch Set 5: Code-Review-2

                                                    I'm afraid that we cannot derive work from OpenSSL's sources without respecting the OpenSSL license, but Go is 3-BSD licensed.

                                                    The low level assembler code in OpenSSL is licensed under the cryptogams license: https://www.openssl.org/~appro/cryptogams/ which is a 3-clause BSD license (I think), which seems eminently compatible.

                                                    Also we already have code derived from OpenSSL, src/crypto/aes/asm_ppc64le.s and src/crypto/sha256/sha256block_ppc64le.s

                                                    View Change

                                                      To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                      Gerrit-Project: go
                                                      Gerrit-Branch: master
                                                      Gerrit-MessageType: comment
                                                      Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                      Gerrit-Change-Number: 38366
                                                      Gerrit-PatchSet: 5
                                                      Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                      Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                      Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                      Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                      Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                      Gerrit-Reviewer: Filippo Valsorda
                                                      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                      Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                      Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                      Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                      Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                      Gerrit-Comment-Date: Thu, 09 Nov 2017 08:12:01 +0000
                                                      Gerrit-HasComments: No
                                                      Gerrit-HasLabels: No

                                                      Russ Cox (Gerrit)

                                                      unread,
                                                      Nov 9, 2017, 10:21:47 AM11/9/17
                                                      to Nick Craig-Wood, goph...@pubsubhelper.golang.org, Russ Cox, Adam Langley, Ian Lance Taylor, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                      Patch Set 5:

                                                      Patch Set 5: Code-Review-2

                                                      I'm afraid that we cannot derive work from OpenSSL's sources without respecting the OpenSSL license, but Go is 3-BSD licensed.

                                                      The low level assembler code in OpenSSL is licensed under the cryptogams license: https://www.openssl.org/~appro/cryptogams/ which is a 3-clause BSD license (I think), which seems eminently compatible.

                                                      Also we already have code derived from OpenSSL, src/crypto/aes/asm_ppc64le.s and src/crypto/sha256/sha256block_ppc64le.s

                                                      Yes, I believe those should be removed too. The comment in the code being added in this file (and in those) says that the license depends on where you get the code. Clearly you got the code from OpenSSL, since the code is not posted on the cryptogams site.

                                                      View Change

                                                        To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                        Gerrit-Project: go
                                                        Gerrit-Branch: master
                                                        Gerrit-MessageType: comment
                                                        Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                        Gerrit-Change-Number: 38366
                                                        Gerrit-PatchSet: 5
                                                        Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                        Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                        Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                        Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                        Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                        Gerrit-Reviewer: Filippo Valsorda
                                                        Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                        Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                        Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                        Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                        Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                        Gerrit-CC: Russ Cox <r...@golang.org>
                                                        Gerrit-Comment-Date: Thu, 09 Nov 2017 15:21:43 +0000
                                                        Gerrit-HasComments: No
                                                        Gerrit-HasLabels: No

                                                        Nick Craig-Wood (Gerrit)

                                                        unread,
                                                        Nov 10, 2017, 8:37:03 AM11/10/17
                                                        to goph...@pubsubhelper.golang.org, Russ Cox, Adam Langley, Ian Lance Taylor, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                        The comment in the code being added in this file (and in those) says that the license depends on where you get the code. Clearly you got the code from OpenSSL, since the code is not posted on the cryptogams site.

                                                        I agree that is unclear. I think the intention of the statement on the cryptogams site is clear, though: ap...@openssl.org clearly desires his work to have a wider use. I emailed him to see if he could clarify the matter.

                                                        View Change

                                                          To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                          Gerrit-Project: go
                                                          Gerrit-Branch: master
                                                          Gerrit-MessageType: comment
                                                          Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                          Gerrit-Change-Number: 38366
                                                          Gerrit-PatchSet: 5
                                                          Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                          Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                          Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                          Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                          Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                          Gerrit-Reviewer: Filippo Valsorda
                                                          Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                          Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                          Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                          Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                          Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                          Gerrit-CC: Russ Cox <r...@golang.org>
                                                          Gerrit-Comment-Date: Fri, 10 Nov 2017 13:36:59 +0000
                                                          Gerrit-HasComments: No
                                                          Gerrit-HasLabels: No

                                                          Russ Cox (Gerrit)

                                                          unread,
                                                          Nov 10, 2017, 8:52:21 AM11/10/17
                                                          to Nick Craig-Wood, goph...@pubsubhelper.golang.org, Russ Cox, Adam Langley, Ian Lance Taylor, Brad Fitzpatrick, Andreas Auernhammer, Cherry Zhang, Emmanuel Odeke, Gobot Gobot, Josselin Costanzi, golang-co...@googlegroups.com

                                                          Patch Set 5:

                                                          The comment in the code being added in this file (and in those) says that the license depends on where you get the code. Clearly you got the code from OpenSSL, since the code is not posted on the cryptogams site.

                                                          I agree that is unclear. I think the intention of the statement on the cryptogams site is clear, though: ap...@openssl.org clearly desires his work to have a wider use. I emailed him to see if he could clarify the matter.

                                                          Thanks very much. If fresh versions are posted on the cryptogams site then we should still remove these and start over from the cryptogams version (to make sure we're not picking up any OpenSSL contributions), but I don't think that should be too hard.

                                                          View Change

                                                            To view, visit change 38366. To unsubscribe, or for help writing mail filters, visit settings.

                                                            Gerrit-Project: go
                                                            Gerrit-Branch: master
                                                            Gerrit-MessageType: comment
                                                            Gerrit-Change-Id: I13df6a87f5697de255cb9a494022dd7f7dbde8f5
                                                            Gerrit-Change-Number: 38366
                                                            Gerrit-PatchSet: 5
                                                            Gerrit-Owner: Nick Craig-Wood <nic...@gmail.com>
                                                            Gerrit-Reviewer: Adam Langley <a...@golang.org>
                                                            Gerrit-Reviewer: Andreas Auernhammer <ae...@mail.de>
                                                            Gerrit-Reviewer: Brad Fitzpatrick <brad...@golang.org>
                                                            Gerrit-Reviewer: Cherry Zhang <cher...@google.com>
                                                            Gerrit-Reviewer: Filippo Valsorda
                                                            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                                                            Gerrit-Reviewer: Nick Craig-Wood <nic...@gmail.com>
                                                            Gerrit-CC: Emmanuel Odeke <emm....@gmail.com>
                                                            Gerrit-CC: Ian Lance Taylor <ia...@golang.org>
                                                            Gerrit-CC: Josselin Costanzi <joss...@costanzi.fr>
                                                            Gerrit-CC: Russ Cox <r...@golang.org>
                                                            Gerrit-Comment-Date: Fri, 10 Nov 2017 13:52:16 +0000
                                                            Gerrit-HasComments: No
                                                            Gerrit-HasLabels: No
                                                            Reply all
                                                            Reply to author
                                                            Forward
                                                            0 new messages