[go] crypto/sha256: add sha-ni implementation

Ted Painter (Gerrit)

unread,

May 25, 2022, 8:44:42 PM (5/25/22)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Ted Painter has uploaded this change for review.

crypto/sha256: add sha-ni implementation

Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
---
M src/crypto/sha256/sha256block_amd64.go
M src/crypto/sha256/sha256block_amd64.s
2 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/src/crypto/sha256/sha256block_amd64.go b/src/crypto/sha256/sha256block_amd64.go
index 27464e2..b5d2c9b 100644
--- a/src/crypto/sha256/sha256block_amd64.go
+++ b/src/crypto/sha256/sha256block_amd64.go
@@ -7,3 +7,4 @@
 import "internal/cpu"
 
 var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
+var useSHA = useAVX2 && cpu.X86.HasSHA // NOTE(review): gated on useAVX2 as well, presumably because the sha_ni assembly uses VEX-encoded moves (VMOVDQA/VMOVDQU) -- confirm
diff --git a/src/crypto/sha256/sha256block_amd64.s b/src/crypto/sha256/sha256block_amd64.s
index f6af47c..c0ed494 100644
--- a/src/crypto/sha256/sha256block_amd64.s
+++ b/src/crypto/sha256/sha256block_amd64.s
@@ -550,9 +550,80 @@
 	;                                  \
 	ADDL  y3, h                        // h = t1 + S0 + MAJ					// --
 
+// Definitions for sha-ni version
+//
+// The sha-ni implementation uses the Intel(R) SHA extensions SHA256RNDS2, SHA256MSG1 and SHA256MSG2.
+// It also reuses portions of the flip_mask (half) and K256 table (stride 32) from the AVX2 version.
+//
+// Reference
+// S. Gulley, et al, "New Instructions Supporting the Secure Hash 
+// Algorithm on Intel® Architecture Processors", July 2013
+// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
+//
+
+#define digestPtr	        DI				// input/output, base pointer to digest hash vector H0, H1, ..., H7
+#define dataPtr	          SI				// input, base pointer to first input data block; advanced by 64 per block
+#define numBytes	        DX				// input, number of input bytes to be processed; later reused as the end-of-input pointer
+#define sha256Constants	  AX				// round constants from K256 table, indexed by 4-round group number x 32
+#define msg		            X0				// input data
+#define state0		        X1				// round intermediates and outputs
+#define state1		        X2
+#define m0		        		X3 				// m0, m1, ... m4 -- round message temps
+#define m1		        		X4
+#define m2		        		X5
+#define m3		        		X6
+#define m4		        		X7
+#define shufMask	        X8				// input data endian conversion control mask
+#define abefSave	        X9				// copy of state0 (abef) saved at the top of each block and added back after the rounds
+#define cdghSave	        X10				// copy of state1 (cdgh) saved at the top of each block and added back after the rounds
+
+#define nop(m,a) 										// expands to nothing; passed instead of sha256msg1 for the first and last few round groups

+#define sha256msg1(m,a) \						// final SHA256MSG1 for the middle round groups that require it
+  SHA256MSG1	  m, a

+#define vmov(a,b) \									// copy a into b; the message copy for every round group except rounds 12-15
+  VMOVDQA       a, b

+#define vmovrev(a,b) \							// copy b into a (operands reversed); used only for rounds 12-15
+  VMOVDQA       b, a
+
+// rounds0to11(m,a,c,sha256Msg1): one group of four sha rounds, used for rounds 0 to 11.
+// Loads 16 input bytes at c*16(dataPtr), shuffles them with shufMask, keeps a copy in m, and adds the K256 constants at c*32.
+// sha256Msg1 is the final msg op applied to (m,a): sha256msg1, or nop for the group where it is not needed.
+// Refer to Gulley, et al for more information.
+#define rounds0to11(m,a,c,sha256Msg1) \
+  VMOVDQU		    c*16(dataPtr), msg                \
+	PSHUFB		    shufMask, msg                     \
+	VMOVDQA		    msg, m                            \
+  PADDD		      (c*32)(sha256Constants), msg      \
+	SHA256RNDS2	  msg, state0, state1            		\
+  PSHUFD 		    $0x0e, msg, msg                   \
+  SHA256RNDS2	  msg, state1, state0           		\
+	sha256Msg1	  (m,a)
+
+// rounds12to59(m,c,a,t,sha256Msg1,movop): one group of four sha rounds, used for rounds 12 to 59.
+// movop(m,msg) is vmov (copy m into msg) for every group except rounds 12-15, where vmovrev
+// copies the freshly loaded msg into m instead, because that group follows the last data load.
+// c selects the K256 constants at c*32; t is updated by PADDD m4 and SHA256MSG2; sha256Msg1 is applied to (m,a).
+// Refer to Gulley, et al for more information.
+#define rounds12to59(m,c,a,t,sha256Msg1,movop) \
+  movop		      (m,msg)                           \
+	PADDD		      (c*32)(sha256Constants), msg      \
+	SHA256RNDS2	  msg, state0, state1            		\
+	VMOVDQA		    m, m4                        			\
+	PALIGNR		    $4, a, m4                    			\
+	PADDD		      m4, t                        			\
+	SHA256MSG2	  m, t                              \
+	PSHUFD 		    $0x0e, msg, msg                   \
+	SHA256RNDS2	  msg, state1, state0               \
+  sha256Msg1    (m,a)
+
 TEXT ·block(SB), 0, $536-32
-	CMPB ·useAVX2(SB), $1
-	JE   avx2
+	CMPB 	·useSHA(SB), $1
+	JE 		sha_ni
+	CMPB 	·useAVX2(SB), $1
+	JE 		avx2
 
 	MOVQ p_base+8(FP), SI
 	MOVQ p_len+16(FP), DX
@@ -862,6 +933,77 @@
 	VZEROUPPER
 	RET
 
+sha_ni:
+	MOVQ 					dig+0(FP), digestPtr										// init digest hash vector H0, H1,..., H7 pointer
+	MOVQ 					p_base+8(FP), dataPtr										// init input data base pointer
+	MOVQ 					p_len+16(FP), numBytes									// get number of input bytes to hash
+	SHRQ 					$6, numBytes														// round the length down to a multiple of 64 bytes (whole blocks only)
+	SHLQ 					$6, numBytes
+  CMPQ          numBytes, $0														// exit early when there is no complete 64-byte block to process
+  JEQ		        done
+	ADDQ		      dataPtr, numBytes                    		// point numBytes to end of input buffer
+	VMOVDQU		    (0*16)(digestPtr), state0								// load initial hash values and reorder
+	VMOVDQU		    (1*16)(digestPtr), state1								// DCBA, HGFE -> ABEF, CDGH
+  PSHUFD		    $0xb1, state0, state0		            		// CDAB
+	PSHUFD		    $0x1b, state1, state1		            		// EFGH
+	VMOVDQA		    state0, m4															// keep a copy of CDAB; state0 is overwritten next
+	PALIGNR		    $8, state1, state0		              		// ABEF
+	PBLENDW		    $0xf0, m4, state1		                		// CDGH
+	VMOVDQA				flip_mask<>(SB), shufMask								// load the byte-order shuffle mask
+	LEAQ		      K256<>(SB), sha256Constants							// base address of the K256 round constant table

+roundLoop:
+	// save hash values for addition after rounds
+	VMOVDQA		    state0, abefSave                                  
+	VMOVDQA		    state1, cdghSave

+  // do rounds 0-59
+  rounds0to11   (m0,-,0,nop)                         		// 0-3
+  rounds0to11   (m1,m0,1,sha256msg1)                    // 4-7
+  rounds0to11   (m2,m1,2,sha256msg1)                    // 8-11
+  VMOVDQU		    (3*16)(dataPtr), msg										// last data load: final 16 bytes of the 64-byte block
+	PSHUFB		    shufMask, msg														// same shuffle as applied to the earlier loads
+  rounds12to59  (m3,3,m2,m0,sha256msg1,vmovrev)    			// 12-15
+  rounds12to59  (m0,4,m3,m1,sha256msg1,vmov)       			// 16-19
+  rounds12to59  (m1,5,m0,m2,sha256msg1,vmov)       			// 20-23
+  rounds12to59  (m2,6,m1,m3,sha256msg1,vmov)       			// 24-27
+  rounds12to59  (m3,7,m2,m0,sha256msg1,vmov)       			// 28-31
+  rounds12to59  (m0,8,m3,m1,sha256msg1,vmov)       			// 32-35
+  rounds12to59  (m1,9,m0,m2,sha256msg1,vmov)       			// 36-39
+  rounds12to59  (m2,10,m1,m3,sha256msg1,vmov)      			// 40-43
+  rounds12to59  (m3,11,m2,m0,sha256msg1,vmov)      			// 44-47
+  rounds12to59  (m0,12,m3,m1,sha256msg1,vmov)      			// 48-51
+  rounds12to59  (m1,13,m0,m2,nop,vmov)             			// 52-55
+  rounds12to59  (m2,14,m1,m3,nop,vmov)             			// 56-59
+	
+	// do rounds 60-63 (no further message ops are needed)
+	VMOVDQA		    m3, msg
+  PADDD		      (15*32)(sha256Constants), msg
+  SHA256RNDS2	  msg, state0, state1
+	PSHUFD 		    $0x0e, msg, msg
+  SHA256RNDS2	  msg, state1, state0

+	// add current hash values with previously saved
+	PADDD		      abefSave, state0
+	PADDD		      cdghSave, state1

+	// advance data pointer; loop until dataPtr reaches the end pointer held in numBytes
+	ADDQ		      $64, dataPtr
+	CMPQ		      numBytes, dataPtr
+	JNE		        roundLoop

+	// write hash values back in the correct order
+	PSHUFD		    $0x1b, state0, state0		              	// FEBA
+	PSHUFD		    $0xb1, state1, state1		              	// DCHG
+	VMOVDQA		    state0, m4															// keep a copy of FEBA; state0 is overwritten next
+	PBLENDW		    $0xf0, state1, state0		             		// DCBA
+	PALIGNR		    $8, m4, state1		                   		// HGFE
+  VMOVDQU		    state0, (0*16)(digestPtr)
+	VMOVDQU		    state1, (1*16)(digestPtr)

+done:
+	RET
+
 // shuffle byte order from LE to BE
 DATA flip_mask<>+0x00(SB)/8, $0x0405060700010203
 DATA flip_mask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Alexandru Matei (Gerrit)

unread,

Nov 2, 2022, 2:32:02 PM (11/2/22)

to Ted Painter, goph...@pubsubhelper.golang.org, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

View Change

1 comment:

Patchset:
- Patch Set #1:
  Any info regarding when this patch will be merged? There are multiple open source projects that would greatly benefit from this, like containerd when pulling images, k8s and others.

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Paulo Gomes (Gerrit)

unread,

Nov 8, 2022, 10:53:59 AM (11/8/22)

to Ted Painter, goph...@pubsubhelper.golang.org, Alexandru Matei, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

Patch set 1:Code-Review +1

View Change

2 comments:

Patchset:
- Patch Set #1:
  This looks good to me, apart from some minor formatting issues.
  I have tested this on i7-11800H and it worked as expected, with the performance improvements below:
```
goos: linux
goarch: amd64
cpu: 11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz
Benchmark_SHA256_BEFORE-16 419570 2765 ns/op
Benchmark_SHA256_AFTER-16 1487156 786 ns/op
```
File src/crypto/sha256/sha256block_amd64.s:
- Patch Set #1, Line 957:
  The trailing spaces should be trimmed (here and in the other parts of the diff).

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Ted Painter (Gerrit)

unread,

Nov 17, 2022, 8:49:04 AM (11/17/22)

to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

Ted Painter uploaded patch set #2 to this change.

View Change

crypto/sha256: add sha-ni implementation

Fixes #957



Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
---
M src/crypto/sha256/sha256block_amd64.go
M src/crypto/sha256/sha256block_amd64.s

2 files changed, 156 insertions(+), 2 deletions(-)

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Alan Donovan (Gerrit)

unread,

Mar 22, 2023, 5:58:07 PM (3/22/23)

to Ted Painter, goph...@pubsubhelper.golang.org, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

View Change

1 comment:

Patchset:
- Patch Set #2:
  Thanks for sharing this CL. Is this stalled for want of a reviewer, or is it waiting for the proposal approval (or something else)?

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Alan Donovan (Gerrit)

unread,

Mar 31, 2023, 11:37:57 AM (3/31/23)

to Russ Cox, Ted Painter, goph...@pubsubhelper.golang.org, Gopher Robot, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Paulo Gomes, Russ Cox, Ted Painter.

Patch set 4:Code-Review +1

View Change

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Alan Donovan (Gerrit)

unread,

Mar 31, 2023, 11:54:59 AM (3/31/23)

to Russ Cox, Ted Painter, goph...@pubsubhelper.golang.org, Gopher Robot, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Paulo Gomes, Russ Cox, Ted Painter.

Patch set 4:Code-Review +2

View Change

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gopher Robot (Gerrit)

unread,

Mar 31, 2023, 11:55:29 AM (3/31/23)

to Ted Painter, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Alan Donovan, Russ Cox, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

Gopher Robot submitted this change.

View Change

Approvals:
  Russ Cox: Looks good to me, approved; Run TryBots; Automatically submit change
  Alan Donovan: Looks good to me, approved
  Gopher Robot: TryBots succeeded
  Paulo Gomes: Looks good to me, but someone else must approve

crypto/sha256: add sha-ni implementation

goos: linux
goarch: amd64
pkg: crypto/sha256
cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz
                    │  bench.old   │              bench.new              │
                    │    sec/op    │   sec/op     vs base                │
Hash8Bytes/New-4      169.20n ± 7%   65.40n ± 5%  -61.35% (p=0.000 n=10)
Hash8Bytes/Sum224-4   166.10n ± 3%   65.20n ± 8%  -60.74% (p=0.000 n=10)
Hash8Bytes/Sum256-4   168.50n ± 6%   63.58n ± 7%  -62.27% (p=0.000 n=10)
Hash1K/New-4          2275.5n ± 5%   618.5n ± 2%  -72.82% (p=0.000 n=10)
Hash1K/Sum224-4       2364.5n ± 1%   618.1n ± 1%  -73.86% (p=0.000 n=10)
Hash1K/Sum256-4       2338.5n ± 2%   613.0n ± 2%  -73.79% (p=0.000 n=10)
Hash8K/New-4          17.530µ ± 2%   4.501µ ± 1%  -74.33% (p=0.000 n=10)
Hash8K/Sum224-4       17.456µ ± 2%   4.505µ ± 1%  -74.19% (p=0.000 n=10)
Hash8K/Sum256-4       17.417µ ± 2%   4.504µ ± 1%  -74.14% (p=0.000 n=10)
geomean                1.897µ        564.3n       -70.25%

                    │  bench.old   │               bench.new                │
                    │     B/s      │      B/s       vs base                 │
Hash8Bytes/New-4      45.11Mi ± 6%   116.66Mi ± 5%  +158.62% (p=0.000 n=10)
Hash8Bytes/Sum224-4   45.92Mi ± 3%   117.04Mi ± 8%  +154.89% (p=0.000 n=10)
Hash8Bytes/Sum256-4   45.29Mi ± 6%   120.00Mi ± 7%  +164.99% (p=0.000 n=10)
Hash1K/New-4          429.2Mi ± 5%   1578.9Mi ± 2%  +267.92% (p=0.000 n=10)
Hash1K/Sum224-4       413.0Mi ± 1%   1579.8Mi ± 1%  +282.49% (p=0.000 n=10)
Hash1K/Sum256-4       417.6Mi ± 1%   1593.1Mi ± 2%  +281.53% (p=0.000 n=10)
Hash8K/New-4          445.7Mi ± 1%   1735.9Mi ± 1%  +289.50% (p=0.000 n=10)
Hash8K/Sum224-4       447.6Mi ± 2%   1734.5Mi ± 1%  +287.54% (p=0.000 n=10)
Hash8K/Sum256-4       448.6Mi ± 2%   1734.8Mi ± 1%  +286.75% (p=0.000 n=10)
geomean               204.3Mi         686.8Mi       +236.11%

                    │  bench.old   │              bench.new              │
                    │     B/op     │    B/op     vs base                 │
Hash8Bytes/New-4      0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8Bytes/Sum224-4   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8Bytes/Sum256-4   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/New-4          0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/Sum224-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/Sum256-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/New-4          0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/Sum224-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/Sum256-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                          ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

                    │  bench.old   │              bench.new              │
                    │  allocs/op   │ allocs/op   vs base                 │
Hash8Bytes/New-4      0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8Bytes/Sum224-4   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8Bytes/Sum256-4   0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/New-4          0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/Sum224-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash1K/Sum256-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/New-4          0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/Sum224-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
Hash8K/Sum256-4       0.000 ± 0%     0.000 ± 0%       ~ (p=1.000 n=10) ¹
geomean                          ²               +0.00%                ²
¹ all samples are equal
² summaries must be >0 to compute geomean

Fixes #50543.

Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Reviewed-on: https://go-review.googlesource.com/c/go/+/408795
Reviewed-by: Paulo Gomes <paulo.g...@gmail.com>
TryBot-Result: Gopher Robot <go...@golang.org>
Run-TryBot: Russ Cox <r...@golang.org>
Reviewed-by: Alan Donovan <adon...@google.com>
Auto-Submit: Russ Cox <r...@golang.org>
Reviewed-by: Russ Cox <r...@golang.org>


---
M src/crypto/sha256/sha256block_amd64.go
M src/crypto/sha256/sha256block_amd64.s

2 files changed, 152 insertions(+), 9 deletions(-)

diff --git a/src/crypto/sha256/sha256block_amd64.go b/src/crypto/sha256/sha256block_amd64.go
index 27464e2..b5d2c9b 100644
--- a/src/crypto/sha256/sha256block_amd64.go
+++ b/src/crypto/sha256/sha256block_amd64.go
@@ -7,3 +7,4 @@
 import "internal/cpu"
 
 var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
+var useSHA = useAVX2 && cpu.X86.HasSHA // NOTE(review): gated on useAVX2 as well, presumably because the sha_ni assembly uses VEX-encoded moves (VMOVDQA/VMOVDQU) -- confirm
diff --git a/src/crypto/sha256/sha256block_amd64.s b/src/crypto/sha256/sha256block_amd64.s

index f6af47c..03535fb 100644
--- a/src/crypto/sha256/sha256block_amd64.s
+++ b/src/crypto/sha256/sha256block_amd64.s
@@ -179,7 +179,7 @@
 
 #define XFER  Y9
 
-#define BYTE_FLIP_MASK 	Y13 // mask to convert LE -> BE
+#define BYTE_FLIP_MASK	Y13 // mask to convert LE -> BE
 #define X_BYTE_FLIP_MASK X13
 
 #define NUM_BYTES DX
@@ -232,14 +232,14 @@
 	RORXL    $13, a, T1;                  \ // T1 = a >> 13			// S0B
 	;                                     \
 	XORL     y1, y0;                      \ // y0 = (e>>25) ^ (e>>11)					// S1
-	XORL     g, y2;                       \ // y2 = f^g                              	// CH
+	XORL     g, y2;                       \ // y2 = f^g	// CH
 	VPADDD   XDWORD0, XTMP0, XTMP0;       \ // XTMP0 = W[-7] + W[-16]	// y1 = (e >> 6)	// S1
 	RORXL    $6, e, y1;                   \ // y1 = (e >> 6)						// S1
 	;                                     \
 	ANDL     e, y2;                       \ // y2 = (f^g)&e                         // CH
 	XORL     y1, y0;                      \ // y0 = (e>>25) ^ (e>>11) ^ (e>>6)		// S1
 	RORXL    $22, a, y1;                  \ // y1 = a >> 22							// S0A
-	ADDL     h, d;                        \ // d = k + w + h + d                     	// --
+	ADDL     h, d;                        \ // d = k + w + h + d	// --
 	;                                     \
 	ANDL     b, y3;                       \ // y3 = (a|c)&b							// MAJA
 	VPALIGNR $4, XDWORD0, XDWORD1, XTMP1; \ // XTMP1 = W[-15]
@@ -270,7 +270,7 @@
 	MOVL    a, y3;                       \ // y3 = a                       // MAJA
 	RORXL   $25, e, y0;                  \ // y0 = e >> 25					// S1A
 	RORXL   $11, e, y1;                  \ // y1 = e >> 11					// S1B
-	ADDL    (disp + 1*4)(SP)(SRND*1), h; \ // h = k + w + h         		// --
+	ADDL    (disp + 1*4)(SP)(SRND*1), h; \ // h = k + w + h		// --
 	ORL     c, y3;                       \ // y3 = a|c						// MAJA
 	;                                    \
 	VPSRLD  $3, XTMP1, XTMP4;            \ // XTMP4 = W[-15] >> 3
@@ -316,7 +316,7 @@
 	;                                    \
 	MOVL    a, y3;                       \ // y3 = a							// MAJA
 	RORXL   $25, e, y0;                  \ // y0 = e >> 25						// S1A
-	ADDL    (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h        			// --
+	ADDL    (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h			// --
 	;                                    \
 	VPSRLQ  $19, XTMP2, XTMP3;           \ // XTMP3 = W[-2] ror 19 {xBxA}
 	RORXL   $11, e, y1;                  \ // y1 = e >> 11						// S1B
@@ -495,7 +495,7 @@
 	;                                  \
 	XORL  T1, y1;                      \ // y1 = (a>>22) ^ (a>>13)				// S0
 	RORXL $2, a, T1;                   \ // T1 = (a >> 2)						// S0
-	ADDL  (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h 	// --
+	ADDL  (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h	// --
 	ORL   c, y3;                       \ // y3 = a|c								// MAJA
 	;                                  \
 	XORL  T1, y1;                      \ // y1 = (a>>22) ^ (a>>13) ^ (a>>2)		// S0
@@ -531,7 +531,7 @@
 	;                                  \
 	XORL  T1, y1;                      \ // y1 = (a>>22) ^ (a>>13)				// S0
 	RORXL $2, a, T1;                   \ // T1 = (a >> 2)						// S0
-	ADDL  (disp + 3*4)(SP)(SRND*1), h; \ // h = k + w + h 	// --
+	ADDL  (disp + 3*4)(SP)(SRND*1), h; \ // h = k + w + h	// --
 	ORL   c, y3;                       \ // y3 = a|c								// MAJA
 	;                                  \
 	XORL  T1, y1;                      \ // y1 = (a>>22) ^ (a>>13) ^ (a>>2)		// S0

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Reply all

Reply to author

Forward