[go] crypto/sha256: add sha-ni implementation

104 views
Skip to first unread message

Ted Painter (Gerrit)

unread,
May 25, 2022, 8:44:42 PM (5/25/22)
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Ted Painter has uploaded this change for review.

View Change

crypto/sha256: add sha-ni implementation

Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
---
M src/crypto/sha256/sha256block_amd64.go
M src/crypto/sha256/sha256block_amd64.s
2 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/src/crypto/sha256/sha256block_amd64.go b/src/crypto/sha256/sha256block_amd64.go
index 27464e2..b5d2c9b 100644
--- a/src/crypto/sha256/sha256block_amd64.go
+++ b/src/crypto/sha256/sha256block_amd64.go
@@ -7,3 +7,4 @@
import "internal/cpu"

var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
+var useSHA = useAVX2 && cpu.X86.HasSHA
diff --git a/src/crypto/sha256/sha256block_amd64.s b/src/crypto/sha256/sha256block_amd64.s
index f6af47c..c0ed494 100644
--- a/src/crypto/sha256/sha256block_amd64.s
+++ b/src/crypto/sha256/sha256block_amd64.s
@@ -550,9 +550,80 @@
; \
ADDL y3, h // h = t1 + S0 + MAJ // --

+// Definitions for sha-ni version
+//
+// The sha-ni implementation uses the Intel(R) SHA extensions SHA256RNDS2, SHA256MSG1, and SHA256MSG2.
+// It also reuses portions of the flip_mask (half) and the K256 table (stride 32) from the avx2 version.
+//
+// Reference
+// S. Gulley, et al, "New Instructions Supporting the Secure Hash
+// Algorithm on Intel® Architecture Processors", July 2013
+// https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
+//
+
+#define digestPtr DI // input/output, base pointer to digest hash vector H0, H1, ..., H7
+#define dataPtr SI // input, base pointer to first input data block
+#define numBytes DX // input, number of input bytes to be processed; later turned into the end-of-input pointer
+#define sha256Constants AX // round constants from K256 table, indexed by four-round group number x 32
+#define msg X0 // input data
+#define state0 X1 // round intermediates and outputs: state0 holds ABEF, state1 holds CDGH
+#define state1 X2
+#define m0 X3 // m0, m1,... m4 -- round message temps (m4 is also used as scratch)
+#define m1 X4
+#define m2 X5
+#define m3 X6
+#define m4 X7
+#define shufMask X8 // input data endian conversion control mask
+#define abefSave X9 // digest hash vector inter-block buffer abef
+#define cdghSave X10 // digest hash vector inter-block buffer cdgh
+
+#define nop(m,a) // expands to nothing; stands in for the final SHA256MSG1 in the first and last few rounds
+
+#define sha256msg1(m,a) \ // final SHA256MSG1 m, a for the middle rounds that require it
+ SHA256MSG1 m, a
+
+#define vmov(a,b) \ // forward copy a -> b; msg copy for all but rounds 12-15
+ VMOVDQA a, b
+
+#define vmovrev(a,b) \ // reverse copy b -> a; used for rounds 12-15
+ VMOVDQA b, a
+
+// SHA rounds 0 to 11, four rounds per invocation: load 16 input bytes at offset c*16,
+// byte-swap them with shufMask, keep a copy in m, add the K256 constants at c*32 and run two SHA256RNDS2.
+// The trailing sha256Msg1 op takes (m,a); pass nop for the invocation where it is not needed.
+// Refer to Gulley, et al. for more information.
+#define rounds0to11(m,a,c,sha256Msg1) \
+ VMOVDQU c*16(dataPtr), msg \
+ PSHUFB shufMask, msg \
+ VMOVDQA msg, m \
+ PADDD (c*32)(sha256Constants), msg \
+ SHA256RNDS2 msg, state0, state1 \
+ PSHUFD $0x0e, msg, msg \
+ SHA256RNDS2 msg, state1, state0 \
+ sha256Msg1 (m,a)
+
+// SHA rounds 12 to 59, four rounds per invocation: movop (m,msg) sets up msg, the K256 constants
+// at c*32 are added and two SHA256RNDS2 are run; in between, t is updated with m4 (m aligned against a)
+// and SHA256MSG2 m, t. Rounds 12-15 pass vmovrev as movop to copy msg into m,
+// which is required after the last data load; all later invocations pass vmov.
+// The trailing sha256Msg1 op takes (m,a) and is nop where not needed; refer to Gulley, et al. for more information.
+#define rounds12to59(m,c,a,t,sha256Msg1,movop) \
+ movop (m,msg) \
+ PADDD (c*32)(sha256Constants), msg \
+ SHA256RNDS2 msg, state0, state1 \
+ VMOVDQA m, m4 \
+ PALIGNR $4, a, m4 \
+ PADDD m4, t \
+ SHA256MSG2 m, t \
+ PSHUFD $0x0e, msg, msg \
+ SHA256RNDS2 msg, state1, state0 \
+ sha256Msg1 (m,a)
+
TEXT ·block(SB), 0, $536-32
- CMPB ·useAVX2(SB), $1
- JE avx2
+ CMPB ·useSHA(SB), $1
+ JE sha_ni
+ CMPB ·useAVX2(SB), $1
+ JE avx2

MOVQ p_base+8(FP), SI
MOVQ p_len+16(FP), DX
@@ -862,6 +933,77 @@
VZEROUPPER
RET

+sha_ni:
+ MOVQ dig+0(FP), digestPtr // init digest hash vector H0, H1,..., H7 pointer
+ MOVQ p_base+8(FP), dataPtr // init input data base pointer
+ MOVQ p_len+16(FP), numBytes // get number of input bytes to hash
+ SHRQ $6, numBytes // force modulo 64 input buffer length
+ SHLQ $6, numBytes
+ CMPQ numBytes, $0 // exit early for zero-length input buffer
+ JEQ done
+ ADDQ dataPtr, numBytes // point numBytes to end of input buffer
+ VMOVDQU (0*16)(digestPtr), state0 // load initial hash values and reorder
+ VMOVDQU (1*16)(digestPtr), state1 // DCBA, HGFE -> ABEF, CDGH
+ PSHUFD $0xb1, state0, state0 // CDAB
+ PSHUFD $0x1b, state1, state1 // EFGH
+ VMOVDQA state0, m4
+ PALIGNR $8, state1, state0 // ABEF
+ PBLENDW $0xf0, m4, state1 // CDGH
+ VMOVDQA flip_mask<>(SB), shufMask
+ LEAQ K256<>(SB), sha256Constants
+
+roundLoop:
+ // save hash values for addition after rounds
+ VMOVDQA state0, abefSave
+ VMOVDQA state1, cdghSave
+
+ // do rounds 0-59
+ rounds0to11 (m0,-,0,nop) // 0-3
+ rounds0to11 (m1,m0,1,sha256msg1) // 4-7
+ rounds0to11 (m2,m1,2,sha256msg1) // 8-11
+ VMOVDQU (3*16)(dataPtr), msg
+ PSHUFB shufMask, msg
+ rounds12to59 (m3,3,m2,m0,sha256msg1,vmovrev) // 12-15
+ rounds12to59 (m0,4,m3,m1,sha256msg1,vmov) // 16-19
+ rounds12to59 (m1,5,m0,m2,sha256msg1,vmov) // 20-23
+ rounds12to59 (m2,6,m1,m3,sha256msg1,vmov) // 24-27
+ rounds12to59 (m3,7,m2,m0,sha256msg1,vmov) // 28-31
+ rounds12to59 (m0,8,m3,m1,sha256msg1,vmov) // 32-35
+ rounds12to59 (m1,9,m0,m2,sha256msg1,vmov) // 36-39
+ rounds12to59 (m2,10,m1,m3,sha256msg1,vmov) // 40-43
+ rounds12to59 (m3,11,m2,m0,sha256msg1,vmov) // 44-47
+ rounds12to59 (m0,12,m3,m1,sha256msg1,vmov) // 48-51
+ rounds12to59 (m1,13,m0,m2,nop,vmov) // 52-55
+ rounds12to59 (m2,14,m1,m3,nop,vmov) // 56-59
+
+ // do rounds 60-63
+ VMOVDQA m3, msg
+ PADDD (15*32)(sha256Constants), msg
+ SHA256RNDS2 msg, state0, state1
+ PSHUFD $0x0e, msg, msg
+ SHA256RNDS2 msg, state1, state0
+
+ // add current hash values with previously saved
+ PADDD abefSave, state0
+ PADDD cdghSave, state1
+
+ // advance data pointer; loop until buffer empty
+ ADDQ $64, dataPtr
+ CMPQ numBytes, dataPtr
+ JNE roundLoop
+
+ // write hash values back in the correct order
+ PSHUFD $0x1b, state0, state0 // FEBA
+ PSHUFD $0xb1, state1, state1 // DCHG
+ VMOVDQA state0, m4
+ PBLENDW $0xf0, state1, state0 // DCBA
+ PALIGNR $8, m4, state1 // HGFE
+ VMOVDQU state0, (0*16)(digestPtr)
+ VMOVDQU state1, (1*16)(digestPtr)
+
+done:
+ RET
+
// shuffle byte order from LE to BE
DATA flip_mask<>+0x00(SB)/8, $0x0405060700010203
DATA flip_mask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Gerrit-Change-Number: 408795
Gerrit-PatchSet: 1
Gerrit-Owner: Ted Painter <ted.p...@intel.com>
Gerrit-MessageType: newchange

Alexandru Matei (Gerrit)

unread,
Nov 2, 2022, 2:32:02 PM (11/2/22)
to Ted Painter, goph...@pubsubhelper.golang.org, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

View Change

1 comment:

  • Patchset:

    • Patch Set #1:

      Any info regarding when this patch will be merged? There are multiple open source projects that would greatly benefit from this, like containerd when pulling images, k8s and others.

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Gerrit-Change-Number: 408795
Gerrit-PatchSet: 1
Gerrit-Owner: Ted Painter <ted.p...@intel.com>
Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
Gerrit-CC: Adam Langley <a...@golang.org>
Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Ted Painter <ted.p...@intel.com>
Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
Gerrit-Comment-Date: Wed, 02 Nov 2022 11:26:50 +0000
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Gerrit-MessageType: comment

Paulo Gomes (Gerrit)

unread,
Nov 8, 2022, 10:53:59 AM (11/8/22)
to Ted Painter, goph...@pubsubhelper.golang.org, Alexandru Matei, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

Patch set 1:Code-Review +1

View Change

2 comments:

  • Patchset:

    • Patch Set #1:

      This looks good to me, apart from some minor formatting issues.
      I have tested this on i7-11800H and it worked as expected, with the performance improvements below:

      ```
      goos: linux
      goarch: amd64
      cpu: 11th Gen Intel(R) Core(TM) i7-11800H @ 2.30GHz
      Benchmark_SHA256_BEFORE-16 419570 2765 ns/op
      Benchmark_SHA256_AFTER-16 1487156 786 ns/op
      ```

  • File src/crypto/sha256/sha256block_amd64.s:

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Gerrit-Change-Number: 408795
Gerrit-PatchSet: 1
Gerrit-Owner: Ted Painter <ted.p...@intel.com>
Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
Gerrit-CC: Adam Langley <a...@golang.org>
Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Ted Painter <ted.p...@intel.com>
Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
Gerrit-Comment-Date: Mon, 07 Nov 2022 20:55:45 +0000
Gerrit-HasComments: Yes
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment

Ted Painter (Gerrit)

unread,
Nov 17, 2022, 8:49:04 AM (11/17/22)
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

Ted Painter uploaded patch set #2 to this change.

View Change

crypto/sha256: add sha-ni implementation

Fixes #957


Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
---
M src/crypto/sha256/sha256block_amd64.go
M src/crypto/sha256/sha256block_amd64.s
2 files changed, 156 insertions(+), 2 deletions(-)

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Gerrit-Change-Number: 408795
Gerrit-PatchSet: 2
Gerrit-Owner: Ted Painter <ted.p...@intel.com>
Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
Gerrit-CC: Adam Langley <a...@golang.org>
Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Ted Painter <ted.p...@intel.com>
Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
Gerrit-MessageType: newpatchset

Alan Donovan (Gerrit)

unread,
Mar 22, 2023, 5:58:07 PM (3/22/23)
to Ted Painter, goph...@pubsubhelper.golang.org, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, Gopher Robot, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Ted Painter.

View Change

1 comment:

  • Patchset:

    • Patch Set #2:

      Thanks for sharing this CL. Is this stalled for want of a reviewer, or is it waiting for the proposal approval (or something else)?

To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
Gerrit-Change-Number: 408795
Gerrit-PatchSet: 2
Gerrit-Owner: Ted Painter <ted.p...@intel.com>
Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
Gerrit-CC: Adam Langley <a...@golang.org>
Gerrit-CC: Alan Donovan <adon...@google.com>
Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Ted Painter <ted.p...@intel.com>
Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
Gerrit-Comment-Date: Wed, 22 Mar 2023 21:58:03 +0000

Alan Donovan (Gerrit)

unread,
Mar 31, 2023, 11:37:57 AM (3/31/23)
to Russ Cox, Ted Painter, goph...@pubsubhelper.golang.org, Gopher Robot, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

Attention is currently required from: Filippo Valsorda, Paulo Gomes, Russ Cox, Ted Painter.

Patch set 4:Code-Review +1

View Change

    To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

    Gerrit-Project: go
    Gerrit-Branch: master
    Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
    Gerrit-Change-Number: 408795
    Gerrit-PatchSet: 4
    Gerrit-Owner: Ted Painter <ted.p...@intel.com>
    Gerrit-Reviewer: Alan Donovan <adon...@google.com>
    Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
    Gerrit-Reviewer: Gopher Robot <go...@golang.org>
    Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
    Gerrit-Reviewer: Russ Cox <r...@golang.org>
    Gerrit-CC: Adam Langley <a...@golang.org>
    Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
    Gerrit-Attention: Paulo Gomes <paulo.g...@gmail.com>
    Gerrit-Attention: Russ Cox <r...@golang.org>
    Gerrit-Attention: Ted Painter <ted.p...@intel.com>
    Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
    Gerrit-Comment-Date: Fri, 31 Mar 2023 15:37:54 +0000
    Gerrit-HasComments: No
    Gerrit-Has-Labels: Yes
    Gerrit-MessageType: comment

    Alan Donovan (Gerrit)

    unread,
    Mar 31, 2023, 11:54:59 AM (3/31/23)
    to Russ Cox, Ted Painter, goph...@pubsubhelper.golang.org, Gopher Robot, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

    Attention is currently required from: Filippo Valsorda, Paulo Gomes, Russ Cox, Ted Painter.

    Patch set 4:Code-Review +2

    View Change

      To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

      Gerrit-Project: go
      Gerrit-Branch: master
      Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
      Gerrit-Change-Number: 408795
      Gerrit-PatchSet: 4
      Gerrit-Owner: Ted Painter <ted.p...@intel.com>
      Gerrit-Reviewer: Alan Donovan <adon...@google.com>
      Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
      Gerrit-Reviewer: Gopher Robot <go...@golang.org>
      Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
      Gerrit-Reviewer: Russ Cox <r...@golang.org>
      Gerrit-CC: Adam Langley <a...@golang.org>
      Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
      Gerrit-Attention: Paulo Gomes <paulo.g...@gmail.com>
      Gerrit-Attention: Russ Cox <r...@golang.org>
      Gerrit-Attention: Ted Painter <ted.p...@intel.com>
      Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
      Gerrit-Comment-Date: Fri, 31 Mar 2023 15:54:56 +0000

      Gopher Robot (Gerrit)

      unread,
      Mar 31, 2023, 11:55:29 AM (3/31/23)
      to Ted Painter, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Alan Donovan, Russ Cox, Paulo Gomes, Alexandru Matei, Filippo Valsorda, Adam Langley, golang-co...@googlegroups.com

      Gopher Robot submitted this change.

      View Change

      Approvals: Russ Cox: Looks good to me, approved; Run TryBots; Automatically submit change. Alan Donovan: Looks good to me, approved. Gopher Robot: TryBots succeeded. Paulo Gomes: Looks good to me, but someone else must approve.
      crypto/sha256: add sha-ni implementation

      goos: linux
      goarch: amd64
      pkg: crypto/sha256
      cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz
      │ bench.old │ bench.new
      │ sec/op │ sec/op vs base │
      Hash8Bytes/New-4 169.20n ± 7% 65.40n ± 5% -61.35% (p=0.000 n=10)
      Hash8Bytes/Sum224-4 166.10n ± 3% 65.20n ± 8% -60.74% (p=0.000 n=10)
      Hash8Bytes/Sum256-4 168.50n ± 6% 63.58n ± 7% -62.27% (p=0.000 n=10)
      Hash1K/New-4 2275.5n ± 5% 618.5n ± 2% -72.82% (p=0.000 n=10)
      Hash1K/Sum224-4 2364.5n ± 1% 618.1n ± 1% -73.86% (p=0.000 n=10)
      Hash1K/Sum256-4 2338.5n ± 2% 613.0n ± 2% -73.79% (p=0.000 n=10)
      Hash8K/New-4 17.530µ ± 2% 4.501µ ± 1% -74.33% (p=0.000 n=10)
      Hash8K/Sum224-4 17.456µ ± 2% 4.505µ ± 1% -74.19% (p=0.000 n=10)
      Hash8K/Sum256-4 17.417µ ± 2% 4.504µ ± 1% -74.14% (p=0.000 n=10)
      geomean 1.897µ 564.3n -70.25%

      │ bench.old │ bench.new
      │ B/s │ B/s vs base │
      Hash8Bytes/New-4 45.11Mi ± 6% 116.66Mi ± 5% +158.62% (p=0.000 n=10)
      Hash8Bytes/Sum224-4 45.92Mi ± 3% 117.04Mi ± 8% +154.89% (p=0.000 n=10)
      Hash8Bytes/Sum256-4 45.29Mi ± 6% 120.00Mi ± 7% +164.99% (p=0.000 n=10)
      Hash1K/New-4 429.2Mi ± 5% 1578.9Mi ± 2% +267.92% (p=0.000 n=10)
      Hash1K/Sum224-4 413.0Mi ± 1% 1579.8Mi ± 1% +282.49% (p=0.000 n=10)
      Hash1K/Sum256-4 417.6Mi ± 1% 1593.1Mi ± 2% +281.53% (p=0.000 n=10)
      Hash8K/New-4 445.7Mi ± 1% 1735.9Mi ± 1% +289.50% (p=0.000 n=10)
      Hash8K/Sum224-4 447.6Mi ± 2% 1734.5Mi ± 1% +287.54% (p=0.000 n=10)
      Hash8K/Sum256-4 448.6Mi ± 2% 1734.8Mi ± 1% +286.75% (p=0.000 n=10)
      geomean 204.3Mi 686.8Mi +236.11%

      │ bench.old │ bench.new
      │ B/op │ B/op vs base │
      Hash8Bytes/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8Bytes/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8Bytes/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      geomean ² +0.00% ²
      ¹ all samples are equal
      ² summaries must be >0 to compute geomean

      │ bench.old │ bench.new
      │ allocs/op │ allocs/op vs base │
      Hash8Bytes/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8Bytes/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8Bytes/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash1K/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/New-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/Sum224-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      Hash8K/Sum256-4 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹
      geomean ² +0.00% ²
      ¹ all samples are equal
      ² summaries must be >0 to compute geomean

      Fixes #50543.

      Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
      Reviewed-on: https://go-review.googlesource.com/c/go/+/408795
      Reviewed-by: Paulo Gomes <paulo.g...@gmail.com>
      TryBot-Result: Gopher Robot <go...@golang.org>
      Run-TryBot: Russ Cox <r...@golang.org>
      Reviewed-by: Alan Donovan <adon...@google.com>
      Auto-Submit: Russ Cox <r...@golang.org>
      Reviewed-by: Russ Cox <r...@golang.org>

      ---
      M src/crypto/sha256/sha256block_amd64.go
      M src/crypto/sha256/sha256block_amd64.s
      2 files changed, 152 insertions(+), 9 deletions(-)

      diff --git a/src/crypto/sha256/sha256block_amd64.go b/src/crypto/sha256/sha256block_amd64.go
      index 27464e2..b5d2c9b 100644
      --- a/src/crypto/sha256/sha256block_amd64.go
      +++ b/src/crypto/sha256/sha256block_amd64.go
      @@ -7,3 +7,4 @@
      import "internal/cpu"

      var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
      +var useSHA = useAVX2 && cpu.X86.HasSHA
      diff --git a/src/crypto/sha256/sha256block_amd64.s b/src/crypto/sha256/sha256block_amd64.s
      index f6af47c..03535fb 100644
      --- a/src/crypto/sha256/sha256block_amd64.s
      +++ b/src/crypto/sha256/sha256block_amd64.s
      @@ -179,7 +179,7 @@

      #define XFER Y9

      -#define BYTE_FLIP_MASK Y13 // mask to convert LE -> BE
      +#define BYTE_FLIP_MASK Y13 // mask to convert LE -> BE
      #define X_BYTE_FLIP_MASK X13

      #define NUM_BYTES DX
      @@ -232,14 +232,14 @@
      RORXL $13, a, T1; \ // T1 = a >> 13 // S0B
      ; \
      XORL y1, y0; \ // y0 = (e>>25) ^ (e>>11) // S1
      - XORL g, y2; \ // y2 = f^g // CH
      + XORL g, y2; \ // y2 = f^g // CH
      VPADDD XDWORD0, XTMP0, XTMP0; \ // XTMP0 = W[-7] + W[-16] // y1 = (e >> 6) // S1
      RORXL $6, e, y1; \ // y1 = (e >> 6) // S1
      ; \
      ANDL e, y2; \ // y2 = (f^g)&e // CH
      XORL y1, y0; \ // y0 = (e>>25) ^ (e>>11) ^ (e>>6) // S1
      RORXL $22, a, y1; \ // y1 = a >> 22 // S0A
      - ADDL h, d; \ // d = k + w + h + d // --
      + ADDL h, d; \ // d = k + w + h + d // --
      ; \
      ANDL b, y3; \ // y3 = (a|c)&b // MAJA
      VPALIGNR $4, XDWORD0, XDWORD1, XTMP1; \ // XTMP1 = W[-15]
      @@ -270,7 +270,7 @@
      MOVL a, y3; \ // y3 = a // MAJA
      RORXL $25, e, y0; \ // y0 = e >> 25 // S1A
      RORXL $11, e, y1; \ // y1 = e >> 11 // S1B
      - ADDL (disp + 1*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      + ADDL (disp + 1*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      ORL c, y3; \ // y3 = a|c // MAJA
      ; \
      VPSRLD $3, XTMP1, XTMP4; \ // XTMP4 = W[-15] >> 3
      @@ -316,7 +316,7 @@
      ; \
      MOVL a, y3; \ // y3 = a // MAJA
      RORXL $25, e, y0; \ // y0 = e >> 25 // S1A
      - ADDL (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      + ADDL (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      ; \
      VPSRLQ $19, XTMP2, XTMP3; \ // XTMP3 = W[-2] ror 19 {xBxA}
      RORXL $11, e, y1; \ // y1 = e >> 11 // S1B
      @@ -495,7 +495,7 @@
      ; \
      XORL T1, y1; \ // y1 = (a>>22) ^ (a>>13) // S0
      RORXL $2, a, T1; \ // T1 = (a >> 2) // S0
      - ADDL (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      + ADDL (disp + 2*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      ORL c, y3; \ // y3 = a|c // MAJA
      ; \
      XORL T1, y1; \ // y1 = (a>>22) ^ (a>>13) ^ (a>>2) // S0
      @@ -531,7 +531,7 @@
      ; \
      XORL T1, y1; \ // y1 = (a>>22) ^ (a>>13) // S0
      RORXL $2, a, T1; \ // T1 = (a >> 2) // S0
      - ADDL (disp + 3*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      + ADDL (disp + 3*4)(SP)(SRND*1), h; \ // h = k + w + h // --
      ORL c, y3; \ // y3 = a|c // MAJA
      ; \
      XORL T1, y1; \ // y1 = (a>>22) ^ (a>>13) ^ (a>>2) // S0

      To view, visit change 408795. To unsubscribe, or for help writing mail filters, visit settings.

      Gerrit-Project: go
      Gerrit-Branch: master
      Gerrit-Change-Id: Ie9783647fe82f40fcbd91989a96a24f2d3d5b9a0
      Gerrit-Change-Number: 408795
      Gerrit-PatchSet: 5
      Gerrit-Owner: Ted Painter <ted.p...@intel.com>
      Gerrit-Reviewer: Alan Donovan <adon...@google.com>
      Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
      Gerrit-Reviewer: Gopher Robot <go...@golang.org>
      Gerrit-Reviewer: Paulo Gomes <paulo.g...@gmail.com>
      Gerrit-Reviewer: Russ Cox <r...@golang.org>
      Gerrit-CC: Adam Langley <a...@golang.org>
      Gerrit-CC: Alexandru Matei <alexand...@uipath.com>
      Gerrit-MessageType: merged
      Reply all
      Reply to author
      Forward
      0 new messages