[go] crypto/sha1: provide optimised assembly for riscv64

3 views
Skip to first unread message

Julian Zhu (Gerrit)

unread,
Dec 24, 2025, 8:11:34 AM (19 hours ago) Dec 24
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Julian Zhu has uploaded the change for review

Commit message

crypto/sha1: provide optimised assembly for riscv64

Provide an optimised assembly implementation of sha1 for riscv64.

goos: linux
goarch: riscv64
pkg: crypto/sha1
cpu: Spacemit(R) X60
│ oldsha1 │ newsha1 │
│ sec/op │ sec/op vs base │
Hash8Bytes/New-8 2.136µ ± 0% 1.173µ ± 0% -45.09% (p=0.000 n=8)
Hash8Bytes/Sum-8 2.079µ ± 0% 1.116µ ± 0% -46.32% (p=0.000 n=8)
Hash320Bytes/New-8 10.704µ ± 0% 4.954µ ± 0% -53.72% (p=0.000 n=8)
Hash320Bytes/Sum-8 10.645µ ± 0% 4.872µ ± 0% -54.23% (p=0.000 n=8)
Hash1K/New-8 29.66µ ± 0% 13.38µ ± 0% -54.90% (p=0.000 n=8)
Hash1K/Sum-8 29.63µ ± 0% 13.24µ ± 0% -55.32% (p=0.000 n=8)
Hash8K/New-8 226.8µ ± 1% 104.7µ ± 2% -53.84% (p=0.000 n=8)
Hash8K/Sum-8 226.7µ ± 1% 102.9µ ± 1% -54.62% (p=0.000 n=8)
geomean 19.72µ 9.387µ -52.40%

│ oldsha1 │ newsha1 │
│ B/s │ B/s vs base │
Hash8Bytes/New-8 3.572Mi ± 0% 6.504Mi ± 0% +82.11% (p=0.000 n=8)
Hash8Bytes/Sum-8 3.672Mi ± 0% 6.838Mi ± 0% +86.23% (p=0.000 n=8)
Hash320Bytes/New-8 28.51Mi ± 0% 61.60Mi ± 0% +116.02% (p=0.000 n=8)
Hash320Bytes/Sum-8 28.67Mi ± 0% 62.64Mi ± 0% +118.51% (p=0.000 n=8)
Hash1K/New-8 32.92Mi ± 0% 73.00Mi ± 0% +121.74% (p=0.000 n=8)
Hash1K/Sum-8 32.96Mi ± 0% 73.76Mi ± 0% +123.78% (p=0.000 n=8)
Hash8K/New-8 34.44Mi ± 1% 74.61Mi ± 2% +116.61% (p=0.000 n=8)
Hash8K/Sum-8 34.46Mi ± 1% 75.93Mi ± 1% +120.37% (p=0.000 n=8)
geomean 18.51Mi 38.89Mi +110.07%
Change-Id: I3d4d05fe19872412fdf77a337395e0bf84c41dd5

Change diff

diff --git a/src/crypto/sha1/sha1block_decl.go b/src/crypto/sha1/sha1block_decl.go
index 887d8ca..f32008a 100644
--- a/src/crypto/sha1/sha1block_decl.go
+++ b/src/crypto/sha1/sha1block_decl.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

-//go:build (386 || arm || loong64) && !purego
+//go:build (386 || arm || loong64 || riscv64) && !purego

package sha1

diff --git a/src/crypto/sha1/sha1block_generic.go b/src/crypto/sha1/sha1block_generic.go
index 5989a24..5dcfe6a 100644
--- a/src/crypto/sha1/sha1block_generic.go
+++ b/src/crypto/sha1/sha1block_generic.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

-//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !s390x) || purego
+//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !riscv64 && !s390x) || purego

package sha1

diff --git a/src/crypto/sha1/sha1block_riscv64.s b/src/crypto/sha1/sha1block_riscv64.s
new file mode 100644
index 0000000..3f414dc
--- /dev/null
+++ b/src/crypto/sha1/sha1block_riscv64.s
@@ -0,0 +1,228 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// riscv64 version of sha1block.go
+// derived from crypto/sha1/sha1block_loong64.s
+
+//go:build !purego
+
+#include "textflag.h"
+// LOAD1(index): read the four message bytes at offsets index*4..index*4+3 from (X29), combine them big-endian into X5, and store that word in slot index of the schedule buffer at (X19). Clobbers X6, X7, X8.
+#define LOAD1(index) \
+ MOVBU ((index*4)+0)(X29), X5; \
+ MOVBU ((index*4)+1)(X29), X6; \
+ MOVBU ((index*4)+2)(X29), X7; \
+ MOVBU ((index*4)+3)(X29), X8; \
+ SLL $24, X5; \
+ SLL $16, X6; \
+ OR X5, X6, X5; \
+ SLL $8, X7; \
+ OR X5, X7, X5; \
+ OR X5, X8, X5; \
+ MOVW X5, (index*4)(X19)
+// LOAD(index): X5 = (w[index] ^ w[index-3] ^ w[index-8] ^ w[index-14]) rotated left by 1 (RORW $31), with every slot number taken mod 16 in the buffer at (X19); the result is stored back to slot index mod 16. Clobbers X6, X7, X8.
+#define LOAD(index) \
+ MOVWU (((index)&0xf)*4)(X19), X5; \
+ MOVWU (((index-3)&0xf)*4)(X19), X6; \
+ MOVWU (((index-8)&0xf)*4)(X19), X7; \
+ MOVWU (((index-14)&0xf)*4)(X19), X8; \
+ XOR X6, X5; \
+ XOR X7, X5; \
+ XOR X8, X5; \
+ RORW $31, X5; \
+ MOVW X5, (((index)&0xf)*4)(X19)
+
+// FUNC1: f = d ^ (b & (c ^ d)), left in X7; a and e are accepted but unused.
+#define FUNC1(a, b, c, d, e) \
+ XOR c, d, X7; \
+ AND b, X7; \
+ XOR d, X7
+
+// FUNC2: f = b ^ c ^ d, left in X7; a and e are accepted but unused.
+#define FUNC2(a, b, c, d, e) \
+ XOR b, c, X7; \
+ XOR d, X7
+
+// FUNC3: f = (b & c) | ((b | c) & d), left in X7; clobbers X6 and X8; a and e are accepted but unused.
+#define FUNC3(a, b, c, d, e) \
+ OR b, c, X8; \
+ AND b, c, X6; \
+ AND d, X8; \
+ OR X6, X8, X7
+// FUNC4 is the same function as FUNC2 (f = b ^ c ^ d); it is used by ROUND4.
+#define FUNC4 FUNC2
+// MIX: e += f (X7) + w (X5) + key + (a rotated left by 5, via RORW $27 into X8), and b is rotated left by 30 in place (RORW $2). Expects X5 and X7 to be set by a LOAD*/FUNC* pair; c and d are unused here.
+#define MIX(a, b, c, d, e, key) \
+ RORW $2, b; \
+ ADD X7, e; \
+ RORW $27, a, X8; \
+ ADD X5, e; \
+ ADD key, e; \
+ ADD X8, e
+// ROUND1: one round that takes its word straight from the message bytes (LOAD1), with FUNC1 and the constant held in X15.
+#define ROUND1(a, b, c, d, e, index) \
+ LOAD1(index); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, X15)
+// ROUND1x: same as ROUND1 (FUNC1, constant in X15) but the word comes from the rolling schedule (LOAD).
+#define ROUND1x(a, b, c, d, e, index) \
+ LOAD(index); \
+ FUNC1(a, b, c, d, e); \
+ MIX(a, b, c, d, e, X15)
+// ROUND2: schedule word (LOAD), FUNC2, constant in X16.
+#define ROUND2(a, b, c, d, e, index) \
+ LOAD(index); \
+ FUNC2(a, b, c, d, e); \
+ MIX(a, b, c, d, e, X16)
+// ROUND3: schedule word (LOAD), FUNC3, constant in X17.
+#define ROUND3(a, b, c, d, e, index) \
+ LOAD(index); \
+ FUNC3(a, b, c, d, e); \
+ MIX(a, b, c, d, e, X17)
+// ROUND4: schedule word (LOAD), FUNC4 (identical to FUNC2), constant in X18.
+#define ROUND4(a, b, c, d, e, index) \
+ LOAD(index); \
+ FUNC4(a, b, c, d, e); \
+ MIX(a, b, c, d, e, X18)
+
+// func block(dig *Digest, p []byte): folds every complete 64-byte block of p into the five 32-bit state words at dig; returns without touching dig when p holds no complete block.
+TEXT ·block(SB),NOSPLIT,$64-32
+ MOV p_base+8(FP), X29 // X29 = pointer to the current block
+ MOV p_len+16(FP), X30
+ SRL $6, X30
+ SLL $6, X30 // X30 = p_len rounded down to a multiple of 64
+
+ ADD X29, X30, X28 // X28 = address just past the last complete block
+ BEQ X28, X29, end // nothing to hash: skip the state load and the write-back
+
+ ADD $8, X2, X19 // X19 = message schedule buffer on stack (16 words)
+
+ MOV dig+0(FP), X20
+ MOVWU (0*4)(X20), X10 // a = H0
+ MOVWU (1*4)(X20), X11 // b = H1
+ MOVWU (2*4)(X20), X12 // c = H2
+ MOVWU (3*4)(X20), X13 // d = H3
+ MOVWU (4*4)(X20), X14 // e = H4
+
+ MOV $·_K(SB), X21
+ MOVW (0*4)(X21), X15 // constant for ROUND1/ROUND1x
+ MOVW (1*4)(X21), X16 // constant for ROUND2
+ MOVW (2*4)(X21), X17 // constant for ROUND3
+ MOVW (3*4)(X21), X18 // constant for ROUND4
+// Per block: save the incoming state in X22-X26, run 80 rounds, then add the saved state back.
+loop:
+ MOVW X10, X22
+ MOVW X11, X23
+ MOVW X12, X24
+ MOVW X13, X25
+ MOVW X14, X26
+
+ ROUND1(X10, X11, X12, X13, X14, 0)
+ ROUND1(X14, X10, X11, X12, X13, 1)
+ ROUND1(X13, X14, X10, X11, X12, 2)
+ ROUND1(X12, X13, X14, X10, X11, 3)
+ ROUND1(X11, X12, X13, X14, X10, 4)
+ ROUND1(X10, X11, X12, X13, X14, 5)
+ ROUND1(X14, X10, X11, X12, X13, 6)
+ ROUND1(X13, X14, X10, X11, X12, 7)
+ ROUND1(X12, X13, X14, X10, X11, 8)
+ ROUND1(X11, X12, X13, X14, X10, 9)
+ ROUND1(X10, X11, X12, X13, X14, 10)
+ ROUND1(X14, X10, X11, X12, X13, 11)
+ ROUND1(X13, X14, X10, X11, X12, 12)
+ ROUND1(X12, X13, X14, X10, X11, 13)
+ ROUND1(X11, X12, X13, X14, X10, 14)
+ ROUND1(X10, X11, X12, X13, X14, 15)
+// From round 16 on, each word is derived from the rolling 16-entry schedule (LOAD) instead of the message bytes.
+ ROUND1x(X14, X10, X11, X12, X13, 16)
+ ROUND1x(X13, X14, X10, X11, X12, 17)
+ ROUND1x(X12, X13, X14, X10, X11, 18)
+ ROUND1x(X11, X12, X13, X14, X10, 19)
+
+ ROUND2(X10, X11, X12, X13, X14, 20)
+ ROUND2(X14, X10, X11, X12, X13, 21)
+ ROUND2(X13, X14, X10, X11, X12, 22)
+ ROUND2(X12, X13, X14, X10, X11, 23)
+ ROUND2(X11, X12, X13, X14, X10, 24)
+ ROUND2(X10, X11, X12, X13, X14, 25)
+ ROUND2(X14, X10, X11, X12, X13, 26)
+ ROUND2(X13, X14, X10, X11, X12, 27)
+ ROUND2(X12, X13, X14, X10, X11, 28)
+ ROUND2(X11, X12, X13, X14, X10, 29)
+ ROUND2(X10, X11, X12, X13, X14, 30)
+ ROUND2(X14, X10, X11, X12, X13, 31)
+ ROUND2(X13, X14, X10, X11, X12, 32)
+ ROUND2(X12, X13, X14, X10, X11, 33)
+ ROUND2(X11, X12, X13, X14, X10, 34)
+ ROUND2(X10, X11, X12, X13, X14, 35)
+ ROUND2(X14, X10, X11, X12, X13, 36)
+ ROUND2(X13, X14, X10, X11, X12, 37)
+ ROUND2(X12, X13, X14, X10, X11, 38)
+ ROUND2(X11, X12, X13, X14, X10, 39)
+
+ ROUND3(X10, X11, X12, X13, X14, 40)
+ ROUND3(X14, X10, X11, X12, X13, 41)
+ ROUND3(X13, X14, X10, X11, X12, 42)
+ ROUND3(X12, X13, X14, X10, X11, 43)
+ ROUND3(X11, X12, X13, X14, X10, 44)
+ ROUND3(X10, X11, X12, X13, X14, 45)
+ ROUND3(X14, X10, X11, X12, X13, 46)
+ ROUND3(X13, X14, X10, X11, X12, 47)
+ ROUND3(X12, X13, X14, X10, X11, 48)
+ ROUND3(X11, X12, X13, X14, X10, 49)
+ ROUND3(X10, X11, X12, X13, X14, 50)
+ ROUND3(X14, X10, X11, X12, X13, 51)
+ ROUND3(X13, X14, X10, X11, X12, 52)
+ ROUND3(X12, X13, X14, X10, X11, 53)
+ ROUND3(X11, X12, X13, X14, X10, 54)
+ ROUND3(X10, X11, X12, X13, X14, 55)
+ ROUND3(X14, X10, X11, X12, X13, 56)
+ ROUND3(X13, X14, X10, X11, X12, 57)
+ ROUND3(X12, X13, X14, X10, X11, 58)
+ ROUND3(X11, X12, X13, X14, X10, 59)
+
+ ROUND4(X10, X11, X12, X13, X14, 60)
+ ROUND4(X14, X10, X11, X12, X13, 61)
+ ROUND4(X13, X14, X10, X11, X12, 62)
+ ROUND4(X12, X13, X14, X10, X11, 63)
+ ROUND4(X11, X12, X13, X14, X10, 64)
+ ROUND4(X10, X11, X12, X13, X14, 65)
+ ROUND4(X14, X10, X11, X12, X13, 66)
+ ROUND4(X13, X14, X10, X11, X12, 67)
+ ROUND4(X12, X13, X14, X10, X11, 68)
+ ROUND4(X11, X12, X13, X14, X10, 69)
+ ROUND4(X10, X11, X12, X13, X14, 70)
+ ROUND4(X14, X10, X11, X12, X13, 71)
+ ROUND4(X13, X14, X10, X11, X12, 72)
+ ROUND4(X12, X13, X14, X10, X11, 73)
+ ROUND4(X11, X12, X13, X14, X10, 74)
+ ROUND4(X10, X11, X12, X13, X14, 75)
+ ROUND4(X14, X10, X11, X12, X13, 76)
+ ROUND4(X13, X14, X10, X11, X12, 77)
+ ROUND4(X12, X13, X14, X10, X11, 78)
+ ROUND4(X11, X12, X13, X14, X10, 79)
+
+ ADD X22, X10 // add the state saved at the top of the loop
+ ADD X23, X11
+ ADD X24, X12
+ ADD X25, X13
+ ADD X26, X14
+
+ ADD $64, X29 // advance to the next block
+ BNE X28, X29, loop
+// Store the updated state back into dig.
+ MOVW X10, (0*4)(X20)
+ MOVW X11, (1*4)(X20)
+ MOVW X12, (2*4)(X20)
+ MOVW X13, (3*4)(X20)
+ MOVW X14, (4*4)(X20)
+// end sits after the write-back: the early exit is taken before X20 and X10-X14 are loaded, so it must not store through them.
+end:
+ RET
+
+GLOBL ·_K(SB),RODATA,$16 // read-only table of the four 32-bit round constants
+DATA ·_K+0(SB)/4, $0x5A827999 // loaded into X15 by block; used by ROUND1/ROUND1x
+DATA ·_K+4(SB)/4, $0x6ED9EBA1 // loaded into X16 by block; used by ROUND2
+DATA ·_K+8(SB)/4, $0x8F1BBCDC // loaded into X17 by block; used by ROUND3
+DATA ·_K+12(SB)/4, $0xCA62C1D6 // loaded into X18 by block; used by ROUND4

Change information

Files:
  • M src/crypto/sha1/sha1block_decl.go
  • M src/crypto/sha1/sha1block_generic.go
  • A src/crypto/sha1/sha1block_riscv64.s
Change size: M
Delta: 3 files changed, 230 insertions(+), 2 deletions(-)
Open in Gerrit

Related details

Attention set is empty
Submit Requirements:
  • requirement is not satisfied: Code-Review
  • requirement satisfied: No-Unresolved-Comments
  • requirement is not satisfied: Review-Enforcement
  • requirement is not satisfied: TryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: newchange
Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I3d4d05fe19872412fdf77a337395e0bf84c41dd5
Gerrit-Change-Number: 732560
Gerrit-PatchSet: 1
Gerrit-Owner: Julian Zhu <jz53...@gmail.com>
unsatisfied_requirement
satisfied_requirement
open
diffy

Julian Zhu (Gerrit)

unread,
Dec 24, 2025, 8:56:27 AM (18 hours ago) Dec 24
to goph...@pubsubhelper.golang.org, Joel Sing, Mark Ryan, Filippo Valsorda, Roland Shoemaker, Daniel McCarney, Gopher Robot, golang-co...@googlegroups.com
Attention needed from Filippo Valsorda, Joel Sing and Mark Ryan

Julian Zhu voted Commit-Queue+1

Commit-Queue+1
Open in Gerrit

Related details

Attention is currently required from:
  • Filippo Valsorda
  • Joel Sing
  • Mark Ryan
Submit Requirements:
  • requirement is not satisfied: Code-Review
  • requirement satisfied: No-Unresolved-Comments
  • requirement is not satisfied: Review-Enforcement
  • requirement is not satisfied: TryBots-Pass
Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. DiffyGerrit
Gerrit-MessageType: comment
Gerrit-Project: go
Gerrit-Branch: master
Gerrit-Change-Id: I3d4d05fe19872412fdf77a337395e0bf84c41dd5
Gerrit-Change-Number: 732560
Gerrit-PatchSet: 1
Gerrit-Owner: Julian Zhu <jz53...@gmail.com>
Gerrit-Reviewer: Daniel McCarney <dan...@binaryparadox.net>
Gerrit-Reviewer: Filippo Valsorda <fil...@golang.org>
Gerrit-Reviewer: Joel Sing <jo...@sing.id.au>
Gerrit-Reviewer: Julian Zhu <jz53...@gmail.com>
Gerrit-Reviewer: Mark Ryan <mark...@rivosinc.com>
Gerrit-Reviewer: Roland Shoemaker <rol...@golang.org>
Gerrit-CC: Gopher Robot <go...@golang.org>
Gerrit-Attention: Filippo Valsorda <fil...@golang.org>
Gerrit-Attention: Joel Sing <jo...@sing.id.au>
Gerrit-Attention: Mark Ryan <mark...@rivosinc.com>
Gerrit-Comment-Date: Wed, 24 Dec 2025 13:56:23 +0000
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
unsatisfied_requirement
satisfied_requirement
open
diffy
Reply all
Reply to author
Forward
0 new messages