cmd/compile,sync/atomic: make Add And & Or SQCST on PPC64
Fixes #79186
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
index b2358a9..31c92a4 100644
--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
@@ -135,6 +135,7 @@
// AND/OR Rarg1, Rtmp
// STBCCC/STWCCC Rtmp, (Rarg0)
// BNE -3(PC)
+ // LWSYNC
ld := ppc64.ALBAR
st := ppc64.ASTBCCC
if v.Op == ssa.OpPPC64LoweredAtomicAnd32 || v.Op == ssa.OpPPC64LoweredAtomicOr32 {
@@ -170,6 +171,11 @@
p3 := s.Prog(ppc64.ABNE)
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
+ // LWSYNC - Provide acquire ordering to pair with the
+ // release (pre-LWSYNC) above, making the operation
+ // sequentially consistent.
+ plwsync2 := s.Prog(ppc64.ALWSYNC)
+ plwsync2.To.Type = obj.TYPE_NONE
case ssa.OpPPC64LoweredAtomicAdd32,
ssa.OpPPC64LoweredAtomicAdd64:
@@ -179,6 +185,7 @@
// STDCCC/STWCCC Rout, (Rarg0)
// BNE -3(PC)
// MOVW Rout,Rout (if Add32)
+ // LWSYNC
ld := ppc64.ALDAR
st := ppc64.ASTDCCC
if v.Op == ssa.OpPPC64LoweredAtomicAdd32 {
@@ -223,6 +230,11 @@
p5.From.Type = obj.TYPE_REG
p5.From.Reg = out
}
+ // LWSYNC - Provide acquire ordering to pair with the
+ // release (pre-LWSYNC) above, making the operation
+ // sequentially consistent.
+ plwsync2 := s.Prog(ppc64.ALWSYNC)
+ plwsync2.To.Type = obj.TYPE_NONE
case ssa.OpPPC64LoweredAtomicExchange8,
ssa.OpPPC64LoweredAtomicExchange32,
diff --git a/src/internal/runtime/atomic/atomic_ppc64x.s b/src/internal/runtime/atomic/atomic_ppc64x.s
index bff7d19..a82a34e 100644
--- a/src/internal/runtime/atomic/atomic_ppc64x.s
+++ b/src/internal/runtime/atomic/atomic_ppc64x.s
@@ -220,6 +220,7 @@
ADD R5, R3
STWCCC R3, (R4)
BNE -3(PC)
+ LWSYNC
MOVW R3, ret+16(FP)
RET
@@ -235,6 +236,7 @@
ADD R5, R3
STDCCC R3, (R4)
BNE -3(PC)
+ LWSYNC
MOVD R3, ret+16(FP)
RET
@@ -343,6 +345,7 @@
OR R4, R6
STBCCC R6, (R3)
BNE again
+ LWSYNC
RET
// void ·And8(byte volatile*, byte);
@@ -355,6 +358,7 @@
AND R4, R6
STBCCC R6, (R3)
BNE again
+ LWSYNC
RET
// func Or(addr *uint32, v uint32)
@@ -367,6 +371,7 @@
OR R4, R6
STWCCC R6, (R3)
BNE again
+ LWSYNC
RET
// func And(addr *uint32, v uint32)
@@ -379,6 +384,7 @@
AND R4, R6
STWCCC R6, (R3)
BNE again
+ LWSYNC
RET
// func Or32(addr *uint32, v uint32) old uint32
@@ -391,6 +397,7 @@
OR R4, R6, R7
STWCCC R7, (R3)
BNE again
+ LWSYNC
MOVW R6, ret+16(FP)
RET
@@ -404,6 +411,7 @@
AND R4, R6, R7
STWCCC R7, (R3)
BNE again
+ LWSYNC
MOVW R6, ret+16(FP)
RET
@@ -417,6 +425,7 @@
OR R4, R6, R7
STDCCC R7, (R3)
BNE again
+ LWSYNC
MOVD R6, ret+16(FP)
RET
@@ -430,6 +439,7 @@
AND R4, R6, R7
STDCCC R7, (R3)
BNE again
+ LWSYNC
MOVD R6, ret+16(FP)
RET
diff --git a/test/fixedbugs/issue79186.go b/test/fixedbugs/issue79186.go
new file mode 100644
index 0000000..5d7e103
--- /dev/null
+++ b/test/fixedbugs/issue79186.go
@@ -0,0 +1,69 @@
+// run
+
+//go:build ppc64le
+
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Issue 79186: on ppc64le (POWER8/9), atomic add operations lacked a
+// post-barrier (acquire ordering), allowing loads after an RWMutex.RLock
+// to be speculatively reordered before the lock acquisition, causing
+// concurrent map read and map write.
+
+package main
+
+import (
+ "runtime"
+ "sync"
+)
+
+type M struct {
+ mu sync.RWMutex
+ m map[int]int
+}
+
+func NewM() *M {
+ return &M{m: make(map[int]int)}
+}
+
+func (x *M) Get(k int) (int, bool) {
+ x.mu.RLock()
+ v, ok := x.m[k]
+ x.mu.RUnlock()
+ return v, ok
+}
+
+func (x *M) Set(k, v int) {
+ x.mu.Lock()
+ x.m[k] = v
+ x.mu.Unlock()
+}
+
+func main() {
+ runtime.GOMAXPROCS(2)
+
+ x := NewM()
+
+ const goroutines = 256
+ const iters = 200000
+
+ var wg sync.WaitGroup
+ wg.Add(goroutines)
+
+ for g := 0; g < goroutines; g++ {
+ go func(id int) {
+ defer wg.Done()
+ for i := 0; i < iters; i++ {
+ k := (id + i) & 15
+ if _, ok := x.Get(k); !ok {
+ x.Set(k, i)
+ } else if i&7 == 0 {
+ x.Set(k, i)
+ }
+ }
+ }(g)
+ }
+
+ wg.Wait()
+}
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Hold | +1 |
Atomic operations on ppc are tricky to get right. I need to review this very carefully.
Likewise, some Go atomic operations take advantage of reduced synchronization requirements. Unduly strengthening synchronization operations will have a substantial performance penalty.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Atomic operations on ppc are tricky to get right. I need to review this very carefully.
Likewise, some Go atomic operations take advantage of reduced synchronization requirements. Unduly strengthening synchronization operations will have a substantial performance penalty.
Just so you know this make all atomics match the CAS behavior.
Also there isn't any correctness risk from having too much syncing, at worst we're uselessly slow.
But I'm not gonna deny that this code is tricky.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
JorropoAtomic operations on ppc are tricky to get right. I need to review this very carefully.
Likewise, some Go atomic operations take advantage of reduced synchronization requirements. Unduly strengthening synchronization operations will have a substantial performance penalty.
Just so you know this make all atomics match the CAS behavior.
Also there isn't any correctness risk from having too much syncing, at worst we're uselessly slow.But I'm not gonna deny that this code is tricky.
Right; I want to verify this isn't covering up another issue. The implementation of the ppc atomics are, kindly put, inconsistent.
I was part of some internal discussions about how Go implements its atomic operations. They can take some not-so-well-documented shortcuts; we don't have to deal with mmio or uncached memory. This is why some sequences do not match what gcc/clang produce. In theory, they perform better.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
| Hold | +0 |
Looking at the atomic documentation, these are expected to be seqcst. Using lwsync in place of sync and isync is OK here since Go doesn't interact with uncached memory or mmio.
// Ensure a 32 bit result@k...@google.com, is the sign extension needed here? My understanding is we treat the upper 32 bits as undefined.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +1 |
plwsync2.To.Type = obj.TYPE_NONEIs this line necessary? (also below)
//go:build ppc64lePerhaps we should just run it everywhere (assuming the test is not too slow). The issue was specific to PPC64, but it could also serve as a regression test for other architectures to ensure that they have the right semantics for atomic operations.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
@jayanth.kr...@ibm.com Did you try this patch and did it fix the issue you were encountering?
| Code-Review | +1 |
@jayanth.kr...@ibm.com Did you try this patch and did it fix the issue you were encountering?
Yes, the reproducer did run successfully on P8le, P9le and P10le. P10le has faster execution time.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Is this line necessary? (also below)
No idea, let's see if CI fails.
Perhaps we should just run it everywhere (assuming the test is not too slow). The issue was specific to PPC64, but it could also serve as a regression test for other architectures to ensure that they have the right semantics for atomic operations.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
1 is the latest approved patch-set.
The change was submitted with unreviewed changes in the following files:
```
The name of the file: src/cmd/compile/internal/ppc64/ssa.go
Insertions: 5, Deletions: 6.
The diff is too large to show. Please review the diff.
```
```
The name of the file: test/fixedbugs/issue79186.go
Insertions: 0, Deletions: 2.
The diff is too large to show. Please review the diff.
```
cmd/compile,sync/atomic: make Add And & Or SQCST on PPC64
Fixes #79186
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |