cmd/compile: reduce panicBound stack frame size
Fixes #76166
diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
index a0e1ab9..532aaf5 100644
--- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go
@@ -49,7 +49,7 @@
func init() {
var regNamesRISCV64 []string
- var gpMask, fpMask, gpgMask, gpspMask, gpspsbMask, gpspsbgMask, first16Mask regMask
+ var gpMask, fpMask, gpgMask, gpspMask, gpspsbMask, gpspsbgMask, boundsMask regMask
regNamed := make(map[string]regMask)
// Build the list of register names, creating an appropriately indexed
@@ -93,8 +93,11 @@
gpspMask |= mask
gpspsbMask |= mask
gpspsbgMask |= mask
- if r >= 5 && r < 5+16 {
- first16Mask |= mask
+ // BoundsEncode register must be in 0-15
+ // meanwhile riscv64 using X10-X17, X8,X9, X18-X23 as arguments
+ // which the overlap of these two limitations are X8-X15
+ if r >= 8 && r <= 15 {
+ boundsMask |= mask
}
}
}
@@ -442,10 +445,10 @@
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
- {name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16Mask, first16Mask}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
- {name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16Mask}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
- {name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16Mask}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
- {name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
+ {name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{boundsMask, boundsMask}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
+ {name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{boundsMask}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
+ {name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{boundsMask}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
+ {name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// F extension.
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true, typ: "Float32"}, // arg0 + arg1
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 264f4b3..f99ce0c 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -34917,8 +34917,8 @@
call: true,
reg: regInfo{
inputs: []inputInfo{
- {0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
- {1, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
+ {0, 32640}, // X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 32640}, // X8 X9 X10 X11 X12 X13 X14 X15
},
},
},
@@ -34929,7 +34929,7 @@
call: true,
reg: regInfo{
inputs: []inputInfo{
- {0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
+ {0, 32640}, // X8 X9 X10 X11 X12 X13 X14 X15
},
},
},
@@ -34940,7 +34940,7 @@
call: true,
reg: regInfo{
inputs: []inputInfo{
- {0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
+ {0, 32640}, // X8 X9 X10 X11 X12 X13 X14 X15
},
},
},
diff --git a/src/runtime/asm_riscv64.s b/src/runtime/asm_riscv64.s
index 5bd1618..cd60505 100644
--- a/src/runtime/asm_riscv64.s
+++ b/src/runtime/asm_riscv64.s
@@ -982,27 +982,22 @@
MOV $64, X24
JMP gcWriteBarrier<>(SB)
-TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
+TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$104-0
NO_LOCAL_POINTERS
- // Save all 16 int registers that could have an index in them.
+ // The PanicBounds RR has 3 arguments so the offset is 24.
+ // Save all overlapped int registers that could have an index in them.
// They may be pointers, but if they are they are dead.
// Skip X0 aka ZERO, X1 aka LR, X2 aka SP, X3 aka GP, X4 aka TP.
- MOV X5, 24(X2)
- MOV X6, 32(X2)
- MOV X7, 40(X2)
- MOV X8, 48(X2)
- MOV X9, 56(X2)
- MOV X10, 64(X2)
- MOV X11, 72(X2)
- MOV X12, 80(X2)
- MOV X13, 88(X2)
- MOV X14, 96(X2)
- MOV X15, 104(X2)
- MOV X16, 112(X2)
- MOV X17, 120(X2)
- MOV X18, 128(X2)
- MOV X19, 136(X2)
- MOV X20, 144(X2)
+ MOV X8, 24(X2)
+ MOV X9, 32(X2)
+ MOV X10, 40(X2)
+ MOV X11, 48(X2)
+ MOV X12, 56(X2)
+ MOV X13, 64(X2)
+ MOV X14, 72(X2)
+ MOV X15, 80(X2)
+ MOV X16, 88(X2)
+ MOV X17, 96(X2)
MOV X1, X10 // PC immediately after call to panicBounds
ADD $24, X2, X11 // pointer to save area
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
// meanwhile riscv64 using X10-X17, X8,X9, X18-X23 as arguments
// which the overlap of these two limitations are X8-X15I don't understand this reasoning.
MOV X16, 88(X2)The regalloc code restricts to X8-X15, but you are saving X8-X17 here.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
maybe a better way to fix #76166 is to get rid of the bounds check in internal/runtime/sys/intrinsics.go. Maybe just implementing the intrinsic would work. Maybe just adding a "&0xff" to the table lookup would work.
Maybe teaching prove about that access pattern would work.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
// meanwhile riscv64 using X10-X17, X8,X9, X18-X23 as arguments
// which the overlap of these two limitations are X8-X15I don't understand this reasoning.
Done
The regalloc code restricts to X8-X15, but you are saving X8-X17 here.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
maybe a better way to fix #76166 is to get rid of the bounds check in internal/runtime/sys/intrinsics.go. Maybe just implementing the intrinsic would work. Maybe just adding a "&0xff" to the table lookup would work.
Maybe teaching prove about that access pattern would work.
Hi, Keith
Thanks for these tips. Here is my quick testes or reply.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
This seems like a less than ideal solution - we've shaved a few bytes off the panicBound stack frame, but we now deviate further from the implementation used on the majority of architectures.
I'm pretty sure that casting x to uint8() will eliminate the bounds check, which in turn should benefit all architectures that do not have a Len64 intrinsic... https://go-review.googlesource.com/c/go/+/718040 (although at least one of the riscv64 builders appears to have an issue with race again).
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Meng Zhuomaybe a better way to fix #76166 is to get rid of the bounds check in internal/runtime/sys/intrinsics.go. Maybe just implementing the intrinsic would work. Maybe just adding a "&0xff" to the table lookup would work.
Maybe teaching prove about that access pattern would work.
Hi, Keith
Thanks for these tips. Here is my quick testes or reply.
- The rva22u64 mandatory bits extension but the mininal requirement for Go is rva20u64.
- Add "&0xff" don't work.
- As for teaching prove I need some time to figure out
I was able to remove the bounds check using &0xff (a byte cast also works).
return n + int(len8tab[x&0xff])
I wonder why it did not work for you?
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Meng Zhuomaybe a better way to fix #76166 is to get rid of the bounds check in internal/runtime/sys/intrinsics.go. Maybe just implementing the intrinsic would work. Maybe just adding a "&0xff" to the table lookup would work.
Maybe teaching prove about that access pattern would work.
Keith RandallHi, Keith
Thanks for these tips. Here is my quick testes or reply.
- The rva22u64 mandatory bits extension but the mininal requirement for Go is rva20u64.
- Add "&0xff" don't work.
- As for teaching prove I need some time to figure out
I was able to remove the bounds check using &0xff (a byte cast also works).
return n + int(len8tab[x&0xff])I wonder why it did not work for you?
Ah ha, my code is `return n + int(len8tab[x]&0xff)`. I understand it now, bound check will be elimated if it's proved index won't be over the size of slice.
This seems like a less than ideal solution - we've shaved a few bytes off the panicBound stack frame, but we now deviate further from the implementation used on the majority of architectures.
I'm pretty sure that casting x to uint8() will eliminate the bounds check, which in turn should benefit all architectures that do not have a Len64 intrinsic... https://go-review.googlesource.com/c/go/+/718040 (although at least one of the riscv64 builders appears to have an issue with race again).
Yes, I forget Milkv Megrez using a patched kernel...Apparently the upstream of EBC7700 don't know about it.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |