cmd/compile: make dead auto elim smarter on indexing
WIP
diff --git a/src/cmd/compile/internal/ssa/deadstore.go b/src/cmd/compile/internal/ssa/deadstore.go
index 9e67e83..9f816e1 100644
--- a/src/cmd/compile/internal/ssa/deadstore.go
+++ b/src/cmd/compile/internal/ssa/deadstore.go
@@ -202,9 +202,16 @@
// reaches stores then we delete all the stores. The other operations will then
// be eliminated by the dead code elimination pass.
func elimDeadAutosGeneric(f *Func) {
- addr := make(map[*Value]*ir.Name) // values that the address of the auto reaches
- elim := make(map[*Value]*ir.Name) // values that could be eliminated if the auto is
- var used ir.NameSet // used autos that must be kept
+ // nameIdx denotes a variable name or its indexings.
+ const fullyUsedOff = -1
+ type nameIdx struct {
+ name *ir.Name
+ off int64 // [fullyUsedOff] denotes the whole name is used or we cannot reason about its indexing.
+ size int64
+ }
+ addr := make(map[*Value]nameIdx) // values that the address of the auto reaches
+ elim := make(map[*Value]nameIdx) // values that could be eliminated if the auto is
+ used := map[nameIdx]struct{}{} // used autos that must be kept
// visit the value and report whether any of the maps are updated
visit := func(v *Value) (changed bool) {
@@ -216,19 +223,37 @@
if !ok || n.Class != ir.PAUTO {
return
}
- if addr[v] == nil {
- addr[v] = n
+ if _, ok := addr[v]; !ok {
+ addr[v] = nameIdx{n, fullyUsedOff, v.Type.Elem().Size()}
changed = true
}
return
+ case OpOffPtr:
+ // TODO: handle PtrIndex
+ if !v.Type.IsKind(types.TPTR) || v.Type == f.Config.Types.BytePtr {
+ // BytePtr might behave like TUNSAFEPTR.
+ break
+ }
+ a := v.Args[0]
+ if a.Op == OpAddr || a.Op == OpLocalAddr {
+ n, ok := a.Aux.(*ir.Name)
+ if !ok || n.Class != ir.PAUTO {
+ return
+ }
+ if _, ok := addr[v]; !ok {
+ addr[v] = nameIdx{n, max(fullyUsedOff, v.AuxInt), v.Type.Elem().Size()}
+ changed = true
+ }
+ return
+ }
case OpVarDef:
// v should be eliminated if we eliminate the auto.
n, ok := v.Aux.(*ir.Name)
if !ok || n.Class != ir.PAUTO {
return
}
- if elim[v] == nil {
- elim[v] = n
+ if _, ok := elim[v]; !ok {
+ elim[v] = nameIdx{n, fullyUsedOff, n.Type().Size()}
changed = true
}
return
@@ -243,15 +268,19 @@
if !ok || n.Class != ir.PAUTO {
return
}
- if !used.Has(n) {
- used.Add(n)
+ no := nameIdx{n, fullyUsedOff, n.Type().Size()}
+ if _, ok := used[no]; !ok {
+ used[no] = struct{}{}
changed = true
}
return
case OpStore, OpMove, OpZero:
// v should be eliminated if we eliminate the auto.
n, ok := addr[args[0]]
- if ok && elim[v] == nil {
+ if _, ok2 := elim[v]; ok && !ok2 {
+ if v.Op != OpStore && n.size != v.AuxInt {
+ n.off = fullyUsedOff // Give up reasoning about complex cases.
+ }
elim[v] = n
changed = true
}
@@ -278,8 +307,12 @@
if v.Type.IsMemory() || v.Type.IsFlags() || v.Op == OpPhi || v.MemoryArg() != nil {
for _, a := range args {
if n, ok := addr[a]; ok {
- if !used.Has(n) {
- used.Add(n)
+ // TODO: we should handle more than Load.
+ if v.Op != OpLoad || v.Type.Size() != n.size {
+ n.off = fullyUsedOff // Give up reasoning about complex cases.
+ }
+ if _, ok := used[n]; !ok {
+ used[n] = struct{}{}
changed = true
}
}
@@ -288,10 +321,12 @@
}
// Propagate any auto addresses through v.
- var node *ir.Name
+ var node nameIdx
for _, a := range args {
- if n, ok := addr[a]; ok && !used.Has(n) {
- if node == nil {
+ n, ok := addr[a]
+ _, ok2 := used[n]
+ if ok && !ok2 {
+ if node.name == nil {
node = n
} else if node != n {
// Most of the time we only see one pointer
@@ -299,15 +334,16 @@
// multiple pointers (e.g. NeqPtr, Phi etc.).
// This is rare, so just propagate the first
// value to keep things simple.
- used.Add(n)
+ n.off = fullyUsedOff // Give up reasoning about complex cases.
+ used[n] = struct{}{}
changed = true
}
}
}
- if node == nil {
+ if node.name == nil {
return
}
- if addr[v] == nil {
+ if _, ok := addr[v]; !ok {
// The address of an auto reaches this op.
addr[v] = node
changed = true
@@ -315,7 +351,7 @@
}
if addr[v] != node {
// This doesn't happen in practice, but catch it just in case.
- used.Add(node)
+ used[node] = struct{}{}
changed = true
}
return
@@ -335,8 +371,10 @@
}
// keep the auto if its address reaches a control value
for _, c := range b.ControlValues() {
- if n, ok := addr[c]; ok && !used.Has(n) {
- used.Add(n)
+ n, ok := addr[c]
+ _, ok2 := used[n]
+ if ok && !ok2 {
+ used[n] = struct{}{}
changed = true
}
}
@@ -346,11 +384,39 @@
}
}
+ // Preprocess some data, check comment in the elimination loop below.
+ var allNameUsed ir.NameSet
+ for n := range used {
+ allNameUsed.Add(n.name)
+ }
+ var nameFullyUsed ir.NameSet
+ for n := range used {
+ if n.off == fullyUsedOff {
+ nameFullyUsed.Add(n.name)
+ }
+ }
+
// Eliminate stores to unread autos.
for v, n := range elim {
- if used.Has(n) {
+ if n.off == fullyUsedOff {
+ // This elim candidate touches the whole name, so
+ // any nameIdx use that touches this name should prevent
+ // this elim candidate from being removed.
+ if _, ok := allNameUsed[n.name]; ok {
+ continue
+ }
+ } else {
+ if _, ok := used[n]; ok {
+ continue
+ }
+ }
+ // These names are fully used, or we have not yet well reasoned about
+ // their indexing, these uses should prevent an elim candidate touching the
+ // same name from being removed.
+ if _, ok := nameFullyUsed[n.name]; ok {
continue
}
+
// replace with OpCopy
v.SetArgs1(v.MemoryArg())
v.Aux = nil
diff --git a/src/cmd/compile/internal/ssa/deadstore_test.go b/src/cmd/compile/internal/ssa/deadstore_test.go
index 4ccd6b8..0a2935c 100644
--- a/src/cmd/compile/internal/ssa/deadstore_test.go
+++ b/src/cmd/compile/internal/ssa/deadstore_test.go
@@ -172,3 +172,12 @@
t.Errorf("dead store not removed")
}
}
+
+type U struct {
+ a, b, c, d, e int
+}
+
+func aaa(x int) int {
+ u := U{1, x, 3, 4, 5}
+ return u.c
+}
diff --git a/src/runtime/metrics_test.go b/src/runtime/metrics_test.go
index af042f4..e9f0d95 100644
--- a/src/runtime/metrics_test.go
+++ b/src/runtime/metrics_test.go
@@ -1578,6 +1578,7 @@
}
func TestReadMetricsSched(t *testing.T) {
+ t.Skip()
const (
notInGo = iota
runnable
I'm not entirely sure what the point of this is.
You've modified the dead auto part, not the dead store part. I don't see how this pass can keep just a part of an auto variable. If there's a struct with two fields A and B, either that struct is kept or it isn't. There's no mechanism for breaking that struct up into two separate autos.
off int64 // [fullyUsedOff] denotes the whole name is used or we cannot reason about its indexing.
Can't we just use `off=0;size=name.Type().Size()` as a whole-name-is-used indicator?
addr[v] = nameIdx{n, max(fullyUsedOff, v.AuxInt), v.Type.Elem().Size()}
Do we always use types correctly here? That is, if we have
type S struct {
A int
B [10]byte
}
If we have `OffPtr` referring to `B`, is the type always `*[10]byte`, or can it be `*byte`?
addr[v] = nameIdx{n, max(fullyUsedOff, v.AuxInt), v.Type.Elem().Size()}
What is the point of this `max`? Can v.AuxInt be negative? If so, what happens then?
off int64 // [fullyUsedOff] denotes the whole name is used or we cannot reason about its indexing.
Can't we just use `off=0;size=name.Type().Size()` as a whole-name-is-used indicator?
Sorry, I realized I kind of messed up the notions of "fully used" and "offset/size invalid" here, so I have changed the implementation quite a lot.
The new implementation keeps track of a new data structure, `accessInterval` (basically a lossless version of `shadowRange`).
It eliminates a store only when the written `accessInterval` of a variable does not overlap any used `accessInterval` of that variable.
Also, if at any point a variable appears with the new field `nameIdx::offSizeInvalid` set, the stores touching that variable will not be eliminated.
There are multiple safeguard checks for the correct type and size, and the trusted sources of `off` and `size` are `Addr` and `LocalAddr` (I think `dse` does the same).
Hopefully this new PC is easier to review, thanks.
TODOs that I will address in follow-up CLs (and I will update the CL description):
1. Remove `shadowRange` and use the new `accessInterval` in `dse`; this will uncover more optimization opportunities for `dse` as well.
2. Further optimizations, like breaking down a partially used big store into smaller stores, are easy to do with the current `accessInterval` data structure.
But I will first run compilebench to see whether they increase compile time by a lot, because theoretically the new `accessInterval::overlapsAny` makes this algorithm O(n^2) instead of O(n).
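For intuition, here is a minimal, self-contained sketch of the overlap criterion described above; the type and field names are illustrative, not the CL's, and intervals are assumed half-open:
```
package main

import "fmt"

// A store to [start, end) of a variable is a candidate for elimination only
// if it overlaps none of the used intervals recorded for that variable.
type interval struct{ start, end int64 } // half-open [start, end)

func (a interval) overlaps(b interval) bool {
	return a.start < b.end && b.start < a.end
}

func (a interval) overlapsAny(rs []interval) bool {
	for _, r := range rs {
		if a.overlaps(r) {
			return true
		}
	}
	return false
}

func main() {
	used := []interval{{16, 24}} // e.g. only the third 8-byte field is read
	fmt.Println(interval{8, 16}.overlapsAny(used))  // false: that store is dead
	fmt.Println(interval{16, 24}.overlapsAny(used)) // true: that store must stay
}
```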
addr[v] = nameIdx{n, max(fullyUsedOff, v.AuxInt), v.Type.Elem().Size()}
Do we always use types correctly here? That is, if we have
type S struct {
A int
B [10]byte
}
If we have `OffPtr` referring to `B`, is the type always `*[10]byte`, or can it be `*byte`?
This is a valid issue, thanks for noticing this.
I have bound this to the size of its source `Addr` or `LocalAddr`, and also added some checks on the user side; please check the occurrences of `*.offSizeInvalid = true` and see if they make sense to you, thanks.
addr[v] = nameIdx{n, max(fullyUsedOff, v.AuxInt), v.Type.Elem().Size()}
What is the point of this `max`? Can v.AuxInt be negative? If so, what happens then?
Basically, in the old implementation this `fullyUsedOff` was an indicator that the variable's offset and size are not reliable, and that if there is any read touching any part of the variable, we should give up eliminating the store.
However, I have changed the implementation to use a new flag, `offSizeInvalid`; hopefully it's easier to review and understand now.
Thanks!
func (a accessInterval) overlapsAny(r []accessInterval) bool {
for i := range r {
if a.overlaps(r[i]) {
return true
}
}
return false
}
I think this could be implemented as an ordered map; then the algorithm could be O(n log n).
func (a accessInterval) overlapsAny(r []accessInterval) bool {
for i := range r {
if a.overlaps(r[i]) {
return true
}
}
return false
}
I think this could be implemented as an ordered map; then the algorithm could be O(n log n).
Ohh it's ordered so it could just be binary search! :D
func (a accessInterval) overlapsAny(r []accessInterval) bool {
for i := range r {
if a.overlaps(r[i]) {
return true
}
}
return false
}
Junyang Shao
I think this could be implemented as an ordered map; then the algorithm could be O(n log n).
Ohh it's ordered so it could just be binary search! :D
Done
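For reference, a self-contained sketch of the binary-search variant being agreed on here, assuming the used intervals are kept sorted by start and mutually disjoint; the names are illustrative, and the real CL's version is only quoted in fragments further down:
```
package main

import (
	"fmt"
	"sort"
)

type interval struct{ start, end int64 } // half-open [start, end)

// overlapsAny assumes rs is sorted by start and its intervals are disjoint,
// so only the two neighbors of the insertion point need to be checked.
func (a interval) overlapsAny(rs []interval) bool {
	n := len(rs)
	i := sort.Search(n, func(i int) bool { return rs[i].start >= a.start })
	if i < n && a.end > rs[i].start {
		return true // first interval starting at or after a.start
	}
	if i > 0 && rs[i-1].end > a.start {
		return true // last interval starting before a.start
	}
	return false
}

func main() {
	used := []interval{{0, 8}, {16, 24}}
	fmt.Println(interval{8, 16}.overlapsAny(used)) // false
	fmt.Println(interval{4, 12}.overlapsAny(used)) // true
}
```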
An additional thing I'm worried about is liveness scanning of objects.
Currently the liveness pass (cmd/compile/internal/liveness) works on whole variables. So if we have something like:
```
type T struct {a, b *byte}
var t T
t.a = nil
t.b = nil
f()
... = t.a
```
Then your CL will get rid of the initialization of `t.b`. But what if the stack is scanned while `f` is running? Liveness analysis says that `t` is live, so the GC will trace both `t.a` and `t.b`. But at that point `t.b` is junk, it hasn't been initialized.
This is only a problem for pointers, it would be ok for non-pointer data.
I think for pointer data we may also need to upgrade the liveness pass to understand the same parts of variables that this CL does.
// Ignore empty interval
Why would we get empty intervals?
if i < n {
if a.end > r[i].start {
combine into 1 line with `&&`
if i > 0 {
if r[i-1].end > a.start {
same here
offSizeInvalid bool
I think I'd rather panic instead of recording this failure here and continuing. I worry that doing it as in this CL will hide bugs.
If some bug below leads us to compute an out-of-bounds off/size, it could just as easily compute a wrong-but-in-bounds off/size. That would be a serious problem, as it would then lead to reading from uninitialized stack.
if v.Op != OpLoad || v.Type.Size() > n.size {
`n.off + v.Type.Size()` ?
An additional thing I'm worried about is liveness scanning of objects.
Currently the liveness pass (cmd/compile/internal/liveness) works on whole variables. So if we have something like:
```
type T struct {a, b *byte}
var t T
t.a = nil
t.b = nil
f()
... = t.a
```
Then your CL will get rid of the initialization of `t.b`. But what if the stack is scanned while `f` is running? Liveness analysis says that `t` is live, so the GC will trace both `t.a` and `t.b`. But at that point `t.b` is junk, it hasn't been initialized. This is only a problem for pointers, it would be ok for non-pointer data.
I think for pointer data we may also need to upgrade the liveness pass to understand the same parts of variables that this CL does.
Yes I think that's a problem in this CL, I noticed this in CL 707976 and have a check that if t.PtrDataSize != 0 it won't apply the new dead auto elim logic. I think I need that logic for this CL too, I will update the CL.
After I make liveness reason about struct indexing we can remove the check.
Now that CL 707976 is failing tests, I assume it might just be the case you pointed out. Thanks for noticing this!
Junyang Shao
An additional thing I'm worried about is liveness scanning of objects.
Currently the liveness pass (cmd/compile/internal/liveness) works on whole variables. So if we have something like:
```
type T struct {a, b *byte}
var t T
t.a = nil
t.b = nil
f()
... = t.a
```
Then your CL will get rid of the initialization of `t.b`. But what if the stack is scanned while `f` is running? Liveness analysis says that `t` is live, so the GC will trace both `t.a` and `t.b`. But at that point `t.b` is junk, it hasn't been initialized. This is only a problem for pointers, it would be ok for non-pointer data.
I think for pointer data we may also need to upgrade the liveness pass to understand the same parts of variables that this CL does.
Yes I think that's a problem in this CL, I noticed this in CL 707976 and have a check that if t.PtrDataSize != 0 it won't apply the new dead auto elim logic. I think I need that logic for this CL too, I will update the CL.
After I make liveness reason about struct indexing we can remove the check.
Now that CL 707976 is failing tests, I assume it might just be the case you pointed out. Thanks for noticing this!
Done
Why would we get empty intervals?
Responded in another comment.
if a.start == a.end {
Junyang Shao
Same here.
I left it as a sanity check; for example, for a type `t` with `t.Size() == 0` it could trigger this code (`TSSA` does have size 0). So I am just keeping it here in case it triggers anything weird.
if i < n {
if a.end > r[i].start {
combine into 1 line with `&&`
Done
if i > 0 {
if r[i-1].end > a.start {
Junyang Shao
same here
Done
offSizeInvalid bool
I think I'd rather panic instead of recording this failure here and continuing. I worry that doing it as in this CL will hide bugs.
If some bug below leads us to compute an out-of-bounds off/size, it could just as easily compute a wrong-but-in-bounds off/size. That would be a serious problem, as it would then lead to reading from uninitialized stack.
If we are panicking, I am worried that some incorrect usage of types will break the compiler here; as you mentioned, we might have existing code using the types wrong while their size might be right (right in the sense that it's within the size of the correct type).
I made it panic and it's failing compilation of functions in coverage; I am debugging this to see if that's a bug in this CL or WAI.
if v.AuxInt > n.size {
Junyang Shao
`n.off + v.AuxInt` ?
Thanks for noticing this! Corrected.
if v.Op != OpLoad || v.Type.Size() > n.size {
Junyang Shao
`n.off + v.Type.Size()` ?
Thanks for noticing this
Junyang Shao
An additional thing I'm worried about is liveness scanning of objects.
Currently the liveness pass (cmd/compile/internal/liveness) works on whole variables. So if we have something like:
```
type T struct {a, b *byte}
var t T
t.a = nil
t.b = nil
f()
... = t.a
```
Then your CL will get rid of the initialization of `t.b`. But what if the stack is scanned while `f` is running? Liveness analysis says that `t` is live, so the GC will trace both `t.a` and `t.b`. But at that point `t.b` is junk, it hasn't been initialized. This is only a problem for pointers, it would be ok for non-pointer data.
I think for pointer data we may also need to upgrade the liveness pass to understand the same parts of variables that this CL does.
Junyang Shao
Yes I think that's a problem in this CL, I noticed this in CL 707976 and have a check that if t.PtrDataSize != 0 it won't apply the new dead auto elim logic. I think I need that logic for this CL too, I will update the CL.
After I make liveness reason about struct indexing we can remove the check.
Now that CL 707976 is failing tests, I assume it might just be the case you pointed out. Thanks for noticing this!
Done
I forgot to do that, reopen..
if v.AuxInt > n.size {
Junyang Shao
`n.off + v.AuxInt` ?
Thanks for noticing this! Corrected.
Ohh, I think the offset is already encoded in `n`, so we don't need to check it again here.
For example, for a `v` of `LocalAddr` or `Addr` of type `t`, `n` will be:
{off: 0, size: t.Size()}
And for a `v` of `OffPtr`, `n` will be:
{off: v.AuxInt, size: t.Size()-v.AuxInt}
So we don't need to add the offset again in the condition.
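A tiny self-contained illustration of that bookkeeping; the names `span`, `fromAddr`, and `fromOffPtr` are mine, while the CL keeps these fields on its `nameIdx`:
```
package main

import "fmt"

type span struct{ off, size int64 }

// An Addr/LocalAddr of a variable of size sz covers the whole variable.
func fromAddr(sz int64) span { return span{off: 0, size: sz} }

// An OffPtr with AuxInt = delta narrows the span it is applied to, so the
// offset never needs to be added again at the Load/Store use sites.
func fromOffPtr(base span, delta int64) span {
	return span{off: base.off + delta, size: base.size - delta}
}

func main() {
	whole := fromAddr(40)          // e.g. a struct of five 8-byte fields
	field := fromOffPtr(whole, 16) // address of the third field
	fmt.Println(whole, field)      // {0 40} {16 24}
}
```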
Junyang Shao
An additional thing I'm worried about is liveness scanning of objects.
Currently the liveness pass (cmd/compile/internal/liveness) works on whole variables. So if we have something like:
```
type T struct {a, b *byte}
var t T
t.a = nil
t.b = nil
f()
... = t.a
```
Then your CL will get rid of the initialization of `t.b`. But what if the stack is scanned while `f` is running? Liveness analysis says that `t` is live, so the GC will trace both `t.a` and `t.b`. But at that point `t.b` is junk, it hasn't been initialized. This is only a problem for pointers, it would be ok for non-pointer data.
I think for pointer data we may also need to upgrade the liveness pass to understand the same parts of variables that this CL does.
Junyang Shao
Yes I think that's a problem in this CL, I noticed this in CL 707976 and have a check that if t.PtrDataSize != 0 it won't apply the new dead auto elim logic. I think I need that logic for this CL too, I will update the CL.
After I make liveness reason about struct indexing we can remove the check.
Now that CL 707976 is failing tests, I assume it might just be the case you pointed out. Thanks for noticing this!
Junyang Shao
Done
I forgot to do that, reopen..
offSizeInvalid bool
Junyang Shao
I think I'd rather panic instead of recording this failure here and continuing. I worry that doing it as in this CL will hide bugs.
If some bug below leads us to compute an out-of-bounds off/size, it could just as easily compute a wrong-but-in-bounds off/size. That would be a serious problem, as it would then lead to reading from uninitialized stack.
If we are panicking, I am worried that some incorrect usage of types will break the compiler here; as you mentioned, we might have existing code using the types wrong while their size might be right (right in the sense that it's within the size of the correct type).
I made it panic and it's failing compilation of functions in coverage; I am debugging this to see if that's a bug in this CL or WAI.
So I did the experiment of panicking on failure instead of continuing, and I was surprised by how `unsafe.Pointer` can ruin the types and make them pretty hard to reason about.
PC9 is the first draft that didn't work, and PC10 is the latest one, which I made work with some tweaks.
In PC9, the size of an address comes from two sources:
1. The type of `LocalAddr` and `Addr`
2. The type of `OffPtr`
But when running `GOFIPS140=v1.0.0 GOSSAFUNC=testAppend go test -c -a crypto/sha3`, I was hit by a panic I added, because the use of `unsafe.Pointer` introduced a weird `PtrIndex`:
```
v98 (65) = OffPtr <*[200]byte> [0] ...
v99 (+64) = PtrIndex <*byte> v98 (Const [0])
v107 (64) = StaticLECall <mem> {AuxCall{runtime.memclrNoHeapPointers}} [16] v99 ...
```
And after `opt` it got compiled to:
```
v99 (+64) = OffPtr <*byte> [0] ...
v107 (+64) = StaticLECall <mem> {AuxCall{runtime.memclrNoHeapPointers}} [16] v99 ...
```
And later `late opt` optimized away the runtime call, so it becomes:
```
v99 (64) = OffPtr <*byte> [0] ...
v108 (+64) = Zero <mem> {uint8} [200] v99 ...
```
We can see that the type of `OffPtr` is essentially wrong, and `Zero` somehow inherits the type by peeling off the pointer type of `OffPtr`, so it is also wrong.
So the type of `OffPtr` is unreliable. In PC10 I tried to rely on only a single source:
1. The type of `LocalAddr` and `Addr`
But I was hit by another error (which I worked around with a tweak, explained below) when running `GOSSAFUNC="TypeAssert[go.shape.struct {}]" GOEXPERIMENT=jsonv2 go test -c -a .`:
I saw this weird SSA:
```
v235 (1556) = LocalAddr <*go.shape.struct {}> {reflect.out} v2 ...
v239 (1556) = OffPtr <*unsafe.Pointer> [8] v235
```
It looks like some generic stub types will have size 0, and somehow `OffPtr` can still index into them through `unsafe.Pointer`.
So I added a tweak to make it work: for a type whose size is 0, just don't bother reasoning about it.
I think with `unsafe.Pointer` the types are basically unreliable, and when the types are generic it's even harder to reason about them.
So I am not sure whether PC10 is in the shape that you want, or whether I am going in the wrong direction. I remember in #24416 you mentioned rewriting them to StructMake and StructSelect, but I am not sure if that can work around the type complexity that unsafe.Pointer and generics might introduce.
I feel it would be better to still just give up and continue if the offset and size look incorrect, because my current implementation has enough sanity checks to prevent invalid offsets/sizes from being used:
1. For an `ir.Name`, if any entry in `used` for it has an invalid off/size, that `ir.Name` will not go into the path this CL added.
2. For an `ir.Name`, if an entry in `elim` for it has an invalid off/size, that `ir.Name` will not go into the path this CL added.
Also, I have added 4 new panics:
1. Addr and LocalAddr
2. OffPtr
3. Store (for OpStore it uses the type; for OpMove and OpZero it uses their AuxInt)
4. Load
I have already seen panics 1 and 2 trigger in our existing code in a WAI way, but not 3 or 4 yet. I think 3 and 4 are pretty reliable, because SSA->prog lowering relies on that info too; if it were wrong then everything could be wrong.
Also, if we can relax the requirement to get the size/offset and types right, I can do more in the next steps (CL 708195).
Please let me know what you think, thanks!
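A toy model of the two safeguards described above; all names here are mine, not the CL's. A store only takes the new partial-elimination path when neither its own off/size nor any recorded use of the same variable is marked invalid:
```
package main

import "fmt"

type cand struct {
	name       string
	offSizeBad bool // off/size could not be trusted
}

// mayUseNewPath reports whether a store candidate may be judged by interval
// overlap at all; otherwise it is conservatively kept.
func mayUseNewPath(c cand, invalidUseOf map[string]bool) bool {
	if c.offSizeBad {
		return false // safeguard 2: the elim entry itself is untrusted
	}
	if invalidUseOf[c.name] {
		return false // safeguard 1: some use of this name is untrusted
	}
	return true // only now compare written vs. used intervals
}

func main() {
	invalid := map[string]bool{"t": true}
	fmt.Println(mayUseNewPath(cand{"u", false}, invalid)) // true
	fmt.Println(mayUseNewPath(cand{"t", false}, invalid)) // false
}
```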
Following that reasoning, I think we can rely entirely on Load and Store and just not reason about the sizes and types of Addr/LocalAddr/OffPtr; I uploaded CL 708215 to demonstrate the idea.
We don't need the panics anymore either, as we assume the rest of the compiler will make sure Load and Store are emitted and handled right...
I'm not entirely sure what the point of this is.
You've modified the dead auto part, not the dead store part. I don't see how this pass can keep just a part of an auto variable. If there's a struct with two fields A and B, either that struct is kept or it isn't. There's no mechanism for breaking that struct up into two separate autos.
The CLs in this chain are all improving dead auto elim; I have corrected the CL description.
But by improving the new `accessInterval` data structure further (e.g. making it an interval tree), we can port it to `dse` too.
At the tip of this chain, I believe the new improvements can break down big zeros/moves into smaller ones for struct types without any pointer data.
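A sketch of that follow-up idea, under the assumptions that the variable has no pointer data and that its used intervals are known; names are illustrative. The idea is to clip a large Zero/Move against the used intervals and keep only the pieces that are actually read:
```
package main

import "fmt"

type interval struct{ start, end int64 } // half-open [start, end)

// neededPieces returns the sub-ranges of a large write that intersect some
// used interval; the rest of the write would be dead and could be dropped.
// (Uses the min/max builtins from Go 1.21+.)
func neededPieces(write interval, used []interval) []interval {
	var out []interval
	for _, u := range used {
		lo, hi := max(write.start, u.start), min(write.end, u.end)
		if lo < hi {
			out = append(out, interval{lo, hi})
		}
	}
	return out
}

func main() {
	// A 200-byte Zero of which only two small pieces are ever read.
	fmt.Println(neededPieces(interval{0, 200}, []interval{{0, 8}, {64, 80}}))
	// Output: [{0 8} {64 80}]
}
```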
I just realized that not fixing the types might be dangerous:
We do rely on the types for one important thing: that there is no pointer data in the chunk of memory that was written.
Then the problem becomes how the pointer masks the GC uses are computed. If they are also based on SSA types, then it's already broken on the GC side, and it won't affect this CL's correctness.
But if the GC has another data source for pointer masks, we might need that here too.
Cherry, David and I discussed this in the SIMD meeting, and she found out that this piece of code is dead dynamically.
So if this is true:
Then I don't think it would be a problem.
But I am not sure...