runtime/maps: only grow small full maps when inserting new keys
For small full maps, the existing implementation triggers the growth logic
unconditionally, regardless of whether the key already exists.
This change fixes this issue by only triggering growth when inserting a new
key, not when updating an existing key.
Fixes #78853
diff --git a/src/internal/runtime/maps/map.go b/src/internal/runtime/maps/map.go
index 515558a9..c61fc99 100644
--- a/src/internal/runtime/maps/map.go
+++ b/src/internal/runtime/maps/map.go
@@ -499,9 +499,9 @@
}
if m.dirLen == 0 {
- if m.used < abi.MapGroupSlots {
- elem := m.putSlotSmall(typ, hash, key)
-
+ elem := m.putSlotSmall(typ, hash, key)
+ if elem != nil {
+ // Found an existing slot.
if m.writing == 0 {
fatal("concurrent map writes")
}
@@ -511,9 +511,6 @@
}
// Can't fit another entry, grow to full size map.
- //
- // TODO(prattmic): If this is an update to an existing key then
- // we actually don't need to grow.
m.growToTable(typ)
}
@@ -568,7 +565,7 @@
// more efficient than matchEmpty.
match = g.ctrls().matchEmptyOrDeleted()
if match == 0 {
- fatal("small map with no empty slot (concurrent map writes?)")
+ // No empty slot found. Need to grow the map.
return nil
}
diff --git a/src/internal/runtime/maps/map_test.go b/src/internal/runtime/maps/map_test.go
index c8ef25a..2e52e1c 100644
--- a/src/internal/runtime/maps/map_test.go
+++ b/src/internal/runtime/maps/map_test.go
@@ -57,6 +57,56 @@
}
}
+func TestSmallMapGrow(t *testing.T) {
+ m, typ := maps.NewTestMap[uint32, uint64](8)
+
+ key := uint32(0)
+ elem := uint64(256 + 0)
+
+ for i := 0; i < 8; i++ {
+ key += 1
+ elem += 1
+ m.Put(typ, unsafe.Pointer(&key), unsafe.Pointer(&elem))
+
+ if maps.DebugLog {
+ fmt.Printf("After put %d: %v\n", key, m)
+ }
+ }
+
+ if m.TableCount() != 0 {
+ t.Errorf("TableCount() got %d want 0", m.TableCount())
+ }
+
+ key = uint32(0)
+ elem = uint64(256 + 10)
+
+ for i := 0; i < 8; i++ {
+ key += 1
+ elem += 1
+ m.Put(typ, unsafe.Pointer(&key), unsafe.Pointer(&elem))
+
+ if maps.DebugLog {
+ fmt.Printf("After put %d: %v\n", key, m)
+ }
+ }
+
+ if m.TableCount() != 0 {
+ t.Errorf("TableCount() got %d want 0", m.TableCount())
+ }
+
+ key += 1
+ elem += 1
+ m.Put(typ, unsafe.Pointer(&key), unsafe.Pointer(&elem))
+
+ if maps.DebugLog {
+ fmt.Printf("After put %d: %v\n", key, m)
+ }
+
+ if m.TableCount() != 1 {
+ t.Errorf("TableCount() got %d want 1", m.TableCount())
+ }
+}
+
// Grow enough to cause a table split.
func TestMapSplit(t *testing.T) {
m, typ := maps.NewTestMap[uint32, uint64](0)
diff --git a/src/internal/runtime/maps/runtime.go b/src/internal/runtime/maps/runtime.go
index bd3ba6a..56d7e3e 100644
--- a/src/internal/runtime/maps/runtime.go
+++ b/src/internal/runtime/maps/runtime.go
@@ -180,9 +180,9 @@
}
if m.dirLen == 0 {
- if m.used < abi.MapGroupSlots {
- elem := m.putSlotSmall(typ, hash, key)
-
+ elem := m.putSlotSmall(typ, hash, key)
+ if elem != nil {
+ // Found an existing slot.
if m.writing == 0 {
fatal("concurrent map writes")
}
diff --git a/src/internal/runtime/maps/runtime_fast32.go b/src/internal/runtime/maps/runtime_fast32.go
index dc3bd3f..0c63d23 100644
--- a/src/internal/runtime/maps/runtime_fast32.go
+++ b/src/internal/runtime/maps/runtime_fast32.go
@@ -136,7 +136,8 @@
// more efficient than matchEmpty.
match = g.ctrls().matchEmptyOrDeleted()
if match == 0 {
- fatal("small map with no empty slot (concurrent map writes?)")
+ // No empty slot found. Need to grow the map.
+ return nil
}
i := match.first()
@@ -180,9 +181,9 @@
}
if m.dirLen == 0 {
- if m.used < abi.MapGroupSlots {
- elem := m.putSlotSmallFast32(typ, hash, key)
-
+ elem := m.putSlotSmallFast32(typ, hash, key)
+ if elem != nil {
+ // Found an existing slot.
if m.writing == 0 {
fatal("concurrent map writes")
}
diff --git a/src/internal/runtime/maps/runtime_fast64.go b/src/internal/runtime/maps/runtime_fast64.go
index dac89ee..086b3bb 100644
--- a/src/internal/runtime/maps/runtime_fast64.go
+++ b/src/internal/runtime/maps/runtime_fast64.go
@@ -128,7 +128,8 @@
// more efficient than matchEmpty.
match = g.ctrls().matchEmptyOrDeleted()
if match == 0 {
- fatal("small map with no empty slot (concurrent map writes?)")
+ // No empty slot found. Need to grow the map.
+ return nil
}
i := match.first()
@@ -172,9 +173,9 @@
}
if m.dirLen == 0 {
- if m.used < abi.MapGroupSlots {
- elem := m.putSlotSmallFast64(typ, hash, key)
-
+ elem := m.putSlotSmallFast64(typ, hash, key)
+ if elem != nil {
+ // Found an existing slot.
if m.writing == 0 {
fatal("concurrent map writes")
}
diff --git a/src/internal/runtime/maps/runtime_faststr.go b/src/internal/runtime/maps/runtime_faststr.go
index a96f9fe..431786f 100644
--- a/src/internal/runtime/maps/runtime_faststr.go
+++ b/src/internal/runtime/maps/runtime_faststr.go
@@ -212,7 +212,8 @@
// more efficient than matchEmpty.
match = g.ctrls().matchEmptyOrDeleted()
if match == 0 {
- fatal("small map with no empty slot (concurrent map writes?)")
+ // No empty slot found. Need to grow the map.
+ return nil
}
i := match.first()
@@ -256,9 +257,9 @@
}
if m.dirLen == 0 {
- if m.used < abi.MapGroupSlots {
- elem := m.putSlotSmallFastStr(typ, hash, key)
-
+ elem := m.putSlotSmallFastStr(typ, hash, key)
+ if elem != nil {
+ // Found an existing slot.
if m.writing == 0 {
fatal("concurrent map writes")
}
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
Congratulations on opening your first change. Thank you for your contribution!
Next steps:
A maintainer will review your change and provide feedback. See
https://go.dev/doc/contribute#review for more info and tips to get your
patch through code review.
Most changes in the Go project go through a few rounds of revision. This can be
surprising to people new to the project. The careful, iterative review process
is our way of helping mentor contributors and ensuring that their contributions
have a lasting impact.
During May-July and Nov-Jan the Go project is in a code freeze, during which
little code gets reviewed or merged. If a reviewer responds with a comment like
R=go1.11 or adds a tag like "wait-release", it means that this CL will be
reviewed as part of the next development cycle. See https://go.dev/s/release
for more details.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
You're right that for the distinct-insert case the small-map scan is truly extra work — we still need another search after growing to place the key in the table. I should have been clearer about that.
However, the amount of this extra work is tiny: scanning up to 8 slots is just a few pointer dereferences and key equality checks. The benchmark confirms this — about a 3.7% increase, or roughly 18 ns, on the 9th distinct insert.
In return, the duplicate-update path (counters, caches, etc.) becomes nearly 7× faster and fully allocation-free (496.8 ns → 75.7 ns, 328 B → 0 B). More importantly, this ~18 ns overhead is a one-time cost in the small map's lifetime; once the map grows to a table, all subsequent inserts or updates incur no extra cost at all.
I believe this trade-off is well worth it. That said, I'm very open to running additional benchmarks with different key distributions, or exploring fast-path refinements, if you'd like more data. Happy to follow your guidance.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
和Hesen
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
You're right that for the distinct-insert case the small-map scan is truly extra work — we still need another search after growing to place the key in the table. I should have been clearer about that.
However, the amount of this extra work is tiny: scanning up to 8 slots is just a few pointer dereferences and key equality checks. The benchmark confirms this — about a 3.7% increase, or roughly 18 ns, on the 9th distinct insert.
In return, the duplicate-update path (counters, caches, etc.) becomes nearly 7× faster and fully allocation-free (496.8 ns → 75.7 ns, 328 B → 0 B). More importantly, this ~18 ns overhead is a one-time cost in the small map's lifetime; once the map grows to a table, all subsequent inserts or updates incur no extra cost at all.
I believe this trade-off is well worth it. That said, I'm very open to running additional benchmarks with different key distributions, or exploring fast-path refinements, if you'd like more data. Happy to follow your guidance.
This would only help maps which grow to exactly 8 elements, and then insert a duplicate item. Maps of any other size aren't helped. But when it helps, it helps *a lot*.
But for all maps that grow to 9 elements or more, this is only overhead. Not much overhead, but some.
So I guess what we really need to answer is: how often are each of these cases? What is their ratio compared to the improvement/overhead ratio?
I hacked your CL to panic every time your CL triggers (duplicate insert when the map has 8 elements). It only triggers when running all.bash on internal/runtime/maps test cases. So it seems to happen exceedingly rarely, at least in the stdlib. So I think I'm leaning against doing this. We would need some evidence pointing in the other direction, I think, to continue.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
和Hesen
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
Keith Randall
You're right that for the distinct-insert case the small-map scan is truly extra work — we still need another search after growing to place the key in the table. I should have been clearer about that.
However, the amount of this extra work is tiny: scanning up to 8 slots is just a few pointer dereferences and key equality checks. The benchmark confirms this — about a 3.7% increase, or roughly 18 ns, on the 9th distinct insert.
In return, the duplicate-update path (counters, caches, etc.) becomes nearly 7× faster and fully allocation-free (496.8 ns → 75.7 ns, 328 B → 0 B). More importantly, this ~18 ns overhead is a one-time cost in the small map's lifetime; once the map grows to a table, all subsequent inserts or updates incur no extra cost at all.
I believe this trade-off is well worth it. That said, I'm very open to running additional benchmarks with different key distributions, or exploring fast-path refinements, if you'd like more data. Happy to follow your guidance.
This would only help maps which grow to exactly 8 elements, and then insert a duplicate item. Maps of any other size aren't helped. But when it helps, it helps *a lot*.
But for all maps that grow to 9 elements or more, this is only overhead. Not much overhead, but some.
So I guess what we really need to answer is: how often are each of these cases? What is their ratio compared to the improvement/overhead ratio?
I hacked your CL to panic every time your CL triggers (duplicate insert when the map has 8 elements). It only triggers when running all.bash on internal/runtime/maps test cases. So it seems to happen exceedingly rarely, at least in the stdlib. So I think I'm leaning against doing this. We would need some evidence pointing in the other direction, I think, to continue.
Actually, hold on, that's something of a lie. Your CL (and hence my experiment) does not handle the int32/int64/string fast paths. I will retry after adding those cases.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
和Hesen
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
Keith Randall
You're right that for the distinct-insert case the small-map scan is truly extra work — we still need another search after growing to place the key in the table. I should have been clearer about that.
However, the amount of this extra work is tiny: scanning up to 8 slots is just a few pointer dereferences and key equality checks. The benchmark confirms this — about a 3.7% increase, or roughly 18 ns, on the 9th distinct insert.
In return, the duplicate-update path (counters, caches, etc.) becomes nearly 7× faster and fully allocation-free (496.8 ns → 75.7 ns, 328 B → 0 B). More importantly, this ~18 ns overhead is a one-time cost in the small map's lifetime; once the map grows to a table, all subsequent inserts or updates incur no extra cost at all.
I believe this trade-off is well worth it. That said, I'm very open to running additional benchmarks with different key distributions, or exploring fast-path refinements, if you'd like more data. Happy to follow your guidance.
Keith Randall
This would only help maps which grow to exactly 8 elements, and then insert a duplicate item. Maps of any other size aren't helped. But when it helps, it helps *a lot*.
But for all maps that grow to 9 elements or more, this is only overhead. Not much overhead, but some.
So I guess what we really need to answer is: how often are each of these cases? What is their ratio compared to the improvement/overhead ratio?
I hacked your CL to panic every time your CL triggers (duplicate insert when the map has 8 elements). It only triggers when running all.bash on internal/runtime/maps test cases. So it seems to happen exceedingly rarely, at least in the stdlib. So I think I'm leaning against doing this. We would need some evidence pointing in the other direction, I think, to continue.
Actually, hold on, that's something of a lie. Your CL (and hence my experiment) does not handle the int32/int64/string fast paths. I will retry after adding those cases.
Your CL does, my hack didn't.
`if m.used < abi.MapGroupSlots {`
The ptr-shaped cases aren't handled.
`if m.used < abi.MapGroupSlots {`
The ptr-shaped cases aren't handled.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
和Hesen
I worry that this will add cost to the common case.
The common case is we are inserting distinct elements. On the 9th insert, we would normally just grow and insert into the larger table. Now we do some work looking for a spot in the small table, don't find one, and then do all the above anyway.
So it doesn't sound like an unambiguous win. When we're inserting duplicates, sure, but when we're not it is just extra overhead.
Keith Randall
You're right that for the distinct-insert case the small-map scan is truly extra work — we still need another search after growing to place the key in the table. I should have been clearer about that.
However, the amount of this extra work is tiny: scanning up to 8 slots is just a few pointer dereferences and key equality checks. The benchmark confirms this — about a 3.7% increase, or roughly 18 ns, on the 9th distinct insert.
In return, the duplicate-update path (counters, caches, etc.) becomes nearly 7× faster and fully allocation-free (496.8 ns → 75.7 ns, 328 B → 0 B). More importantly, this ~18 ns overhead is a one-time cost in the small map's lifetime; once the map grows to a table, all subsequent inserts or updates incur no extra cost at all.
I believe this trade-off is well worth it. That said, I'm very open to running additional benchmarks with different key distributions, or exploring fast-path refinements, if you'd like more data. Happy to follow your guidance.
Keith Randall
This would only help maps which grow to exactly 8 elements, and then insert a duplicate item. Maps of any other size aren't helped. But when it helps, it helps *a lot*.
But for all maps that grow to 9 elements or more, this is only overhead. Not much overhead, but some.
So I guess what we really need to answer is: how often are each of these cases? What is their ratio compared to the improvement/overhead ratio?
I hacked your CL to panic every time your CL triggers (duplicate insert when the map has 8 elements). It only triggers when running all.bash on internal/runtime/maps test cases. So it seems to happen exceedingly rarely, at least in the stdlib. So I think I'm leaning against doing this. We would need some evidence pointing in the other direction, I think, to continue.
Keith Randall
Actually, hold on, that's something of a lie. Your CL (and hence my experiment) does not handle the int32/int64/string fast paths. I will retry after adding those cases.
Your CL does, my hack didn't.
Ok, I have some better data.
I hacked the runtime to track the number of maps for which:
1. This CL triggers (1 group, all full, found existing key when inserting)
2. Subset of 1 where the map subsequently grows
3. Total times we grow from 1 group to >1 group.
Then I compare 1 minus 2 (we get the benefit, and never end up growing that map) vs. 3.
For the compiler, it seems to be around 5%. So this triggers for 1 out of 20 maps. Which kind of surprised me, I was expecting lower numbers. The linker is lower at 1%.
Done for today, I will investigate more tomorrow. And post a WIP CL so people can double-check my work.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`elem := m.putSlotSmall(typ, hash, key)`
I've submitted another version, in which I optimized the performance of inserting a new key into a full small map by using the `uncheckedPutSlotForAssign*` functions, and re-ran the benchmarks. The data shows that, compared to the previous version, inserting a new key into a full small map now also yields positive gains. I've put the specific data in the issue.
`if m.used < abi.MapGroupSlots {`
The ptr-shaped cases aren't handled.
Done
`if m.used < abi.MapGroupSlots {`
The ptr-shaped cases aren't handled.
Done
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Commit-Queue | +1 |
`elem := m.putSlotSmall(typ, hash, key)`
Ok, I've come around to this being net positive. It happens often enough, at least to my hacky experiments. Let's do it.
`m.used++`
All of these `m.used++` look redundant to the ones inside uncheckedPutSlotForAssign.
Does checkInvariants fail? It should...
`func (t *table) uncheckedPutSlotForAssign(typ *abi.MapType, hash uintptr, key unsafe.Pointer) unsafe.Pointer {`
This will only ever be called with a 2-group table, right?
Maybe not worth taking advantage of at the moment. But maybe an assertion or even just a comment to that effect would be clarifying.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`m.used++`
All of these `m.used++` look redundant to the ones inside uncheckedPutSlotForAssign.
Does checkInvariants fail? It should...
Or, it should if `debugLog` is set to true.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Code-Review | +2 |
`m.used++`
Keith Randall
All of these `m.used++` look redundant to the ones inside uncheckedPutSlotForAssign.
Does checkInvariants fail? It should...
Or, it should if `debugLog` is set to true.
Never mind. One is map.used, one is table.used.
Definitely confusing though. Probably could use some clarifying notes.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
`func (t *table) uncheckedPutSlotForAssign(typ *abi.MapType, hash uintptr, key unsafe.Pointer) unsafe.Pointer {`
This will only ever be called with a 2-group table, right?
Maybe not worth taking advantage of at the moment. But maybe an assertion or even just a comment to that effect would be clarifying.
I think whether it can be used has nothing to do with the number of groups—it can only be called when we are sure that the key is not already in the table, which is already stated in the comments. However, as of now, it is indeed only used for two-group tables right after a rehash. I've also added a new comment.
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |
| Inspect html for hidden footers to help with email filtering. To unsubscribe visit settings. |