Below is a quick benchmark program I wrote, and here are the results on my x86-64 desktop. There appears to be about a 30% slow down for short int-sized keys, but it decreases to about 5% slow down at 20-byte string keys, and actually a 10% speed up at 100-byte string keys.
Note that if cc could optimize the rotations into a ROTQ instruction, I expect SipHash-2-4's performance would improve some, but I don't know how much. Currently each rotation gets compiled into 6 instructions rather than just one. Asymptotically, SipHash-2-4 needs 1.5 rotations per byte hashed, as well as 24 rotations per invocation for finalization.
I tried changing the ROTATE() macro to something that cc could better optimize like just "x = x << 1" or "x = x >> 1" for benchmarking purposes, but the resulting hashes all seemed consistently slower. I presume this is because their distribution ends up worse.
testing: warning: no tests to run
PASS
BenchmarkIntMap10 500000 4118 ns/op
BenchmarkIntMap100 50000 45364 ns/op
BenchmarkIntMap10000 500 5057292 ns/op
BenchmarkIntMap1000000 5 805614711 ns/op
BenchmarkStringMap10 500000 6785 ns/op
BenchmarkStringMap100 20000 77035 ns/op
BenchmarkStringMap10000 500 5319521 ns/op
BenchmarkStringMap1000000 5 495649046 ns/op
BenchmarkStringMap10Medium 200000 9991 ns/op
BenchmarkStringMap10Large 100000 18932 ns/op
BenchmarkStringMap100Medium 10000 100578 ns/op
BenchmarkStringMap100Large 10000 188233 ns/op
SipHash-2-4:
$ ./mapbench.test -test.bench=".*"
testing: warning: no tests to run
PASS
BenchmarkIntMap10 500000 5419 ns/op
BenchmarkIntMap100 50000 57732 ns/op
BenchmarkIntMap10000 500 6132868 ns/op
BenchmarkIntMap1000000 5 905450211 ns/op
BenchmarkStringMap10 200000 8178 ns/op
BenchmarkStringMap100 20000 89061 ns/op
BenchmarkStringMap10000 500 6330882 ns/op
BenchmarkStringMap1000000 5 614827912 ns/op
BenchmarkStringMap10Medium 200000 10484 ns/op
BenchmarkStringMap10Large 100000 17030 ns/op
BenchmarkStringMap100Medium 10000 105176 ns/op
BenchmarkStringMap100Large 10000 168942 ns/op
The percentage differences respectively are: +31.6%, +27.3%, +21.3%, +12.4%, +20.5%, +15.6%, +19.0%, +24.0%, +4.9%, -10.0%, +4.6%, -10.2%
(Positive means SipHash-2-4 is slower than default, while negative means it's faster.)
package mapbench_test
import (
"testing"
)
func benchmarkIntMapN(b *testing.B, m int) {
for i := 0; i < b.N; i++ {
mup := make(map[int]bool)
for j := 0; j < m; j++ {
mup[j] = true
}
for j := 0; j < m; j++ {
if !mup[j] {
b.Fatalf("key %d missing", j)
}
}
for j := 0; j < m; j++ {
delete(mup, j)
}
}
}
func BenchmarkIntMap10(b *testing.B) {
benchmarkIntMapN(b, 10)
}
func BenchmarkIntMap100(b *testing.B) {
benchmarkIntMapN(b, 100)
}
func BenchmarkIntMap10000(b *testing.B) {
benchmarkIntMapN(b, 10000)
}
func BenchmarkIntMap1000000(b *testing.B) {
benchmarkIntMapN(b, 1000000)
}
func benchmarkStringMapN(b *testing.B, f int, m int) {
for i := 0; i < b.N; i++ {
buf := make([]byte, f)
mup := make(map[string]bool)
for j := 0; j < m; j++ {
for k := 0; k < f; {
if buf[k]++; buf[k] != 0 {
break
}
}
mup[string(buf)] = true
}
buf = make([]byte, f)
for j := 0; j < m; j++ {
for k := 0; k < f; {
if buf[k]++; buf[k] != 0 {
break
}
}
if !mup[string(buf)] {
b.Fatalf("key %d missing", j)
}
}
buf = make([]byte, f)
for j := 0; j < m; j++ {
for k := 0; k < f; {
if buf[k]++; buf[k] != 0 {
break
}
}
delete(mup, string(buf))
}
}
}
func BenchmarkStringMap10(b *testing.B) {
benchmarkStringMapN(b, 2, 10)
}
func BenchmarkStringMap100(b *testing.B) {
benchmarkStringMapN(b, 4, 100)
}
func BenchmarkStringMap10000(b *testing.B) {
benchmarkStringMapN(b, 6, 10000)
}
func BenchmarkStringMap1000000(b *testing.B) {
benchmarkStringMapN(b, 8, 1000000)
}
func BenchmarkStringMap10Medium(b *testing.B) {
benchmarkStringMapN(b, 20, 10)
}
func BenchmarkStringMap10Large(b *testing.B) {
benchmarkStringMapN(b, 100, 10)
}
func BenchmarkStringMap100Medium(b *testing.B) {
benchmarkStringMapN(b, 20, 100)
}
func BenchmarkStringMap100Large(b *testing.B) {
benchmarkStringMapN(b, 100, 100)
}