Hi,
I've recently been experimenting with the performance of system pipes using Go, trying to measure how fast I can read from STDIN into a buffer and then do something with it.
During that experiment I noticed a significant throughput degradation when reading into several different (pre-allocated) buffers in rotation, compared to reading repeatedly into the same buffer.
To make things more interesting, this performance degradation seems to happen only on my Linux system; on a Mac laptop, throughput is consistent regardless of the number of buffers I am writing to, which is what I was initially expecting.
To remove throughput variance from cache misses, I've pinned both processes to the same virtual core.
My questions are: why is this happening, and is there a way to avoid it?
Below are the command lines and the code snippet.
---------------------------------------
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=1
Start time: 2023-03-05T12:26:21-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 2903 MiB/s
Total time: 3.527284627s
Finish time: 2023-03-05T12:26:25-08:00
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=25
Start time: 2023-03-05T12:26:29-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 2531 MiB/s
Total time: 4.045169972s
Finish time: 2023-03-05T12:26:34-08:00
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=50
Start time: 2023-03-05T12:26:39-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 2215 MiB/s
Total time: 4.623220632s
Finish time: 2023-03-05T12:26:43-08:00
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=150
Start time: 2023-03-05T12:26:49-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 1916 MiB/s
Total time: 5.344739086s
Finish time: 2023-03-05T12:26:54-08:00
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=250
Start time: 2023-03-05T12:26:59-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 1732 MiB/s
Total time: 5.910793462s
Finish time: 2023-03-05T12:27:05-08:00
$ taskset 0x1 cat 10GiB | taskset 0x1 ./stdin-to-buffer -buffer-count=500
Start time: 2023-03-05T12:27:13-08:00
Total bytes read: 10737418240
Chunks: 10240
Throughput: 1757 MiB/s
Total time: 5.827294797s
Finish time: 2023-03-05T12:27:19-08:00
---------------------------------------
Code for stdin-to-buffer.go follows.
---------------------------------------
package main
import (
"bytes"
"flag"
"fmt"
"io"
"log"
"math"
"os"
"time"
)
// bufCount is the value of the -buffer-count flag: how many buffers to
// allocate. It defaults to 8.
var bufCount = flag.Int("buffer-count", 8, "The number of buffers")

// bufSize is the value of the -buffer-size-bytes flag: the length in bytes of
// each buffer. It defaults to 1 MiB.
var bufSize = flag.Int("buffer-size-bytes", 1<<20, "The buffer size in bytes")
// main reads standard input until EOF in chunks of -buffer-size-bytes,
// rotating through -buffer-count pre-allocated buffers, and then reports the
// number of bytes and chunks read, the throughput and the timing on standard
// error.
//
// The total memory written by the read loop is buffer-count * buffer-size
// bytes, so it grows linearly with -buffer-count.
//
// The program exits via log.Fatal if a flag value is not positive or if a
// read fails with anything other than end of input.
func main() {
	flag.Parse()

	// Reject non-positive values up front. A zero buffer count would panic
	// on the modulo below, a negative count or size would panic in make, and
	// a zero buffer size would loop forever because io.ReadFull on an empty
	// slice returns (0, nil) without consuming any input.
	if *bufCount < 1 {
		log.Fatalf("-buffer-count must be at least 1, got %d", *bufCount)
	}
	if *bufSize < 1 {
		log.Fatalf("-buffer-size-bytes must be at least 1, got %d", *bufSize)
	}

	bufPool := make([][]byte, *bufCount)
	for i := range bufPool {
		bufPool[i] = make([]byte, *bufSize)
	}

	var nBytes, nChunks int
	startTime := time.Now()

	// next wraps around the pool instead of growing without bound, so the
	// index can never overflow int regardless of how many chunks are read.
	for next := 0; ; next = (next + 1) % len(bufPool) {
		n, err := io.ReadFull(os.Stdin, bufPool[next])
		if n > 0 {
			// A short final chunk still counts as a chunk.
			nChunks++
			nBytes += n
		}
		if err == nil {
			continue
		}
		if err == io.EOF || err == io.ErrUnexpectedEOF {
			// io.EOF: nothing left to read; io.ErrUnexpectedEOF: the input
			// ended part-way through a buffer. Both mean end of input.
			break
		}
		log.Fatal(err)
	}

	finishTime := time.Now()
	totalTime := finishTime.Sub(startTime)

	const bytesPerMiB = 1 << 20
	throughput := math.Round(float64(nBytes) / bytesPerMiB / totalTime.Seconds())

	fmt.Fprintf(os.Stderr, "Start time: %v\n", startTime.Format(time.RFC3339))
	fmt.Fprintf(os.Stderr, "Total bytes read: %v\n", nBytes)
	fmt.Fprintf(os.Stderr, "Chunks: %v\n", nChunks)
	fmt.Fprintf(os.Stderr, "Throughput: %v MiB/s\n", throughput)
	fmt.Fprintf(os.Stderr, "Total time: %v\n", totalTime)
	fmt.Fprintf(os.Stderr, "Finish time: %v\n", finishTime.Format(time.RFC3339))
}