package main
import "fmt"
import "runtime"
// callRet is the reply message handed back to a caller over its private
// reply channel.
type callRet struct {
	ret int // result value; call_srv fills this with 3 + the request's arg1
}
// callIn is one request sent to the server goroutine. Field order matters:
// caller builds this struct with a positional composite literal.
type callIn struct {
	ret_chan *chan *callRet // channel on which the server delivers the reply to this request
	arg1     int            // request argument; the server adds 3 to it
}
// caller performs one synchronous request/reply round trip.
//
// It creates a fresh unbuffered reply channel, sends a callIn carrying that
// channel and arg1 on *call_in_c, then blocks until a reply arrives and
// returns the reply's ret field.
func caller(call_in_c *chan *callIn, arg1 int) int {
	replies := make(chan *callRet)
	*call_in_c <- &callIn{ret_chan: &replies, arg1: arg1}
	return (<-replies).ret
}
// call_srv is the server loop. It receives requests from *call_in_c and
// answers each one on the request's own reply channel with 3 + arg1.
//
// The loop ends when the request channel is closed. Previously a closed
// channel produced a nil *callIn and the next field access panicked; now
// call_srv simply returns, which also gives callers a way to stop it.
//
// The LockOSThread/UnlockOSThread calls are intentionally left commented
// out: they are the switch for the experiment this program exists for.
func call_srv(call_in_c *chan *callIn) {
	//runtime.LockOSThread()
	for {
		in, ok := <-*call_in_c
		if !ok {
			// Channel closed: no more requests will ever arrive.
			break
		}
		// The send blocks until the caller picks up its reply.
		*in.ret_chan <- &callRet{ret: 3 + in.arg1}
	}
	//runtime.UnlockOSThread()
}
func main() {
p := fmt.Println
runtime.GOMAXPROCS(2)
call_in_c := make(chan *callIn)
go call_srv(&call_in_c)
fp := func(call_in_c chan *callIn) {
ct := 0
for ; ct < 10000000; ct = ct + 1 {
//caller_batch(&call_in_c, 3, 100)
caller(&call_in_c, 1)
if ct%1000000 == 0 {
fmt.Println(ct)
}
}
p("done:)")
}
{
ct := 0
for ; ct < 0; ct++ {
go fp(call_in_c)
}
}
fp(call_in_c)
return
}It's perf output is about 1000,000 ops/sec :)
But when you uncomment the runtime.LockOSThread() call at line 24 (the beginning of func call_srv), throughput drops to about 100,000 ops/sec and kernel CPU usage goes through the roof.
Please try profiling your application; if you are on Linux, perf(1) works very well for tracing user and system time.
Performance counter stats for './penalty_with_LockOSThread':

    4893.495113 task-clock (msec)         # 0.583 CPUs utilized
    2,112,910 context-switches            # 0.432 M/sec
    80 cpu-migrations                     # 0.016 K/sec
    2,377 page-faults                     # 0.486 K/sec
    <not supported> cycles
    <not supported> stalled-cycles-frontend
    <not supported> stalled-cycles-backend
    <not supported> instructions
    <not supported> branches
    <not supported> branch-misses

    8.389103849 seconds time elapsed

Performance counter stats for './penalty_without_LockOSThread':

    930.442000 task-clock (msec)          # 0.978 CPUs utilized
    2,947 context-switches                # 0.003 M/sec
    41 cpu-migrations                     # 0.044 K/sec
    2,389 page-faults                     # 0.003 M/sec
    <not supported> cycles
    <not supported> stalled-cycles-frontend
    <not supported> stalled-cycles-backend
    <not supported> instructions
    <not supported> branches
    <not supported> branch-misses

    0.951572314 seconds time elapsed
Perf record / report will highlight the problem.
Relatedly, you don't need a pointer to a chan; channels are already pointers to the private runtime channel type.
Why do you want to use LockOSThread? You've shown that it has a significant performance cost for your application.
Thanks Ian.