I wrote a simple zlib test using compress/zlib, but it's much slower than the Python version. I know Python's zlib is a C extension, but the encoding/json package matches the efficiency of Python's json module (while both are slower than Python's UltraJSON extension). So, is there any way to make zlib faster? My project depends heavily on this functionality.

Here is the test code:

package main
import (
    "bytes"
    "compress/zlib"
    "fmt"
    "io"
    "time"
)

func main() {
    times := 30000
    var in, out bytes.Buffer
    b := []byte(`{"Name":"Wednesday","Age":6,"Parents":["Gomez","Morticia"],"test":{"prop1":1,"prop2":[1,2,3]}}`)
    t1 := time.Now()
    for i := 0; i < times; i++ {
        w := zlib.NewWriter(&in)
        w.Write(b)
        w.Close() // Close, not just Flush: it terminates the stream so the reader sees a clean EOF
        r, _ := zlib.NewReader(&in)
        io.Copy(&out, r)
        r.Close()
        in.Reset()
        out.Reset()
    }
    fmt.Println(time.Since(t1))
}
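Worth noting: a large share of the time in this loop goes to allocating a fresh deflate state in zlib.NewWriter on every iteration. On Go 1.2 and later the writer can be reused via its Reset method; a minimal sketch of that variant (assuming a Go version where Writer.Reset is available):

package main

import (
    "bytes"
    "compress/zlib"
    "fmt"
    "io"
    "time"
)

func main() {
    times := 30000
    var in, out bytes.Buffer
    b := []byte(`{"Name":"Wednesday","Age":6,"Parents":["Gomez","Morticia"],"test":{"prop1":1,"prop2":[1,2,3]}}`)
    w := zlib.NewWriter(&in) // allocate the compressor state once, outside the loop
    t1 := time.Now()
    for i := 0; i < times; i++ {
        w.Reset(&in) // Go 1.2+: rewind the existing state instead of allocating a new writer
        w.Write(b)
        w.Close()
        r, _ := zlib.NewReader(&in) // the reader still allocates each iteration
        io.Copy(&out, r)
        r.Close()
        in.Reset()
        out.Reset()
    }
    fmt.Println(time.Since(t1))
}

On Go 1.4+ the reader side can be reused similarly via the zlib.Resetter interface, which removes the remaining per-iteration allocation.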
Thanks for the advice. I don't know whether a payload of 20+ MB would produce a better result, but this test is based on my project's situation: I use zlib to compress network packets, not files, and packets are seldom larger than 1 MB. C zlib with Python does a good job at this.
On Tuesday, February 5, 2013 at 1:47:48 AM UTC+8, bryanturley wrote:
import time
import zlib

times = 30000
s = '{"Name":"Wednesday","Age":6,"Parents":["Gomez","Morticia"],"test":{"prop1":1,"prop2":[1,2,3]}}'
st = time.time()
for i in xrange(times):
    zlib.decompress(zlib.compress(s))
et = time.time()
print "zlib:", et - st
Tested with Python 2.7 and Go 1.0.3.

// buf := bytes.NewBuffer(rawBytes) // this improves things a little bit
binary.Write(buf, binary.BigEndian, uint32(dstLen))
binary.Write(buf, binary.BigEndian, rawBytes)
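For context, a self-contained sketch of that length-prefix framing; the frame helper name and the sample values are illustrative, not from the original post:

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

// frame prepends the uncompressed length so the receiving side can
// allocate its destination buffer before calling uncompress.
func frame(origLen uint32, compressed []byte) []byte {
    buf := bytes.NewBuffer(make([]byte, 0, 4+len(compressed)))
    binary.Write(buf, binary.BigEndian, origLen) // 4-byte big-endian length header
    buf.Write(compressed)                        // compressed payload follows
    return buf.Bytes()
}

func main() {
    pkt := frame(95, []byte{0x78, 0x9c, 0x01}) // hypothetical payload bytes
    fmt.Printf("%02x\n", pkt)
}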
The cgo version looks like this:

func maxZipLen(nLenSrc int) int {
    // worst case for deflate: 5 bytes of overhead per 16 KiB stored block,
    // plus 6 bytes for the zlib header and Adler-32 trailer
    n16kBlocks := (nLenSrc + 16383) / 16384 // round up any fraction of a block
    return nLenSrc + 6 + n16kBlocks*5
}
func Zip(src *[]byte) []byte {
    srcLen := len(*src)
    raw := unsafe.Pointer(&((*src)[0])) // view the []byte as a C pointer
    memLen := C.size_t(maxZipLen(srcLen))
    // fmt.Println("mem length is ", memLen)
    dst := C.calloc(memLen, 1)
    defer C.free(dst)
    dstLen := C.ulong(memLen)
    C.zcompress(dst, &dstLen, raw, C.ulong(srcLen))
    // write the compressed length
    rawBytes := C.GoBytes(dst, C.int(dstLen))
    // buf := new(bytes.Buffer)
    // buf := bytes.NewBuffer(rawBytes)
    // binary.Write(buf, binary.BigEndian, uint32(dstLen))
    // binary.Write(buf, binary.BigEndian, rawBytes)
    // fmt.Printf("%02x\n", buf.Bytes())
    // return buf.Bytes()
    return rawBytes
}
func UnZip(src *[]byte, oriLen uint32) []byte {
    srcLen := len(*src)
    buf := new(bytes.Buffer)
    buf.Write(*src)
    // binary.Read(buf, binary.BigEndian, &oriLen)
    // fmt.Println("original size found ", oriLen)
    // rawBytes := make([]byte, oriLen)
    // binary.Read(buf, binary.BigEndian, &rawBytes)
    // ioutil.WriteFile("/tmp/go_compressed_inter", rawBytes, 0644)
    // raw := unsafe.Pointer(&((rawBytes)[0]))
    raw := unsafe.Pointer(&((*src)[0])) // view the []byte as a C pointer
    // fmt.Println("mem length is ", oriLen)
    dst := C.calloc(C.size_t(oriLen), 1)
    defer C.free(dst)
    dstLen := C.ulong(oriLen)
    C.zuncompress(dst, &dstLen, raw, C.ulong(srcLen))
    // fmt.Println("origLen after uncompressed", dstLen)
    // fmt.Printf("%02x\n", buf.Bytes())
    return C.GoBytes(dst, C.int(dstLen))
}
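The C-side helpers zcompress and zuncompress are not shown in the thread. For completeness, a minimal sketch of a cgo preamble that would match the calls above, assuming they are thin wrappers around zlib's compress2 and uncompress (this preamble is a reconstruction, not the original author's code):

/*
#cgo LDFLAGS: -lz
#include <stdlib.h>
#include <zlib.h>

// thin wrappers so the Go code above can pass void* and unsigned long directly
static int zcompress(void *dst, unsigned long *dstLen, void *src, unsigned long srcLen) {
    return compress2((Bytef *)dst, (uLongf *)dstLen, (const Bytef *)src, (uLong)srcLen, Z_DEFAULT_COMPRESSION);
}

static int zuncompress(void *dst, unsigned long *dstLen, void *src, unsigned long srcLen) {
    return uncompress((Bytef *)dst, (uLongf *)dstLen, (const Bytef *)src, (uLong)srcLen);
}
*/
import "C"

Because compress2 and uncompress each do the whole job in a single call, the Go/C boundary is crossed only once per packet, which keeps the cgo overhead negligible.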
cgo transitions are not expensive enough to be an issue here. We had exactly the same performance problem in Vitess, so we wrote the cgzip module, which works just as you describe; its performance is within ~2% of the C version, IIRC. I've dropped a link to the module if you want to just use it.

There are several issues contributing to the inefficiency of pure-Go zlib. They are all fixable, but if linking via cgo is an option, I would take that road for now.
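For reference, usage would look roughly like the sketch below. I'm assuming cgzip mirrors the compress/gzip constructor names (NewWriter/NewReader) and that the import path is the Vitess repository; check the package documentation before relying on this:

package main

import (
    "bytes"
    "fmt"
    "io/ioutil"

    "github.com/youtube/vitess/go/cgzip" // import path is an assumption
)

func main() {
    var buf bytes.Buffer
    w := cgzip.NewWriter(&buf) // assumed to match compress/gzip's NewWriter
    w.Write([]byte("some packet payload"))
    w.Close()

    r, err := cgzip.NewReader(&buf) // assumed to match compress/gzip's NewReader
    if err != nil {
        panic(err)
    }
    defer r.Close()
    out, _ := ioutil.ReadAll(r)
    fmt.Println(string(out))
}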