Eddie Scholtz has uploaded this change for review.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
It's important to note that the reader returned by OpenRaw will include
the trailing data descriptor, if any. The CreateRaw caller is responsible
for writing the trailing data descriptor, if applicable.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
4 files changed, 194 insertions(+), 18 deletions(-)
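For illustration (not part of the CL), a minimal sketch of the first use case: merging existing archives through the proposed Writer.Copy so the compressed bytes are never decompressed and recompressed. mergeZips is a hypothetical helper; the sketch assumes the method signatures as they appear in this patch set.

package main

import (
	"archive/zip"
	"log"
	"os"
)

// mergeZips copies every entry of each source archive into dst using
// Writer.Copy, which moves the raw compressed bytes directly.
func mergeZips(dst string, srcs ...string) error {
	out, err := os.Create(dst)
	if err != nil {
		return err
	}
	defer out.Close()

	w := zip.NewWriter(out)
	for _, name := range srcs {
		r, err := zip.OpenReader(name)
		if err != nil {
			return err
		}
		for _, f := range r.File {
			if err := w.Copy(f); err != nil {
				r.Close()
				return err
			}
		}
		r.Close()
	}
	return w.Close()
}

func main() {
	if err := mergeZips("merged.zip", os.Args[1:]...); err != nil {
		log.Fatal(err)
	}
}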
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index c288ad9..f063db1 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -52,7 +52,6 @@
FileHeader
zip *Reader
zipr io.ReaderAt
- zipsize int64
headerOffset int64
}
@@ -112,7 +111,7 @@
// a bad one, and then only report an ErrFormat or UnexpectedEOF if
// the file count modulo 65536 is incorrect.
for {
- f := &File{zip: z, zipr: r, zipsize: size}
+ f := &File{zip: z, zipr: r}
err = readDirectoryHeader(f, buf)
if err == ErrFormat || err == io.ErrUnexpectedEOF {
break
@@ -193,6 +192,25 @@
return rc, nil
}
+// OpenRaw returns a Reader that provides access to the File's contents without
+// decompression.
+//
+// If the file has a trailing data descriptor, it will be included in the Reader
+// but not validated. The caller can use CompressedSize or CompressedSize64 to
+// determine the boundary between the file contents and the data descriptor.
+func (f *File) OpenRaw() (io.Reader, error) {
+ bodyOffset, err := f.findBodyOffset()
+ if err != nil {
+ return nil, err
+ }
+ size := int64(f.CompressedSize64)
+ if f.hasDataDescriptor() {
+ size += dataDescriptorLen
+ }
+ r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, size)
+ return r, nil
+}
+
type checksumReader struct {
rc io.ReadCloser
hash hash.Hash32
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index 5faf1f4..4299c00 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -11,6 +11,7 @@
"internal/obscuretestdata"
"io"
"io/fs"
+ "io/ioutil"
"os"
"path/filepath"
"regexp"
@@ -499,9 +500,15 @@
func readTestZip(t *testing.T, zt ZipTest) {
var z *Reader
var err error
+ var raw []byte
if zt.Source != nil {
rat, size := zt.Source()
z, err = NewReader(rat, size)
+ raw = make([]byte, size)
+ if _, err := rat.ReadAt(raw, 0); err != nil {
+ t.Errorf("ReadAt error=%v", err)
+ return
+ }
} else {
path := filepath.Join("testdata", zt.Name)
if zt.Obscured {
@@ -519,6 +526,12 @@
defer rc.Close()
z = &rc.Reader
}
+ var err2 error
+ raw, err2 = ioutil.ReadFile(path)
+ if err2 != nil {
+ t.Errorf("ReadFile(%s) error=%v", path, err2)
+ return
+ }
}
if err != zt.Error {
t.Errorf("error=%v, want %v", err, zt.Error)
@@ -545,7 +558,7 @@
// test read of each file
for i, ft := range zt.File {
- readTestFile(t, zt, ft, z.File[i])
+ readTestFile(t, zt, ft, z.File[i], raw)
}
if t.Failed() {
return
@@ -557,7 +570,7 @@
for i := 0; i < 5; i++ {
for j, ft := range zt.File {
go func(j int, ft ZipTestFile) {
- readTestFile(t, zt, ft, z.File[j])
+ readTestFile(t, zt, ft, z.File[j], raw)
done <- true
}(j, ft)
n++
@@ -574,7 +587,7 @@
return t1.Equal(t2) && name1 == name2 && offset1 == offset2
}
-func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) {
+func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File, raw []byte) {
if f.Name != ft.Name {
t.Errorf("name=%q, want %q", f.Name, ft.Name)
}
@@ -594,6 +607,32 @@
t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64)
}
+ // Check that OpenRaw returns the correct byte segment
+ rw, err := f.OpenRaw()
+ if err != nil {
+ t.Errorf("%v: OpenRaw error=%v", f.Name, err)
+ return
+ }
+ start, err := f.DataOffset()
+ if err != nil {
+ t.Errorf("%v: DataOffset error=%v", f.Name, err)
+ return
+ }
+ end := uint64(start) + f.CompressedSize64
+ if f.hasDataDescriptor() {
+ end += dataDescriptorLen
+ }
+ got, err := ioutil.ReadAll(rw)
+ if err != nil {
+ t.Errorf("%v: OpenRaw ReadAll error=%v", f.Name, err)
+ return
+ }
+ want := raw[start:end]
+ if !bytes.Equal(got, want) {
+ t.Errorf("%v: OpenRaw returned unexpected bytes", f.Name)
+ return
+ }
+
r, err := f.Open()
if err != nil {
t.Errorf("%v", err)
diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go
index cdc534e..abc63e4 100644
--- a/src/archive/zip/writer.go
+++ b/src/archive/zip/writer.go
@@ -37,6 +37,7 @@
type header struct {
*FileHeader
offset uint64
+ raw bool
}
// NewWriter returns a new Writer writing a zip file to w.
@@ -245,6 +246,21 @@
return true, require
}
+// prepare performs the bookkeeping operations required at the start of
+// CreateHeader and CreateRaw.
+func (w *Writer) prepare(fh *FileHeader) error {
+ if w.last != nil && !w.last.closed {
+ if err := w.last.close(); err != nil {
+ return err
+ }
+ }
+ if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
+ // See https://golang.org/issue/11144 confusion.
+ return errors.New("archive/zip: invalid duplicate FileHeader")
+ }
+ return nil
+}
+
// CreateHeader adds a file to the zip archive using the provided FileHeader
// for the file metadata. Writer takes ownership of fh and may mutate
// its fields. The caller must not modify fh after calling CreateHeader.
@@ -253,14 +269,8 @@
// The file's contents must be written to the io.Writer before the next
// call to Create, CreateHeader, or Close.
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
- if w.last != nil && !w.last.closed {
- if err := w.last.close(); err != nil {
- return nil, err
- }
- }
- if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
- // See https://golang.org/issue/11144 confusion.
- return nil, errors.New("archive/zip: invalid duplicate FileHeader")
+ if err := w.prepare(fh); err != nil {
+ return nil, err
}
// The ZIP format has a sad state of affairs regarding character encoding.
@@ -365,7 +375,7 @@
ow = fw
}
w.dir = append(w.dir, h)
- if err := writeHeader(w.cw, fh); err != nil {
+ if err := writeHeader(w.cw, h); err != nil {
return nil, err
}
// If we're creating a directory, fw is nil.
@@ -373,7 +383,7 @@
return ow, nil
}
-func writeHeader(w io.Writer, h *FileHeader) error {
+func writeHeader(w io.Writer, h *header) error {
const maxUint16 = 1<<16 - 1
if len(h.Name) > maxUint16 {
return errLongName
@@ -390,9 +400,15 @@
b.uint16(h.Method)
b.uint16(h.ModifiedTime)
b.uint16(h.ModifiedDate)
- b.uint32(0) // since we are writing a data descriptor crc32,
- b.uint32(0) // compressed size,
- b.uint32(0) // and uncompressed size should be zero
+ if h.raw {
+ b.uint32(h.CRC32)
+ b.uint32(h.CompressedSize)
+ b.uint32(h.UncompressedSize)
+ } else {
+ b.uint32(0) // since we are writing a data descriptor crc32,
+ b.uint32(0) // compressed size,
+ b.uint32(0) // and uncompressed size should be zero
+ }
b.uint16(uint16(len(h.Name)))
b.uint16(uint16(len(h.Extra)))
if _, err := w.Write(buf[:]); err != nil {
@@ -405,6 +421,60 @@
return err
}
+// CreateRaw adds a file to the zip archive using the provided FileHeader
+// for the file metadata.
+//
+// This returns a Writer to which the file contents should be written.
+// The file's contents must be written to the io.Writer before the next
+// call to Create, CreateHeader, or Close.
+//
+// In contrast to CreateHeader, the bytes passed to Writer are not compressed
+// and a trailing data descriptor is not written automatically. The caller
+// is responsible for writing a trailing data descriptor to Writer when the
+// FileHeader Flags 0x8 bit is set.
+func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) {
+ if err := w.prepare(fh); err != nil {
+ return nil, err
+ }
+
+ h := &header{
+ FileHeader: fh,
+ offset: uint64(w.cw.count),
+ raw: true,
+ }
+ w.dir = append(w.dir, h)
+ if err := writeHeader(w.cw, h); err != nil {
+ return nil, err
+ }
+
+ if strings.HasSuffix(fh.Name, "/") {
+ w.last = nil
+ return dirWriter{}, nil
+ }
+
+ fw := &fileWriter{
+ zipw: w.cw,
+ raw: true,
+ }
+ w.last = fw
+ return fw, nil
+}
+
+// Copy copies the file f (obtained from a Reader) into w. It copies the raw
+// form directly bypassing decompression, compression, and validation.
+func (w *Writer) Copy(f *File) error {
+ r, err := f.OpenRaw()
+ if err != nil {
+ return err
+ }
+ fw, err := w.CreateRaw(&f.FileHeader)
+ if err != nil {
+ return err
+ }
+ _, err = io.Copy(fw, r)
+ return err
+}
+
// RegisterCompressor registers or overrides a custom compressor for a specific
// method ID. If a compressor for a given method is not found, Writer will
// default to looking up the compressor at the package level.
@@ -440,12 +510,16 @@
compCount *countWriter
crc32 hash.Hash32
closed bool
+ raw bool
}
func (w *fileWriter) Write(p []byte) (int, error) {
if w.closed {
return 0, errors.New("zip: write to closed file")
}
+ if w.raw {
+ return w.zipw.Write(p)
+ }
w.crc32.Write(p)
return w.rawCount.Write(p)
}
@@ -455,6 +529,9 @@
return errors.New("zip: file closed twice")
}
w.closed = true
+ if w.raw {
+ return nil
+ }
if err := w.comp.Close(); err != nil {
return err
}
diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go
index 3fa8bef..293ec15 100644
--- a/src/archive/zip/writer_test.go
+++ b/src/archive/zip/writer_test.go
@@ -365,6 +365,48 @@
}
}
+func TestWriterCopy(t *testing.T) {
+ // make a zip file
+ buf := new(bytes.Buffer)
+ w := NewWriter(buf)
+ for _, wt := range writeTests {
+ testCreate(t, w, &wt)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read it back
+ src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, src.File[i], &wt)
+ }
+
+ // make a new zip file copying the old compressed data.
+ buf2 := new(bytes.Buffer)
+ dst := NewWriter(buf2)
+ for _, f := range src.File {
+ if err := dst.Copy(f); err != nil {
+ t.Fatal(err)
+ }
+ }
+ if err := dst.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read the new one back
+ r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, r.File[i], &wt)
+ }
+}
+
func testCreate(t *testing.T, w *Writer, wt *WriteTest) {
header := &FileHeader{
Name: wt.Name,
Attention is currently required from: Eddie Scholtz, Joe Tsai.
3 comments:
File src/archive/zip/reader.go:
Patch Set #1, Line 198: // If the file has a trailing data descriptor, it will be included in the Reader
Was this discussed on the issue? It seems potentially quite confusing. I understand the desire to provide access to the data descriptor but I'm not sure this is the way to do it.
File src/archive/zip/reader_test.go:
Patch Set #1, Line 530: raw, err2 = ioutil.ReadFile(path)
Use os.ReadFile.
File src/archive/zip/writer.go:
Patch Set #1, Line 431: // In contrast to CreateHeader, the bytes passed to Writer are not compressed
Again, is this really the best approach? Was this discussed on the issue?
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #2 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
It's important to note that the reader returned by OpenRaw will include
the trailing data descriptor, if any. The CreateRaw caller is responsible
for writing the trailing data descriptor, if applicable.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
4 files changed, 194 insertions(+), 18 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
2 comments:
File src/archive/zip/reader.go:
Patch Set #1, Line 198: // If the file has a trailing data descriptor, it will be included in the Reader
Was this discussed on the issue? It seems potentially quite confusing. […]
The specifics of how data descriptors are handled were not discussed. I agree it could be confusing and error prone, although these methods are intended for rare and advanced use cases where it seems the caller would want access. Perhaps we can tweak the interface to make them easier to work with. I commented on the issue so we can discuss further and hopefully reach agreement: https://github.com/golang/go/issues/34974#issuecomment-824327099
File src/archive/zip/reader_test.go:
Patch Set #1, Line 530: raw, err2 = ioutil.ReadFile(path)
Use os.ReadFile.
Done
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
Eddie Scholtz uploaded patch set #3 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
A new DataDescriptor struct has been added to provide read/write access
to the metadata segment that optionally (depending on header settings)
trails each file.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 427 insertions(+), 59 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
2 comments:
File src/archive/zip/reader.go:
Patch Set #1, Line 198: // If the file has a trailing data descriptor, it will be included in the Reader
The specifics of how data descriptors are handled was not discussed. […]
I moved to the DataDescriptor struct you proposed in the issue discussion.
File src/archive/zip/writer.go:
Patch Set #1, Line 431: // In contrast to CreateHeader, the bytes passed to Writer are not compressed
Again, is this really the best approach? Was this discussed on the issue?
Updated to use the DataDescriptor struct approach you proposed in the issue discussion.
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
Eddie Scholtz uploaded patch set #4 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
A new DataDescriptor struct has been added to provide read/write access
to the metadata segment that optionally (depending on header settings)
trails each file.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 455 insertions(+), 59 deletions(-)
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #5 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 401 insertions(+), 56 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
2 comments:
File src/archive/zip/reader.go:
Patch Set #1, Line 198: // If the file has a trailing data descriptor, it will be included in the Reader
I moved to the DataDescriptor struct you proposed in the issue discussion.
Switched to the FileHeader approach rsc proposed (https://github.com/golang/go/issues/34974#issuecomment-828628628). Kept the new data descriptor struct but no longer export it.
File src/archive/zip/writer.go:
Patch Set #1, Line 431: // In contrast to CreateHeader, the bytes passed to Writer are not compressed
Updated to use the DataDescriptor struct approach you proposed in the issue discussion.
Same resolution as above. Switched to using the values in the FileHeader for writing the data descriptor.
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
1 comment:
File src/archive/zip/reader_test.go:
Patch Set #5, Line 1156: func TestReadDataDescriptor(t *testing.T) {
Should I get rid of this since it's only testing an unexported method?
Attention is currently required from: Eddie Scholtz, Joe Tsai.
12 comments:
File src/archive/zip/reader.go:
Patch Set #5, Line 58: func (f *FileHeader) hasDataDescriptor() bool {
If this method moves to FileHeader it should move to struct.go with the other FileHeader methods.
Patch Set #5, Line 198: // any.
Really minor, but move "any." to the end of the previous line.
Patch Set #5, Line 205: int64(f.CompressedSize64)
May as well use size here rather than referring to f.CompressedSize64 again.
Patch Set #5, Line 211: if err != nil {
Should this be err == nil?
Patch Set #5, Line 216: f.CompressedSize = uint32max
We should probably do what the code in FileInfoHeader does, which is
if f.CompressedSize64 > uint32max {
f.CompressedSize = uint32max
} else {
f.CompressedSize = uint32(f.CompressedSize64)
}
Patch Set #5, Line 269: } else if desc.crc32 != r.f.CRC32 || r.hash.Sum32() != r.f.CRC32 {
Is the test of desc.crc32 != r.f.CRC32 correct? What if r.f.CRC32 is not set, because the value is supposed to come from the data descriptor?
Patch Set #5, Line 503: end := f.dataDescriptorLen() - 4
I don't see anything here to handle the case in which the signature is not present.
File src/archive/zip/reader_test.go:
Patch Set #5, Line 621: ioutil.ReadAll
Use io.ReadAll.
Patch Set #5, Line 629: t.Errorf("%v: OpenRaw returned unexpected bytes", f.Name)
Do something like
t.Logf("got %q", got)
t.Logf("want %q", want)
before the t.Errorf.
Patch Set #5, Line 1156: func TestReadDataDescriptor(t *testing.T) {
Should I get rid of this since it's only testing an unexported method?
This is good.
File src/archive/zip/writer.go:
Patch Set #5, Line 403: if h.raw && !h.hasDataDescriptor() {
Why the ! here? Don't we want to write this descriptor if h.hasDataDescriptor returns true?
File src/archive/zip/writer_test.go:
Patch Set #5, Line 368: func TestWriterCopy(t *testing.T) {
Is there a test of CreateRaw by itself?
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #6 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 234 insertions(+), 112 deletions(-)
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #7 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 229 insertions(+), 112 deletions(-)
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #8 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 229 insertions(+), 112 deletions(-)
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #9 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 373 insertions(+), 112 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
12 comments:
File src/archive/zip/reader.go:
Patch Set #5, Line 58: func (f *FileHeader) hasDataDescriptor() bool {
If this method moves to FileHeader it should move to struct.go with the other FileHeader methods.
Done
Patch Set #5, Line 198: // any.
Really minor, but move "any." to the end of the previous line.
Done
Patch Set #5, Line 205: int64(f.CompressedSize64)
May as well use size here rather than referring to f.CompressedSize64 again.
No longer relevant.
Patch Set #5, Line 211: if err != nil {
Should this be err == nil?
Done
Patch Set #5, Line 216: f.CompressedSize = uint32max
We should probably do what the code in FileInfoHeader does, which is […]
Done
Patch Set #5, Line 269: } else if desc.crc32 != r.f.CRC32 || r.hash.Sum32() != r.f.CRC32 {
Is the test of desc.crc32 != r.f.CRC32 correct? What if r.f. […]
Done
Patch Set #5, Line 503: end := f.dataDescriptorLen() - 4
I don't see anything here to handle the case in which the signature is not present.
Done
File src/archive/zip/reader.go:
Patch Set #6, Line 237: if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
I think this first check was incorrect? Because 0 is a valid CRC32 value.
File src/archive/zip/reader_test.go:
Patch Set #5, Line 621: ioutil.ReadAll
Use io.ReadAll.
Done
Patch Set #5, Line 629: t.Errorf("%v: OpenRaw returned unexpected bytes", f.Name)
Do something like […]
Done
File src/archive/zip/writer.go:
Patch Set #5, Line 403: if h.raw && !h.hasDataDescriptor() {
Why the ! here? Don't we want to write this descriptor if h. […]
No, this method is for writing the local header that precedes the file. If there's a data descriptor, these values must be zero. If there isn't a data descriptor, we write them.
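As a standalone sketch of that rule (localHeaderFields and its clamping are hypothetical helpers written for this note, not code from the CL):

package main

import "fmt"

const uint32max = 1<<32 - 1

// localHeaderFields returns the CRC-32 and size fields to store in the
// local file header. A raw entry with no data descriptor (flag bit 0x8
// clear) carries the real values, clamped to 32 bits; otherwise the
// header stores zeros and the real values trail the file contents in
// the data descriptor.
func localHeaderFields(raw bool, flags uint16, crc uint32, csize, usize uint64) (uint32, uint32, uint32) {
	hasDataDescriptor := flags&0x8 != 0
	if raw && !hasDataDescriptor {
		clamp := func(v uint64) uint32 {
			if v > uint32max {
				return uint32max
			}
			return uint32(v)
		}
		return crc, clamp(csize), clamp(usize)
	}
	return 0, 0, 0
}

func main() {
	fmt.Println(localHeaderFields(true, 0x0, 0xdeadbeef, 7, 7)) // real values in the header
	fmt.Println(localHeaderFields(true, 0x8, 0xdeadbeef, 7, 7)) // zeros; descriptor follows the data
}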
File src/archive/zip/writer_test.go:
Patch Set #5, Line 368: func TestWriterCopy(t *testing.T) {
Is there a test of CreateRaw by itself?
Added test for CreateRaw. Still need to iterate on it a bit more.
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
Eddie Scholtz uploaded patch set #10 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 377 insertions(+), 112 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
2 comments:
File src/archive/zip/writer_test.go:
Patch Set #5, Line 368: func TestWriterCopy(t *testing.T) {
Added test for CreateRaw. Still need to iterate on it a bit more.
Done
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
This test is very slow. Is that ok? Suggestions to improve? Other cases you think we should test?
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
Eddie Scholtz uploaded patch set #11 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 368 insertions(+), 105 deletions(-)
Attention is currently required from: Eddie Scholtz, Joe Tsai.
8 comments:
File src/archive/zip/reader.go:
Patch Set #6, Line 237: if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
I think this first check was incorrect? Because 0 is a valid CRC32 value.
0 is a valid CRC32 value but I will bet that there are zip files out in the world that do not compute the CRC and just store 0 in the field. I think we need to retain this. At least, we definitely should not change this in this CL. (If you want to add another field recording whether the CRC came from a data descriptor, and is therefore assumed valid, that is fine.)
File src/archive/zip/reader.go:
Patch Set #11, Line 438: func readDataDescriptor(r io.Reader, f *File) error {
This doesn't seem right. The new idea in the proposal is that we still read the data descriptor. I don't think we have to change the way that Open works at all (but we can if we want). For OpenRaw we should jump ahead and read the data descriptor and use it to fill in the CRC32 field in the header if it is zero, and possibly also the UncompressedSize64 field.
Or so it seems to me. Let me know if you think otherwise.
Patch Set #11, Line 194: return r, err
This should just be
return r, nil
We know that err must be nil, and explicitly saying nil is clearer for the reader.
File src/archive/zip/reader_test.go:
Patch Set #11, Line 236: // According to the spec, the CRC32 in the central directory and
There is the spec and there is reality. The reality of zip files is that they don't correspond to the spec. We shouldn't change the behavior of this test.
Also, from a code health perspective, a CL that adds new API should never change an existing test. Changing an existing test should be done in a separate CL and discussed separately.
Patch Set #11, Line 1072: if err != ErrChecksum {
Seems to me that this test shouldn't change either
File src/archive/zip/writer.go:
Patch Set #5, Line 403: if h.raw && !h.hasDataDescriptor() {
No, this method is for writing the local header that precedes the file. […]
Ack
File src/archive/zip/writer.go:
Patch Set #11, Line 442: DataDescriptor
We don't have a DataDescriptor any more, so I guess just say "data descriptor". Actually I'm not sure what this phrase is trying to say.
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
This test is very slow. […]
How slow is very slow? It's OK to skip it if testing.Short() returns true.
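For reference, a minimal sketch of that guard (the test name and skip message are illustrative, not from the CL):

package zip

import "testing"

func TestWriterCreateRawBig(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large CreateRaw cases in -short mode")
	}
	// ... the slow, multi-gigabyte cases would run here ...
}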
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #12 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 555 insertions(+), 62 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
7 comments:
File src/archive/zip/reader.go:
Patch Set #6, Line 237: if r.f.CRC32 != 0 && r.hash.Sum32() != r.f.CRC32 {
0 is a valid CRC32 value but I will bet that there are zip files out in the world that do not comput […]
Ok, I kept the existing behavior here.
File src/archive/zip/reader.go:
Patch Set #11, Line 438: func readDataDescriptor(r io.Reader, f *File) error {
This doesn't seem right. The new idea in the proposal is that we still read the data descriptor. […]
Ok, I used the approach rsc suggested and call this from Reader.init. Is it going to be an issue that we're adding a bunch of random reads throughout the file?
I maintain the existing behavior in that I only overwrite the CRC32 (even if it's not zero, otherwise I'll break existing tests).
I'm not convinced it's necessary to update UncompressedSize64.
Patch Set #11, Line 194: return r, err
This should just be […]
Done
File src/archive/zip/reader_test.go:
Patch Set #11, Line 236: // According to the spec, the CRC32 in the central directory and
There is the spec and there is reality. […]
Reverted this change.
Patch Set #11, Line 1072: if err != ErrChecksum {
Seems to me that this test shouldn't change either
Reverted this change.
File src/archive/zip/writer.go:
Patch Set #11, Line 442: DataDescriptor
We don't have a DataDescriptor any more, so I guess just say "data descriptor". […]
Removed.
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
How slow is very slow? It's OK to skip it if t.Short() returns true.
An additional 20-40 seconds.
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Patch set 12:Run-TryBot +1
2 comments:
File src/archive/zip/reader.go:
Patch Set #11, Line 438: func readDataDescriptor(r io.Reader, f *File) error {
Ok, I used the approach rsc suggested and call this from Reader.init. […]
Ack
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
An additional 20-40 seconds.
That is slow. I assume this is because of the "big" cases. I think those may simply be too big. Do they give valuable testing support?
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Eddie Scholtz uploaded patch set #13 to this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 542 insertions(+), 62 deletions(-)
Attention is currently required from: Ian Lance Taylor, Joe Tsai.
1 comment:
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
That is slow. I assume this is because of the "big" cases. I think those may simply be too big. […]
Removed for now. I wanted to test crossing the uint32max boundary but probably not critical.
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Patch set 13:Run-TryBot +1
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Patch set 13:Code-Review +2
2 comments:
Patchset:
Thanks!
RELNOTES=yes
File src/archive/zip/writer_test.go:
Patch Set #10, Line 412: func TestWriterCreateRaw(t *testing.T) {
Removed for now. I wanted to test crossing the uint32max boundary but probably not critical.
Ack
Attention is currently required from: Eddie Scholtz, Joe Tsai.
Patch set 13:Trust +1
Ian Lance Taylor submitted this change.
archive/zip: add File.OpenRaw, Writer.CreateRaw, Writer.Copy
These new methods provide support for cases where performance is a
primary concern. For example, copying files from an existing zip to a
new zip without incurring the decompression and compression overhead.
Using an optimized, external compression method and writing the output
to a zip archive. And compressing file contents in parallel and then
sequentially writing the compressed bytes to a zip archive.
TestWriterCopy is copied verbatim from https://github.com/rsc/zipmerge
Fixes #34974
Change-Id: Iade5bc245ba34cdbb86364bf59f79f38bb9e2eb6
Reviewed-on: https://go-review.googlesource.com/c/go/+/312310
Run-TryBot: Ian Lance Taylor <ia...@golang.org>
TryBot-Result: Go Bot <go...@golang.org>
Reviewed-by: Ian Lance Taylor <ia...@golang.org>
Trust: Carlos Amedee <car...@golang.org>
---
M src/archive/zip/reader.go
M src/archive/zip/reader_test.go
M src/archive/zip/struct.go
M src/archive/zip/writer.go
M src/archive/zip/writer_test.go
5 files changed, 542 insertions(+), 62 deletions(-)
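As a usage sketch of the submitted API (not part of the CL; the file name and flate settings are illustrative), the external-compression use case looks roughly like this. The caller compresses the payload itself and must fill in CRC32 and both 64-bit sizes before calling CreateRaw, since nothing is computed for raw entries; with flag bit 0x8 clear, those values land in the local header.

package main

import (
	"archive/zip"
	"bytes"
	"compress/flate"
	"hash/crc32"
	"log"
	"os"
)

func main() {
	content := []byte("hello, zip")

	// Compress the payload ourselves; CreateRaw will not touch the bytes.
	var compressed bytes.Buffer
	fw, err := flate.NewWriter(&compressed, flate.BestCompression)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := fw.Write(content); err != nil {
		log.Fatal(err)
	}
	if err := fw.Close(); err != nil {
		log.Fatal(err)
	}

	out, err := os.Create("raw.zip")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	w := zip.NewWriter(out)
	// The caller supplies the CRC-32 of the uncompressed content and
	// both sizes up front; CreateRaw writes them into the local header.
	h := &zip.FileHeader{
		Name:               "hello.txt",
		Method:             zip.Deflate,
		CRC32:              crc32.ChecksumIEEE(content),
		CompressedSize64:   uint64(compressed.Len()),
		UncompressedSize64: uint64(len(content)),
	}
	raw, err := w.CreateRaw(h)
	if err != nil {
		log.Fatal(err)
	}
	if _, err := raw.Write(compressed.Bytes()); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
}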
diff --git a/src/archive/zip/reader.go b/src/archive/zip/reader.go
index 18f9833..808cf27 100644
--- a/src/archive/zip/reader.go
+++ b/src/archive/zip/reader.go
@@ -52,12 +52,9 @@
FileHeader
zip *Reader
zipr io.ReaderAt
- zipsize int64
headerOffset int64
-}
-
-func (f *File) hasDataDescriptor() bool {
- return f.Flags&0x8 != 0
+ zip64 bool // zip64 extended information extra field presence
+ descErr error // error reading the data descriptor during init
}
// OpenReader will open the Zip file specified by name and return a ReadCloser.
@@ -112,7 +109,7 @@
// a bad one, and then only report an ErrFormat or UnexpectedEOF if
// the file count modulo 65536 is incorrect.
for {
- f := &File{zip: z, zipr: r, zipsize: size}
+ f := &File{zip: z, zipr: r}
err = readDirectoryHeader(f, buf)
if err == ErrFormat || err == io.ErrUnexpectedEOF {
break
@@ -120,6 +117,7 @@
if err != nil {
return err
}
+ f.readDataDescriptor()
z.File = append(z.File, f)
}
if uint16(len(z.File)) != uint16(end.directoryRecords) { // only compare 16 bits here
@@ -180,26 +178,68 @@
return nil, ErrAlgorithm
}
var rc io.ReadCloser = dcomp(r)
- var desr io.Reader
- if f.hasDataDescriptor() {
- desr = io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, dataDescriptorLen)
- }
rc = &checksumReader{
rc: rc,
hash: crc32.NewIEEE(),
f: f,
- desr: desr,
}
return rc, nil
}
+// OpenRaw returns a Reader that provides access to the File's contents without
+// decompression.
+func (f *File) OpenRaw() (io.Reader, error) {
+ bodyOffset, err := f.findBodyOffset()
+ if err != nil {
+ return nil, err
+ }
+ r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset, int64(f.CompressedSize64))
+ return r, nil
+}
+
+func (f *File) readDataDescriptor() {
+ if !f.hasDataDescriptor() {
+ return
+ }
+
+ bodyOffset, err := f.findBodyOffset()
+ if err != nil {
+ f.descErr = err
+ return
+ }
+
+ // In section 4.3.9.2 of the spec: "However ZIP64 format MAY be used
+ // regardless of the size of a file. When extracting, if the zip64
+ // extended information extra field is present for the file the
+ // compressed and uncompressed sizes will be 8 byte values."
+ //
+ // Historically, this package has used the compressed and uncompressed
+ // sizes from the central directory to determine if the file is
+ // zip64.
+ //
+ // For this case we allow either the extra field or sizes to determine
+ // the data descriptor length.
+ zip64 := f.zip64 || f.isZip64()
+ n := int64(dataDescriptorLen)
+ if zip64 {
+ n = dataDescriptor64Len
+ }
+ size := int64(f.CompressedSize64)
+ r := io.NewSectionReader(f.zipr, f.headerOffset+bodyOffset+size, n)
+ dd, err := readDataDescriptor(r, zip64)
+ if err != nil {
+ f.descErr = err
+ return
+ }
+ f.CRC32 = dd.crc32
+}
+
type checksumReader struct {
rc io.ReadCloser
hash hash.Hash32
nread uint64 // number of bytes read so far
f *File
- desr io.Reader // if non-nil, where to read the data descriptor
- err error // sticky error
+ err error // sticky error
}
func (r *checksumReader) Stat() (fs.FileInfo, error) {
@@ -220,12 +260,12 @@
if r.nread != r.f.UncompressedSize64 {
return 0, io.ErrUnexpectedEOF
}
- if r.desr != nil {
- if err1 := readDataDescriptor(r.desr, r.f); err1 != nil {
- if err1 == io.EOF {
+ if r.f.hasDataDescriptor() {
+ if r.f.descErr != nil {
+ if r.f.descErr == io.EOF {
err = io.ErrUnexpectedEOF
} else {
- err = err1
+ err = r.f.descErr
}
} else if r.hash.Sum32() != r.f.CRC32 {
err = ErrChecksum
@@ -336,6 +376,8 @@
switch fieldTag {
case zip64ExtraID:
+ f.zip64 = true
+
// update directory values from the zip64 extra block.
// They should only be consulted if the sizes read earlier
// are maxed out.
@@ -435,8 +477,9 @@
return nil
}
-func readDataDescriptor(r io.Reader, f *File) error {
- var buf [dataDescriptorLen]byte
+func readDataDescriptor(r io.Reader, zip64 bool) (*dataDescriptor, error) {
+ // Create enough space for the largest possible size
+ var buf [dataDescriptor64Len]byte
// The spec says: "Although not originally assigned a
// signature, the value 0x08074b50 has commonly been adopted
@@ -446,10 +489,9 @@
// descriptors and should account for either case when reading
// ZIP files to ensure compatibility."
//
- // dataDescriptorLen includes the size of the signature but
- // first read just those 4 bytes to see if it exists.
+ // First read just those 4 bytes to see if the signature exists.
if _, err := io.ReadFull(r, buf[:4]); err != nil {
- return err
+ return nil, err
}
off := 0
maybeSig := readBuf(buf[:4])
@@ -458,21 +500,28 @@
// bytes.
off += 4
}
- if _, err := io.ReadFull(r, buf[off:12]); err != nil {
- return err
+
+ end := dataDescriptorLen - 4
+ if zip64 {
+ end = dataDescriptor64Len - 4
}
- b := readBuf(buf[:12])
- if b.uint32() != f.CRC32 {
- return ErrChecksum
+ if _, err := io.ReadFull(r, buf[off:end]); err != nil {
+ return nil, err
+ }
+ b := readBuf(buf[:end])
+
+ out := &dataDescriptor{
+ crc32: b.uint32(),
}
- // The two sizes that follow here can be either 32 bits or 64 bits
- // but the spec is not very clear on this and different
- // interpretations has been made causing incompatibilities. We
- // already have the sizes from the central directory so we can
- // just ignore these.
-
- return nil
+ if zip64 {
+ out.compressedSize = b.uint64()
+ out.uncompressedSize = b.uint64()
+ } else {
+ out.compressedSize = uint64(b.uint32())
+ out.uncompressedSize = uint64(b.uint32())
+ }
+ return out, nil
}
func readDirectoryEnd(r io.ReaderAt, size int64) (dir *directoryEnd, err error) {
diff --git a/src/archive/zip/reader_test.go b/src/archive/zip/reader_test.go
index 6ee62ef..35e681e 100644
--- a/src/archive/zip/reader_test.go
+++ b/src/archive/zip/reader_test.go
@@ -499,9 +499,15 @@
func readTestZip(t *testing.T, zt ZipTest) {
var z *Reader
var err error
+ var raw []byte
if zt.Source != nil {
rat, size := zt.Source()
z, err = NewReader(rat, size)
+ raw = make([]byte, size)
+ if _, err := rat.ReadAt(raw, 0); err != nil {
+ t.Errorf("ReadAt error=%v", err)
+ return
+ }
} else {
path := filepath.Join("testdata", zt.Name)
if zt.Obscured {
@@ -519,6 +525,12 @@
defer rc.Close()
z = &rc.Reader
}
+ var err2 error
+ raw, err2 = os.ReadFile(path)
+ if err2 != nil {
+ t.Errorf("ReadFile(%s) error=%v", path, err2)
+ return
+ }
}
if err != zt.Error {
t.Errorf("error=%v, want %v", err, zt.Error)
@@ -545,7 +557,7 @@
// test read of each file
for i, ft := range zt.File {
- readTestFile(t, zt, ft, z.File[i])
+ readTestFile(t, zt, ft, z.File[i], raw)
}
if t.Failed() {
return
@@ -557,7 +569,7 @@
for i := 0; i < 5; i++ {
for j, ft := range zt.File {
go func(j int, ft ZipTestFile) {
- readTestFile(t, zt, ft, z.File[j])
+ readTestFile(t, zt, ft, z.File[j], raw)
done <- true
}(j, ft)
n++
@@ -574,7 +586,7 @@
return t1.Equal(t2) && name1 == name2 && offset1 == offset2
}
-func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File) {
+func readTestFile(t *testing.T, zt ZipTest, ft ZipTestFile, f *File, raw []byte) {
if f.Name != ft.Name {
t.Errorf("name=%q, want %q", f.Name, ft.Name)
}
@@ -594,6 +606,31 @@
t.Errorf("%v: UncompressedSize=%#x does not match UncompressedSize64=%#x", f.Name, size, f.UncompressedSize64)
}
+ // Check that OpenRaw returns the correct byte segment
+ rw, err := f.OpenRaw()
+ if err != nil {
+ t.Errorf("%v: OpenRaw error=%v", f.Name, err)
+ return
+ }
+ start, err := f.DataOffset()
+ if err != nil {
+ t.Errorf("%v: DataOffset error=%v", f.Name, err)
+ return
+ }
+ got, err := io.ReadAll(rw)
+ if err != nil {
+ t.Errorf("%v: OpenRaw ReadAll error=%v", f.Name, err)
+ return
+ }
+ end := uint64(start) + f.CompressedSize64
+ want := raw[start:end]
+ if !bytes.Equal(got, want) {
+ t.Logf("got %q", got)
+ t.Logf("want %q", want)
+ t.Errorf("%v: OpenRaw returned unexpected bytes", f.Name)
+ return
+ }
+
r, err := f.Open()
if err != nil {
t.Errorf("%v", err)
@@ -1166,3 +1203,125 @@
t.Errorf("Error reading file: %v", err)
}
}
+
+func TestReadDataDescriptor(t *testing.T) {
+ tests := []struct {
+ desc string
+ in []byte
+ zip64 bool
+ want *dataDescriptor
+ wantErr error
+ }{{
+ desc: "valid 32 bit with signature",
+ in: []byte{
+ 0x50, 0x4b, 0x07, 0x08, // signature
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, // compressed size
+ 0x08, 0x09, 0x0a, 0x0b, // uncompressed size
+ },
+ want: &dataDescriptor{
+ crc32: 0x03020100,
+ compressedSize: 0x07060504,
+ uncompressedSize: 0x0b0a0908,
+ },
+ }, {
+ desc: "valid 32 bit without signature",
+ in: []byte{
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, // compressed size
+ 0x08, 0x09, 0x0a, 0x0b, // uncompressed size
+ },
+ want: &dataDescriptor{
+ crc32: 0x03020100,
+ compressedSize: 0x07060504,
+ uncompressedSize: 0x0b0a0908,
+ },
+ }, {
+ desc: "valid 64 bit with signature",
+ in: []byte{
+ 0x50, 0x4b, 0x07, 0x08, // signature
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, // uncompressed size
+ },
+ zip64: true,
+ want: &dataDescriptor{
+ crc32: 0x03020100,
+ compressedSize: 0x0b0a090807060504,
+ uncompressedSize: 0x131211100f0e0d0c,
+ },
+ }, {
+ desc: "valid 64 bit without signature",
+ in: []byte{
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, // uncompressed size
+ },
+ zip64: true,
+ want: &dataDescriptor{
+ crc32: 0x03020100,
+ compressedSize: 0x0b0a090807060504,
+ uncompressedSize: 0x131211100f0e0d0c,
+ },
+ }, {
+ desc: "invalid 32 bit with signature",
+ in: []byte{
+ 0x50, 0x4b, 0x07, 0x08, // signature
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, // unexpected end
+ },
+ wantErr: io.ErrUnexpectedEOF,
+ }, {
+ desc: "invalid 32 bit without signature",
+ in: []byte{
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, // unexpected end
+ },
+ wantErr: io.ErrUnexpectedEOF,
+ }, {
+ desc: "invalid 64 bit with signature",
+ in: []byte{
+ 0x50, 0x4b, 0x07, 0x08, // signature
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, // unexpected end
+ },
+ zip64: true,
+ wantErr: io.ErrUnexpectedEOF,
+ }, {
+ desc: "invalid 64 bit without signature",
+ in: []byte{
+ 0x00, 0x01, 0x02, 0x03, // crc32
+ 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, // compressed size
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, // unexpected end
+ },
+ zip64: true,
+ wantErr: io.ErrUnexpectedEOF,
+ }}
+
+ for _, test := range tests {
+ t.Run(test.desc, func(t *testing.T) {
+ r := bytes.NewReader(test.in)
+
+ desc, err := readDataDescriptor(r, test.zip64)
+ if err != test.wantErr {
+ t.Fatalf("got err %v; want nil", err)
+ }
+ if test.want == nil {
+ return
+ }
+ if desc == nil {
+ t.Fatalf("got nil DataDescriptor; want non-nil")
+ }
+ if desc.crc32 != test.want.crc32 {
+ t.Errorf("got CRC32 %#x; want %#x", desc.crc32, test.want.crc32)
+ }
+ if desc.compressedSize != test.want.compressedSize {
+ t.Errorf("got CompressedSize %#x; want %#x", desc.compressedSize, test.want.compressedSize)
+ }
+ if desc.uncompressedSize != test.want.uncompressedSize {
+ t.Errorf("got UncompressedSize %#x; want %#x", desc.uncompressedSize, test.want.uncompressedSize)
+ }
+ })
+ }
+}
diff --git a/src/archive/zip/struct.go b/src/archive/zip/struct.go
index 3dc0c50..ff9f605 100644
--- a/src/archive/zip/struct.go
+++ b/src/archive/zip/struct.go
@@ -42,7 +42,7 @@
directoryHeaderLen = 46 // + filename + extra + comment
directoryEndLen = 22 // + comment
dataDescriptorLen = 16 // four uint32: descriptor signature, crc32, compressed size, size
- dataDescriptor64Len = 24 // descriptor with 8 byte sizes
+ dataDescriptor64Len = 24 // two uint32: signature, crc32 | two uint64: compressed size, size
directory64LocLen = 20 //
directory64EndLen = 56 // + extra
@@ -315,6 +315,10 @@
return h.CompressedSize64 >= uint32max || h.UncompressedSize64 >= uint32max
}
+func (f *FileHeader) hasDataDescriptor() bool {
+ return f.Flags&0x8 != 0
+}
+
func msdosModeToFileMode(m uint32) (mode fs.FileMode) {
if m&msdosDir != 0 {
mode = fs.ModeDir | 0777
@@ -386,3 +390,11 @@
}
return mode
}
+
+// dataDescriptor holds the data descriptor that optionally follows the file
+// contents in the zip file.
+type dataDescriptor struct {
+ crc32 uint32
+ compressedSize uint64
+ uncompressedSize uint64
+}
diff --git a/src/archive/zip/writer.go b/src/archive/zip/writer.go
index cdc534e..3b23cc3 100644
--- a/src/archive/zip/writer.go
+++ b/src/archive/zip/writer.go
@@ -37,6 +37,7 @@
type header struct {
*FileHeader
offset uint64
+ raw bool
}
// NewWriter returns a new Writer writing a zip file to w.
@@ -245,22 +246,31 @@
return true, require
}
+// prepare performs the bookkeeping operations required at the start of
+// CreateHeader and CreateRaw.
+func (w *Writer) prepare(fh *FileHeader) error {
+ if w.last != nil && !w.last.closed {
+ if err := w.last.close(); err != nil {
+ return err
+ }
+ }
+ if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
+ // See https://golang.org/issue/11144 confusion.
+ return errors.New("archive/zip: invalid duplicate FileHeader")
+ }
+ return nil
+}
+
// CreateHeader adds a file to the zip archive using the provided FileHeader
// for the file metadata. Writer takes ownership of fh and may mutate
// its fields. The caller must not modify fh after calling CreateHeader.
//
// This returns a Writer to which the file contents should be written.
// The file's contents must be written to the io.Writer before the next
-// call to Create, CreateHeader, or Close.
+// call to Create, CreateHeader, CreateRaw, or Close.
func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
- if w.last != nil && !w.last.closed {
- if err := w.last.close(); err != nil {
- return nil, err
- }
- }
- if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
- // See https://golang.org/issue/11144 confusion.
- return nil, errors.New("archive/zip: invalid duplicate FileHeader")
+ if err := w.prepare(fh); err != nil {
+ return nil, err
}
// The ZIP format has a sad state of affairs regarding character encoding.
@@ -365,7 +375,7 @@
ow = fw
}
w.dir = append(w.dir, h)
- if err := writeHeader(w.cw, fh); err != nil {
+ if err := writeHeader(w.cw, h); err != nil {
return nil, err
}
// If we're creating a directory, fw is nil.
@@ -373,7 +383,7 @@
return ow, nil
}
-func writeHeader(w io.Writer, h *FileHeader) error {
+func writeHeader(w io.Writer, h *header) error {
const maxUint16 = 1<<16 - 1
if len(h.Name) > maxUint16 {
return errLongName
@@ -390,9 +400,20 @@
b.uint16(h.Method)
b.uint16(h.ModifiedTime)
b.uint16(h.ModifiedDate)
- b.uint32(0) // since we are writing a data descriptor crc32,
- b.uint32(0) // compressed size,
- b.uint32(0) // and uncompressed size should be zero
+ // In raw mode (caller does the compression), the values are either
+ // written here or in the trailing data descriptor based on the header
+ // flags.
+ if h.raw && !h.hasDataDescriptor() {
+ b.uint32(h.CRC32)
+ b.uint32(uint32(min64(h.CompressedSize64, uint32max)))
+ b.uint32(uint32(min64(h.UncompressedSize64, uint32max)))
+ } else {
+ // When this package handles the compression, these values are
+ // always written to the trailing data descriptor.
+ b.uint32(0) // crc32
+ b.uint32(0) // compressed size
+ b.uint32(0) // uncompressed size
+ }
b.uint16(uint16(len(h.Name)))
b.uint16(uint16(len(h.Extra)))
if _, err := w.Write(buf[:]); err != nil {
@@ -405,6 +426,65 @@
return err
}
+func min64(x, y uint64) uint64 {
+ if x < y {
+ return x
+ }
+ return y
+}
+
+// CreateRaw adds a file to the zip archive using the provided FileHeader and
+// returns a Writer to which the file contents should be written. The file's
+// contents must be written to the io.Writer before the next call to Create,
+// CreateHeader, CreateRaw, or Close.
+//
+// In contrast to CreateHeader, the bytes passed to Writer are not compressed.
+func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) {
+ if err := w.prepare(fh); err != nil {
+ return nil, err
+ }
+
+ fh.CompressedSize = uint32(min64(fh.CompressedSize64, uint32max))
+ fh.UncompressedSize = uint32(min64(fh.UncompressedSize64, uint32max))
+
+ h := &header{
+ FileHeader: fh,
+ offset: uint64(w.cw.count),
+ raw: true,
+ }
+ w.dir = append(w.dir, h)
+ if err := writeHeader(w.cw, h); err != nil {
+ return nil, err
+ }
+
+ if strings.HasSuffix(fh.Name, "/") {
+ w.last = nil
+ return dirWriter{}, nil
+ }
+
+ fw := &fileWriter{
+ header: h,
+ zipw: w.cw,
+ }
+ w.last = fw
+ return fw, nil
+}
+
+// Copy copies the file f (obtained from a Reader) into w. It copies the raw
+// form directly bypassing decompression, compression, and validation.
+func (w *Writer) Copy(f *File) error {
+ r, err := f.OpenRaw()
+ if err != nil {
+ return err
+ }
+ fw, err := w.CreateRaw(&f.FileHeader)
+ if err != nil {
+ return err
+ }
+ _, err = io.Copy(fw, r)
+ return err
+}
+
// RegisterCompressor registers or overrides a custom compressor for a specific
// method ID. If a compressor for a given method is not found, Writer will
// default to looking up the compressor at the package level.
@@ -446,6 +526,9 @@
if w.closed {
return 0, errors.New("zip: write to closed file")
}
+ if w.raw {
+ return w.zipw.Write(p)
+ }
w.crc32.Write(p)
return w.rawCount.Write(p)
}
@@ -455,6 +538,9 @@
return errors.New("zip: file closed twice")
}
w.closed = true
+ if w.raw {
+ return w.writeDataDescriptor()
+ }
if err := w.comp.Close(); err != nil {
return err
}
@@ -474,26 +560,33 @@
fh.UncompressedSize = uint32(fh.UncompressedSize64)
}
+ return w.writeDataDescriptor()
+}
+
+func (w *fileWriter) writeDataDescriptor() error {
+ if !w.hasDataDescriptor() {
+ return nil
+ }
// Write data descriptor. This is more complicated than one would
// think, see e.g. comments in zipfile.c:putextended() and
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
// The approach here is to write 8 byte sizes if needed without
// adding a zip64 extra in the local header (too late anyway).
var buf []byte
- if fh.isZip64() {
+ if w.isZip64() {
buf = make([]byte, dataDescriptor64Len)
} else {
buf = make([]byte, dataDescriptorLen)
}
b := writeBuf(buf)
b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
- b.uint32(fh.CRC32)
- if fh.isZip64() {
- b.uint64(fh.CompressedSize64)
- b.uint64(fh.UncompressedSize64)
+ b.uint32(w.CRC32)
+ if w.isZip64() {
+ b.uint64(w.CompressedSize64)
+ b.uint64(w.UncompressedSize64)
} else {
- b.uint32(fh.CompressedSize)
- b.uint32(fh.UncompressedSize)
+ b.uint32(w.CompressedSize)
+ b.uint32(w.UncompressedSize)
}
_, err := w.zipw.Write(buf)
return err
diff --git a/src/archive/zip/writer_test.go b/src/archive/zip/writer_test.go
index 3fa8bef..97c6c52 100644
--- a/src/archive/zip/writer_test.go
+++ b/src/archive/zip/writer_test.go
@@ -6,8 +6,10 @@
import (
"bytes"
+ "compress/flate"
"encoding/binary"
"fmt"
+ "hash/crc32"
"io"
"io/fs"
"math/rand"
@@ -365,6 +367,171 @@
}
}
+func TestWriterCopy(t *testing.T) {
+ // make a zip file
+ buf := new(bytes.Buffer)
+ w := NewWriter(buf)
+ for _, wt := range writeTests {
+ testCreate(t, w, &wt)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read it back
+ src, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, src.File[i], &wt)
+ }
+
+ // make a new zip file copying the old compressed data.
+ buf2 := new(bytes.Buffer)
+ dst := NewWriter(buf2)
+ for _, f := range src.File {
+ if err := dst.Copy(f); err != nil {
+ t.Fatal(err)
+ }
+ }
+ if err := dst.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read the new one back
+ r, err := NewReader(bytes.NewReader(buf2.Bytes()), int64(buf2.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, wt := range writeTests {
+ testReadFile(t, r.File[i], &wt)
+ }
+}
+
+func TestWriterCreateRaw(t *testing.T) {
+ files := []struct {
+ name string
+ content []byte
+ method uint16
+ flags uint16
+ crc32 uint32
+ uncompressedSize uint64
+ compressedSize uint64
+ }{
+ {
+ name: "small store w desc",
+ content: []byte("gophers"),
+ method: Store,
+ flags: 0x8,
+ },
+ {
+ name: "small deflate wo desc",
+ content: bytes.Repeat([]byte("abcdefg"), 2048),
+ method: Deflate,
+ },
+ }
+
+ // write a zip file
+ archive := new(bytes.Buffer)
+ w := NewWriter(archive)
+
+ for i := range files {
+ f := &files[i]
+ f.crc32 = crc32.ChecksumIEEE(f.content)
+ size := uint64(len(f.content))
+ f.uncompressedSize = size
+ f.compressedSize = size
+
+ var compressedContent []byte
+ if f.method == Deflate {
+ var buf bytes.Buffer
+ w, err := flate.NewWriter(&buf, flate.BestSpeed)
+ if err != nil {
+ t.Fatalf("flate.NewWriter err = %v", err)
+ }
+ _, err = w.Write(f.content)
+ if err != nil {
+ t.Fatalf("flate Write err = %v", err)
+ }
+ err = w.Close()
+ if err != nil {
+ t.Fatalf("flate Writer.Close err = %v", err)
+ }
+ compressedContent = buf.Bytes()
+ f.compressedSize = uint64(len(compressedContent))
+ }
+
+ h := &FileHeader{
+ Name: f.name,
+ Method: f.method,
+ Flags: f.flags,
+ CRC32: f.crc32,
+ CompressedSize64: f.compressedSize,
+ UncompressedSize64: f.uncompressedSize,
+ }
+ w, err := w.CreateRaw(h)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if compressedContent != nil {
+ _, err = w.Write(compressedContent)
+ } else {
+ _, err = w.Write(f.content)
+ }
+ if err != nil {
+ t.Fatalf("%s Write got %v; want nil", f.name, err)
+ }
+ }
+
+ if err := w.Close(); err != nil {
+ t.Fatal(err)
+ }
+
+ // read it back
+ r, err := NewReader(bytes.NewReader(archive.Bytes()), int64(archive.Len()))
+ if err != nil {
+ t.Fatal(err)
+ }
+ for i, want := range files {
+ got := r.File[i]
+ if got.Name != want.name {
+ t.Errorf("got Name %s; want %s", got.Name, want.name)
+ }
+ if got.Method != want.method {
+ t.Errorf("%s: got Method %#x; want %#x", want.name, got.Method, want.method)
+ }
+ if got.Flags != want.flags {
+ t.Errorf("%s: got Flags %#x; want %#x", want.name, got.Flags, want.flags)
+ }
+ if got.CRC32 != want.crc32 {
+ t.Errorf("%s: got CRC32 %#x; want %#x", want.name, got.CRC32, want.crc32)
+ }
+ if got.CompressedSize64 != want.compressedSize {
+ t.Errorf("%s: got CompressedSize64 %d; want %d", want.name, got.CompressedSize64, want.compressedSize)
+ }
+ if got.UncompressedSize64 != want.uncompressedSize {
+ t.Errorf("%s: got UncompressedSize64 %d; want %d", want.name, got.UncompressedSize64, want.uncompressedSize)
+ }
+
+ r, err := got.Open()
+ if err != nil {
+ t.Errorf("%s: Open err = %v", got.Name, err)
+ continue
+ }
+
+ buf, err := io.ReadAll(r)
+ if err != nil {
+ t.Errorf("%s: ReadAll err = %v", got.Name, err)
+ continue
+ }
+
+ if !bytes.Equal(buf, want.content) {
+ t.Errorf("%v: ReadAll returned unexpected bytes", got.Name)
+ }
+ }
+}
+
func testCreate(t *testing.T, w *Writer, wt *WriteTest) {
header := &FileHeader{
Name: wt.Name,
@@ -390,15 +557,15 @@
testFileMode(t, f, wt.Mode)
rc, err := f.Open()
if err != nil {
- t.Fatal("opening:", err)
+ t.Fatalf("opening %s: %v", f.Name, err)
}
b, err := io.ReadAll(rc)
if err != nil {
- t.Fatal("reading:", err)
+ t.Fatalf("reading %s: %v", f.Name, err)
}
err = rc.Close()
if err != nil {
- t.Fatal("closing:", err)
+ t.Fatalf("closing %s: %v", f.Name, err)
}
if !bytes.Equal(b, wt.Data) {
t.Errorf("File contents %q, want %q", b, wt.Data)