[mod] sumdb/dirhash: directory tree hash algorithm

121 views
Skip to first unread message

Russ Cox (Gerrit)

unread,
May 10, 2019, 10:59:55 AM5/10/19
to Hyang-Ah Hana Kim, Ian Lance Taylor, goph...@pubsubhelper.golang.org, Russ Cox, golang-co...@googlegroups.com

Russ Cox would like Hyang-Ah Hana Kim to review this change.

View Change

sumdb/dirhash: directory tree hash algorithm

Copied from cmd/go/internal/dirhash, with additional doc comments.

For golang/go#31761.

Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
---
A sumdb/dirhash/hash.go
A sumdb/dirhash/hash_test.go
2 files changed, 267 insertions(+), 0 deletions(-)

diff --git a/sumdb/dirhash/hash.go b/sumdb/dirhash/hash.go
new file mode 100644
index 0000000..ef5df6f
--- /dev/null
+++ b/sumdb/dirhash/hash.go
@@ -0,0 +1,132 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package dirhash defines hashes over directory trees.
+// These hashes are recorded in go.sum files and in the Go checksum database,
+// to allow verifying that a newly-downloaded module has the expected content.
+package dirhash
+
+import (
+ "archive/zip"
+ "crypto/sha256"
+ "encoding/base64"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "sort"
+ "strings"
+)
+
+// DefaultHash is the default hash function used in new go.sum entries.
+// It is initialized to Hash1, the "h1:" SHA-256 based directory hash.
+var DefaultHash Hash = Hash1
+
+// A Hash is a directory hash function.
+// It accepts a list of files along with a function that opens the content of each file.
+// It opens, reads, hashes, and closes each file and returns the overall directory hash.
+//
+// The returned string is the complete hash; Hash1, for example, returns a
+// value beginning with "h1:". A non-nil error means no hash was computed
+// and the returned string is empty.
+type Hash func(files []string, open func(string) (io.ReadCloser, error)) (string, error)
+
+// Hash1 is the "h1:" directory hash function, using SHA-256.
+//
+// Hash1 is "h1:" followed by the base64-encoded SHA-256 hash of a summary
+// prepared as if by the Unix command:
+//
+//	sha256sum $(find . -type f | sort) | sha256sum
+//
+// More precisely, the hashed summary contains a single line for each file in the list,
+// ordered by sort.Strings applied to the file names, where each line consists of
+// the hexadecimal SHA-256 hash of the file content,
+// two spaces (U+0020), the file name, and a newline (U+000A).
+//
+// File names with newlines (U+000A) are disallowed.
+//
+// The files slice is not modified: Hash1 sorts a private copy.
+// Each file is opened with open, read to the end, and closed before the
+// next one is opened. The first error returned by open or by reading a
+// file aborts the computation and is returned unchanged, together with an
+// empty hash string.
+func Hash1(files []string, open func(string) (io.ReadCloser, error)) (string, error) {
+	h := sha256.New()
+	// Sort a copy so that the caller's slice keeps its original order.
+	files = append([]string(nil), files...)
+	sort.Strings(files)
+	for _, file := range files {
+		// A newline in a name would make the line-oriented summary ambiguous.
+		if strings.Contains(file, "\n") {
+			return "", errors.New("dirhash: filenames with newlines are not supported")
+		}
+		r, err := open(file)
+		if err != nil {
+			return "", err
+		}
+		hf := sha256.New()
+		_, err = io.Copy(hf, r)
+		// Close before checking the copy error so the reader is released on every path.
+		r.Close()
+		if err != nil {
+			return "", err
+		}
+		// Hex digest, two spaces, file name, newline: the documented summary line.
+		fmt.Fprintf(h, "%x  %s\n", hf.Sum(nil), file)
+	}
+	return "h1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)), nil
+}
+
+// HashDir computes the hash of the directory tree rooted at dir on the
+// local file system. The file names fed to the hash function have the
+// directory name replaced by prefix, as produced by DirFiles.
+//
+// An error from listing the directory is returned without invoking hash;
+// otherwise the result of hash is returned as is.
+func HashDir(dir, prefix string, hash Hash) (string, error) {
+	names, err := DirFiles(dir, prefix)
+	if err != nil {
+		return "", err
+	}
+	return hash(names, func(name string) (io.ReadCloser, error) {
+		// Map the prefixed name back onto the on-disk location under dir.
+		local := filepath.Join(dir, strings.TrimPrefix(name, prefix))
+		return os.Open(local)
+	})
+}
+
+// DirFiles returns the list of files in the tree rooted at dir,
+// replacing the directory name dir with prefix in each name.
+// The resulting names always use forward slashes.
+//
+// Only non-directory entries are listed; directories themselves are
+// skipped. DirFiles returns an error if walking the tree fails or if dir
+// does not name a directory.
+func DirFiles(dir, prefix string) ([]string, error) {
+	var files []string
+	dir = filepath.Clean(dir)
+	err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+		if file == dir {
+			// The root of the walk is itself a non-directory. Report it
+			// instead of slicing past the end of the path below.
+			return fmt.Errorf("%s is not a directory", dir)
+		}
+		// filepath.Rel gives the path below dir for every form of dir,
+		// including "." and a file system root, where stripping
+		// len(dir)+1 bytes would cut into the file name.
+		rel, err := filepath.Rel(dir, file)
+		if err != nil {
+			return err
+		}
+		files = append(files, filepath.ToSlash(filepath.Join(prefix, rel)))
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return files, nil
+}
+
+// HashZip computes the hash of the files stored in the zip archive named
+// by zipfile. The hash covers only the file names and their contents:
+// the exact zip file format encoding, compression method,
+// per-file modification times, and other metadata are ignored.
+//
+// The archive is opened for the duration of the call and closed before
+// HashZip returns. An error opening the archive is returned without
+// invoking hash.
+func HashZip(zipfile string, hash Hash) (string, error) {
+	zr, err := zip.OpenReader(zipfile)
+	if err != nil {
+		return "", err
+	}
+	defer zr.Close()
+
+	// Index the archive entries by name so the opener can look them up.
+	byName := make(map[string]*zip.File)
+	var names []string
+	for _, entry := range zr.File {
+		byName[entry.Name] = entry
+		names = append(names, entry.Name)
+	}
+
+	return hash(names, func(name string) (io.ReadCloser, error) {
+		if entry := byName[name]; entry != nil {
+			return entry.Open()
+		}
+		return nil, fmt.Errorf("file %q not found in zip", name) // should never happen
+	})
+}
diff --git a/sumdb/dirhash/hash_test.go b/sumdb/dirhash/hash_test.go
new file mode 100644
index 0000000..ed463c1
--- /dev/null
+++ b/sumdb/dirhash/hash_test.go
@@ -0,0 +1,135 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dirhash
+
+import (
+ "archive/zip"
+ "crypto/sha256"
+ "encoding/base64"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+// h returns the lowercase hexadecimal SHA-256 digest of s.
+func h(s string) string {
+	digest := sha256.Sum256([]byte(s))
+	return fmt.Sprintf("%x", digest[:])
+}
+
+// htop returns k, a colon, and the standard base64 encoding of the
+// SHA-256 digest of s, mirroring the shape of a top-level directory hash.
+func htop(k string, s string) string {
+	hasher := sha256.New()
+	hasher.Write([]byte(s))
+	encoded := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
+	return k + ":" + encoded
+}
+
+// TestHash1 checks that Hash1 hashes the files in sorted name order
+// regardless of the order they are listed in, and that it rejects file
+// names containing a newline.
+func TestHash1(t *testing.T) {
+	files := []string{"xyz", "abc"}
+	open := func(name string) (io.ReadCloser, error) {
+		return ioutil.NopCloser(strings.NewReader("data for " + name)), nil
+	}
+	// Expected summary: one "<hex sha256>  <name>\n" line per file (two
+	// spaces between digest and name), in sorted name order.
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "abc", h("data for xyz"), "xyz"))
+	out, err := Hash1(files, open)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if out != want {
+		t.Errorf("Hash1(...) = %s, want %s", out, want)
+	}
+
+	_, err = Hash1([]string{"xyz", "a\nbc"}, open)
+	if err == nil {
+		t.Error("Hash1: expected error on newline in filenames")
+	}
+}
+
+// TestHashDir checks that HashDir hashes the files of a temporary
+// directory under the replacement prefix rather than the on-disk name.
+func TestHashDir(t *testing.T) {
+	dir, err := ioutil.TempDir("", "dirhash-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+	if err := ioutil.WriteFile(filepath.Join(dir, "xyz"), []byte("data for xyz"), 0666); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, "abc"), []byte("data for abc"), 0666); err != nil {
+		t.Fatal(err)
+	}
+	// Expected summary lines use two spaces between digest and name.
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "prefix/abc", h("data for xyz"), "prefix/xyz"))
+	out, err := HashDir(dir, "prefix", Hash1)
+	if err != nil {
+		t.Fatalf("HashDir: %v", err)
+	}
+	if out != want {
+		t.Errorf("HashDir(...) = %s, want %s", out, want)
+	}
+}
+
+// TestHashZip writes a small zip archive to a temporary file and checks
+// that HashZip produces the hash of its entries by name and content.
+func TestHashZip(t *testing.T) {
+	f, err := ioutil.TempFile("", "dirhash-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.Remove(f.Name())
+	// Covers the early t.Fatal paths; the explicit Close below is the one
+	// whose error is checked.
+	defer f.Close()
+
+	z := zip.NewWriter(f)
+	w, err := z.Create("prefix/xyz")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := w.Write([]byte("data for xyz")); err != nil {
+		t.Fatal(err)
+	}
+	w, err = z.Create("prefix/abc")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if _, err := w.Write([]byte("data for abc")); err != nil {
+		t.Fatal(err)
+	}
+	if err := z.Close(); err != nil {
+		t.Fatal(err)
+	}
+	if err := f.Close(); err != nil {
+		t.Fatal(err)
+	}
+
+	// Entries were written as xyz then abc; the hash must use sorted order.
+	// Expected summary lines use two spaces between digest and name.
+	want := htop("h1", fmt.Sprintf("%s  %s\n%s  %s\n", h("data for abc"), "prefix/abc", h("data for xyz"), "prefix/xyz"))
+	out, err := HashZip(f.Name(), Hash1)
+	if err != nil {
+		t.Fatalf("HashZip: %v", err)
+	}
+	if out != want {
+		t.Errorf("HashZip(...) = %s, want %s", out, want)
+	}
+}
+
+// TestDirFiles checks that DirFiles lists every regular file below the
+// directory, including files in subdirectories, with the directory name
+// replaced by the prefix and forward slashes as separators.
+func TestDirFiles(t *testing.T) {
+	dir, err := ioutil.TempDir("", "dirfiles-test-")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer os.RemoveAll(dir)
+	if err := ioutil.WriteFile(filepath.Join(dir, "xyz"), []byte("data for xyz"), 0666); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, "abc"), []byte("data for abc"), 0666); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Mkdir(filepath.Join(dir, "subdir"), 0777); err != nil {
+		t.Fatal(err)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, "subdir", "xyz"), []byte("data for subdir xyz"), 0666); err != nil {
+		t.Fatal(err)
+	}
+	prefix := "foo/bar@v2.3.4"
+	out, err := DirFiles(dir, prefix)
+	if err != nil {
+		t.Fatalf("DirFiles: %v", err)
+	}
+	// filepath.Walk visits entries in lexical order, so the result order is fixed.
+	want := []string{prefix + "/abc", prefix + "/subdir/xyz", prefix + "/xyz"}
+	if len(out) != len(want) {
+		t.Fatalf("DirFiles returned %d files %v, want %d files %v", len(out), out, len(want), want)
+	}
+	for i, file := range out {
+		if !strings.HasPrefix(file, prefix) {
+			t.Errorf("Dir file = %s, want prefix %s", file, prefix)
+		}
+		if file != want[i] {
+			t.Errorf("Dir file %d = %s, want %s", i, file, want[i])
+		}
+	}
+}

To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: mod
Gerrit-Branch: master
Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
Gerrit-Change-Number: 176464
Gerrit-PatchSet: 1
Gerrit-Owner: Russ Cox <r...@golang.org>
Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
Gerrit-Reviewer: Russ Cox <r...@golang.org>
Gerrit-MessageType: newchange

Gobot Gobot (Gerrit)

unread,
May 10, 2019, 11:01:19 AM5/10/19
to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

TryBots beginning. Status page: https://farmer.golang.org/try?commit=abaa2951

View Change

    To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

    Gerrit-Project: mod
    Gerrit-Branch: master
    Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
    Gerrit-Change-Number: 176464
    Gerrit-PatchSet: 1
    Gerrit-Owner: Russ Cox <r...@golang.org>
    Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
    Gerrit-Reviewer: Russ Cox <r...@golang.org>
    Gerrit-CC: Gobot Gobot <go...@golang.org>
    Gerrit-Comment-Date: Fri, 10 May 2019 15:01:17 +0000
    Gerrit-HasComments: No
    Gerrit-Has-Labels: No
    Gerrit-MessageType: comment

    Gobot Gobot (Gerrit)

    unread,
    May 10, 2019, 11:04:22 AM5/10/19
    to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

    TryBots are happy.

    Patch set 1:TryBot-Result +1

    View Change

      To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

      Gerrit-Project: mod
      Gerrit-Branch: master
      Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
      Gerrit-Change-Number: 176464
      Gerrit-PatchSet: 1
      Gerrit-Owner: Russ Cox <r...@golang.org>
      Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
      Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
      Gerrit-Reviewer: Russ Cox <r...@golang.org>
      Gerrit-Comment-Date: Fri, 10 May 2019 15:04:20 +0000
      Gerrit-HasComments: No
      Gerrit-Has-Labels: Yes
      Gerrit-MessageType: comment

      Hyang-Ah Hana Kim (Gerrit)

      unread,
      May 10, 2019, 1:53:35 PM5/10/19
      to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, Gobot Gobot, golang-co...@googlegroups.com

      Patch set 1:Code-Review +2

      View Change

        To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

        Gerrit-Project: mod
        Gerrit-Branch: master
        Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
        Gerrit-Change-Number: 176464
        Gerrit-PatchSet: 1
        Gerrit-Owner: Russ Cox <r...@golang.org>
        Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
        Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
        Gerrit-Reviewer: Russ Cox <r...@golang.org>
        Gerrit-Comment-Date: Fri, 10 May 2019 17:53:31 +0000

        Russ Cox (Gerrit)

        unread,
        May 10, 2019, 3:02:26 PM5/10/19
        to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, Gobot Gobot, golang-co...@googlegroups.com

        Uploaded patch set 2: Patch Set 1 was rebased.

        View Change

          To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

          Gerrit-Project: mod
          Gerrit-Branch: master
          Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
          Gerrit-Change-Number: 176464
          Gerrit-PatchSet: 2
          Gerrit-Owner: Russ Cox <r...@golang.org>
          Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
          Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
          Gerrit-Reviewer: Russ Cox <r...@golang.org>
          Gerrit-Comment-Date: Fri, 10 May 2019 19:02:20 +0000
          Gerrit-HasComments: No
          Gerrit-Has-Labels: No
          Gerrit-MessageType: comment

          Gobot Gobot (Gerrit)

          unread,
          May 10, 2019, 3:02:36 PM5/10/19
          to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

          TryBots beginning. Status page: https://farmer.golang.org/try?commit=2e99a13b

          View Change

            To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

            Gerrit-Project: mod
            Gerrit-Branch: master
            Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
            Gerrit-Change-Number: 176464
            Gerrit-PatchSet: 2
            Gerrit-Owner: Russ Cox <r...@golang.org>
            Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
            Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
            Gerrit-Reviewer: Russ Cox <r...@golang.org>
            Gerrit-Comment-Date: Fri, 10 May 2019 19:02:33 +0000

            Gobot Gobot (Gerrit)

            unread,
            May 10, 2019, 3:05:03 PM5/10/19
            to Russ Cox, goph...@pubsubhelper.golang.org, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

            TryBots are happy.

            Patch set 2:TryBot-Result +1

            View Change

              To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

              Gerrit-Project: mod
              Gerrit-Branch: master
              Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
              Gerrit-Change-Number: 176464
              Gerrit-PatchSet: 2
              Gerrit-Owner: Russ Cox <r...@golang.org>
              Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
              Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
              Gerrit-Reviewer: Russ Cox <r...@golang.org>
              Gerrit-Comment-Date: Fri, 10 May 2019 19:05:02 +0000

              Russ Cox (Gerrit)

              unread,
              May 13, 2019, 2:37:29 PM5/13/19
              to Russ Cox, goph...@pubsubhelper.golang.org, Gobot Gobot, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

              Uploaded patch set 3: Patch Set 2 was rebased.

              View Change

                To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

                Gerrit-Project: mod
                Gerrit-Branch: master
                Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
                Gerrit-Change-Number: 176464
                Gerrit-PatchSet: 3
                Gerrit-Owner: Russ Cox <r...@golang.org>
                Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
                Gerrit-Reviewer: Russ Cox <r...@golang.org>
                Gerrit-Comment-Date: Mon, 13 May 2019 18:37:25 +0000

                Russ Cox (Gerrit)

                unread,
                May 13, 2019, 2:37:30 PM5/13/19
                to Russ Cox, goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Gobot Gobot, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

                Russ Cox merged this change.

                View Change

                Approvals: Hyang-Ah Hana Kim: Looks good to me, approved
                sumdb/dirhash: directory tree hash algorithm

                Copied from cmd/go/internal/dirhash, with additional doc comments.

                For golang/go#31761.

                Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
                Reviewed-on: https://go-review.googlesource.com/c/mod/+/176464
                Reviewed-by: Hyang-Ah Hana Kim <hya...@gmail.com>
                Gerrit-PatchSet: 4
                Gerrit-Owner: Russ Cox <r...@golang.org>
                Gerrit-Reviewer: Gobot Gobot <go...@golang.org>
                Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
                Gerrit-Reviewer: Russ Cox <r...@golang.org>
                Gerrit-MessageType: merged

                Masoud Shokohi (Gerrit)

                unread,
                Apr 29, 2021, 10:07:57 PM4/29/21
                to Russ Cox, goph...@pubsubhelper.golang.org, Go Bot, Hyang-Ah Hana Kim, golang-co...@googlegroups.com

                View Change

                1 comment:

                • Patchset:

                To view, visit change 176464. To unsubscribe, or for help writing mail filters, visit settings.

                Gerrit-Project: mod
                Gerrit-Branch: master
                Gerrit-Change-Id: Id56c1bbb6f27e69194f383d49b9def6876f948be
                Gerrit-Change-Number: 176464
                Gerrit-PatchSet: 4
                Gerrit-Owner: Russ Cox <r...@golang.org>
                Gerrit-Reviewer: Go Bot <go...@golang.org>
                Gerrit-Reviewer: Hyang-Ah Hana Kim <hya...@gmail.com>
                Gerrit-Reviewer: Russ Cox <r...@golang.org>
                Gerrit-CC: Masoud Shokohi <mass.s...@gmail.com>
                Gerrit-Comment-Date: Fri, 30 Apr 2021 02:07:51 +0000
                Gerrit-HasComments: Yes
                Gerrit-Has-Labels: No
                Gerrit-MessageType: comment
                Reply all
                Reply to author
                Forward
                0 new messages