[pkgsite-metrics] cmd/jobs: support starting a job

2 views
Skip to first unread message

Jonathan Amsterdam (Gerrit)

unread,
May 26, 2023, 9:12:41 AM5/26/23
to Zvonimir Pavlinovic, Maceo Thompson, goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Maceo Thompson, Zvonimir Pavlinovic.

Jonathan Amsterdam would like Zvonimir Pavlinovic and Maceo Thompson to review this change.

View Change

cmd/jobs: support starting a job

Add a "start" subcommand that copies a binary to GCS and enqueues tasks.

It replaces the internal enqueue_analysis.sh script previously used.

It improves on that script by providing better error messages, and
optimizing the upload by skipping it if the binary is already there.

Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
---
M cmd/jobs/main.go
1 file changed, 146 insertions(+), 3 deletions(-)

diff --git a/cmd/jobs/main.go b/cmd/jobs/main.go
index 638d1ab..5141643 100644
--- a/cmd/jobs/main.go
+++ b/cmd/jobs/main.go
@@ -6,7 +6,9 @@
package main

import (
+ "bytes"
"context"
+ "crypto/md5"
"encoding/json"
"errors"
"flag"
@@ -14,15 +16,23 @@
"io"
"net/http"
"os"
+ "path"
+ "path/filepath"
"reflect"
+ "strconv"
"text/tabwriter"
"time"

credsapi "cloud.google.com/go/iam/credentials/apiv1"
credspb "cloud.google.com/go/iam/credentials/apiv1/credentialspb"
+ "cloud.google.com/go/storage"
"golang.org/x/pkgsite-metrics/internal/jobs"
+ "google.golang.org/api/impersonate"
+ "google.golang.org/api/option"
)

+const projectID = "go-ecosystem"
+
var env = flag.String("env", "prod", "worker environment (dev or prod)")

var commands = []command{
@@ -30,10 +40,12 @@
"print an identity token", doPrintToken},
{"list", "",
"list jobs", doList},
- {"show", "jobID...",
+ {"show", "JOBID...",
"display information about jobs", doShow},
- {"cancel", "jobID...",
+ {"cancel", "JOBID...",
"cancel the jobs", doCancel},
+ {"start", "BINARY [MIN_IMPORTERS]",
+ "start a job", doStart},
}

type command struct {
@@ -155,6 +167,137 @@
return nil
}

+func doStart(ctx context.Context, args []string) error {
+ // Validate arguments.
+ if len(args) < 1 || len(args) > 2 {
+ return errors.New("wrong number of args: want BINARY [MIN_IMPORTERS]")
+ }
+ min := -1
+ if len(args) > 1 {
+ m, err := strconv.Atoi(args[1])
+ if err != nil {
+ return err
+ }
+ if m < 0 {
+ return errors.New("MIN_IMPORTERS cannot be negative")
+ }
+ min = m
+ }
+ binaryFile := args[0]
+ if fi, err := os.Stat(binaryFile); err != nil {
+ if errors.Is(err, os.ErrNotExist) {
+ return fmt.Errorf("%s does not exist", binaryFile)
+ }
+ return err
+ } else if fi.IsDir() {
+ return fmt.Errorf("%s is a directory, not a file", binaryFile)
+ }
+
+ // Copy binary to GCS if it's not already there.
+ if err := uploadAnalysisBinary(ctx, binaryFile); err != nil {
+ return err
+ }
+
+ // Ask the server to enqueue scan tasks.
+ idtoken, err := requestImpersonateIdentityToken(ctx)
+ if err != nil {
+ return err
+ }
+ url := fmt.Sprintf("%s/analysis/enqueue?binary=%s&user=%s", workerURL, filepath.Base(binaryFile), os.Getenv("USER"))
+ if min >= 0 {
+ url += fmt.Sprintf("&min=%d", min)
+ }
+ fmt.Println(url)
+ body, err := httpGet(ctx, url, idtoken)
+ if err != nil {
+ return err
+ }
+ fmt.Printf("%s\n", body)
+ return nil
+}
+
+// uploadAnalysisBinary copies binaryFile to the GCS location used for
+// analysis binaries.
+// As an optimization, it skips the upload if the file is already on GCS
+// and has the same checksum as the local file.
+func uploadAnalysisBinary(ctx context.Context, binaryFile string) error {
+ var upload bool
+ const bucketName = projectID
+ binaryName := filepath.Base(binaryFile)
+ objectName := path.Join("analysis-binaries", binaryName)
+
+ ts, err := impersonate.CredentialsTokenSource(ctx, impersonate.CredentialsConfig{
+ TargetPrincipal: fmt.Sprintf("impersonate@%s.iam.gserviceaccount.com", projectID),
+ Scopes: []string{"https://www.googleapis.com/auth/cloud-platform"},
+ })
+ if err != nil {
+ return err
+ }
+
+ c, err := storage.NewClient(ctx, option.WithTokenSource(ts))
+ if err != nil {
+ return err
+ }
+ defer c.Close()
+ bucket := c.Bucket(bucketName)
+ object := bucket.Object(objectName)
+ attrs, err := object.Attrs(ctx)
+ if errors.Is(err, storage.ErrObjectNotExist) {
+ fmt.Printf("%s does not exist, uploading\n", object.ObjectName())
+ upload = true
+ } else if err != nil {
+ return err
+ } else if g, w := len(attrs.MD5), md5.Size; g != w {
+ return fmt.Errorf("len(attrs.MD5) = %d, wanted %d", g, w)
+ } else {
+ localMD5, err := fileMD5(binaryFile)
+ if err != nil {
+ return err
+ }
+ upload = !bytes.Equal(localMD5, attrs.MD5)
+ if upload {
+ fmt.Printf("binary %s exists on GCS but hashes don't match; uploading\n", binaryName)
+ } else {
+ fmt.Printf("%s already on GCS with same checksum; not uploading\n", binaryFile)
+ }
+ }
+ if upload {
+ if err := copyToGCS(ctx, object, binaryFile); err != nil {
+ return err
+ }
+ fmt.Printf("copied %s to %s\n", binaryFile, object.ObjectName())
+ }
+ return nil
+}
+
+// fileMD5 computes the MD5 checksum of the given file.
+func fileMD5(filename string) ([]byte, error) {
+ f, err := os.Open(filename)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+ hash := md5.New()
+ if _, err := io.Copy(hash, f); err != nil {
+ return nil, err
+ }
+ return hash.Sum(nil)[:], nil
+}
+
+// copyToGCS copies the local file named filename to the GCS object.
+func copyToGCS(ctx context.Context, object *storage.ObjectHandle, filename string) error {
+ src, err := os.Open(filename)
+ if err != nil {
+ return err
+ }
+ defer src.Close()
+ dest := object.NewWriter(ctx)
+ if _, err := io.Copy(dest, src); err != nil {
+ return err
+ }
+ return dest.Close()
+}
+
// requestJSON requests the path from the worker, then reads the returned body
// and unmarshals it as JSON.
func requestJSON[T any](ctx context.Context, path, token string) (*T, error) {
@@ -201,7 +344,7 @@
return "", err
}
defer c.Close()
- serviceAccountEmail := "impersonate@go-ecosystem.iam.gserviceaccount.com"
+ serviceAccountEmail := fmt.Sprintf("impersonate@%s.iam.gserviceaccount.com", projectID)
req := &credspb.GenerateIdTokenRequest{
Name: "projects/-/serviceAccounts/" + serviceAccountEmail,
Audience: workerURL,

To view, visit change 498556. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: newchange
Gerrit-Project: pkgsite-metrics
Gerrit-Branch: master
Gerrit-Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
Gerrit-Change-Number: 498556
Gerrit-PatchSet: 1
Gerrit-Owner: Jonathan Amsterdam <j...@google.com>
Gerrit-Reviewer: Jonathan Amsterdam <j...@google.com>
Gerrit-Reviewer: Maceo Thompson <maceot...@google.com>
Gerrit-Reviewer: Zvonimir Pavlinovic <zpavl...@google.com>
Gerrit-Attention: Zvonimir Pavlinovic <zpavl...@google.com>
Gerrit-Attention: Maceo Thompson <maceot...@google.com>

Jonathan Amsterdam (Gerrit)

unread,
May 27, 2023, 8:35:40 AM5/27/23
to goph...@pubsubhelper.golang.org, golang-co...@googlegroups.com

Attention is currently required from: Jonathan Amsterdam, Maceo Thompson, Zvonimir Pavlinovic.

Jonathan Amsterdam uploaded patch set #2 to this change.

View Change

The following approvals got outdated and were removed: Run-TryBot+1 by Jonathan Amsterdam, TryBot-Result+1 by Gopher Robot

cmd/jobs: support starting a job

Add a "start" subcommand that copies a binary to GCS and enqueues tasks.

It replaces the internal enqueue_analysis.sh script previously used.

It improves on that script by providing better error messages, and
optimizing the upload by skipping it if the binary is already there.

Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
---
M cmd/jobs/main.go
1 file changed, 145 insertions(+), 3 deletions(-)

To view, visit change 498556. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: newpatchset
Gerrit-Project: pkgsite-metrics
Gerrit-Branch: master
Gerrit-Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
Gerrit-Change-Number: 498556
Gerrit-PatchSet: 2
Gerrit-Owner: Jonathan Amsterdam <j...@google.com>
Gerrit-Reviewer: Gopher Robot <go...@golang.org>
Gerrit-Reviewer: Jonathan Amsterdam <j...@google.com>
Gerrit-Reviewer: Maceo Thompson <maceot...@google.com>
Gerrit-Reviewer: Zvonimir Pavlinovic <zpavl...@google.com>
Gerrit-Attention: Zvonimir Pavlinovic <zpavl...@google.com>
Gerrit-Attention: Jonathan Amsterdam <j...@google.com>
Gerrit-Attention: Maceo Thompson <maceot...@google.com>

Zvonimir Pavlinovic (Gerrit)

unread,
May 28, 2023, 7:01:52 AM5/28/23
to Jonathan Amsterdam, goph...@pubsubhelper.golang.org, Gopher Robot, Maceo Thompson, golang-co...@googlegroups.com

Attention is currently required from: Jonathan Amsterdam, Maceo Thompson.

Patch set 2:Code-Review +2

View Change

    To view, visit change 498556. To unsubscribe, or for help writing mail filters, visit settings.

    Gerrit-MessageType: comment
    Gerrit-Project: pkgsite-metrics
    Gerrit-Branch: master
    Gerrit-Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
    Gerrit-Change-Number: 498556
    Gerrit-PatchSet: 2
    Gerrit-Owner: Jonathan Amsterdam <j...@google.com>
    Gerrit-Reviewer: Gopher Robot <go...@golang.org>
    Gerrit-Reviewer: Jonathan Amsterdam <j...@google.com>
    Gerrit-Reviewer: Maceo Thompson <maceot...@google.com>
    Gerrit-Reviewer: Zvonimir Pavlinovic <zpavl...@google.com>
    Gerrit-Attention: Jonathan Amsterdam <j...@google.com>
    Gerrit-Attention: Maceo Thompson <maceot...@google.com>
    Gerrit-Comment-Date: Sun, 28 May 2023 11:01:48 +0000
    Gerrit-HasComments: No
    Gerrit-Has-Labels: Yes

    Jonathan Amsterdam (Gerrit)

    unread,
    May 30, 2023, 12:37:53 PM5/30/23
    to goph...@pubsubhelper.golang.org, golang-...@googlegroups.com, Zvonimir Pavlinovic, Gopher Robot, Maceo Thompson, golang-co...@googlegroups.com

    Jonathan Amsterdam submitted this change.

    View Change

    Approvals: Zvonimir Pavlinovic: Looks good to me, approved; Gopher Robot: TryBots succeeded; Jonathan Amsterdam: Run TryBots
    cmd/jobs: support starting a job

    Add a "start" subcommand that copies a binary to GCS and enqueues tasks.

    It replaces the internal enqueue_analysis.sh script previously used.

    It improves on that script by providing better error messages, and
    optimizing the upload by skipping it if the binary is already there.

    Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
    Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/498556
    TryBot-Result: Gopher Robot <go...@golang.org>
    Run-TryBot: Jonathan Amsterdam <j...@google.com>
    Reviewed-by: Zvonimir Pavlinovic <zpavl...@google.com>

    ---
    M cmd/jobs/main.go
    1 file changed, 145 insertions(+), 3 deletions(-)

    
    
    diff --git a/cmd/jobs/main.go b/cmd/jobs/main.go
    index 638d1ab..b77660d 100644
    @@ -155,6 +167,136 @@
    +			fmt.Printf("binary %s exists on GCS but hashes don't match; uploading\n", binaryName)
    @@ -201,7 +343,7 @@

    return "", err
    }
    defer c.Close()
    - serviceAccountEmail := "impersonate@go-ecosystem.iam.gserviceaccount.com"
    + serviceAccountEmail := fmt.Sprintf("impersonate@%s.iam.gserviceaccount.com", projectID)
    req := &credspb.GenerateIdTokenRequest{
    Name: "projects/-/serviceAccounts/" + serviceAccountEmail,
    Audience: workerURL,

    To view, visit change 498556. To unsubscribe, or for help writing mail filters, visit settings.

    Gerrit-MessageType: merged
    Gerrit-Project: pkgsite-metrics
    Gerrit-Branch: master
    Gerrit-Change-Id: Ia692fab16c04f9fca84eb0dee5526fa5f120be5b
    Gerrit-Change-Number: 498556
    Gerrit-PatchSet: 3
    Reply all
    Reply to author
    Forward
    0 new messages