B2 configuration line build s3 config


nor...@perkeep.org

Jun 17, 2022, 10:35:51 AM
to camlistor...@googlegroups.com


https://github.com/perkeep/perkeep/commit/eed05eef5baa0e26549c682aa374536804c9bdbf

commit eed05eef5baa0e26549c682aa374536804c9bdbf
Author: kalidor <kal...@unixed.fr>
Date: Sat Mar 19 20:38:00 2022 +0100

B2 configuration line build s3 config

diff --git a/doc/server-config.md b/doc/server-config.md
index c00b688..3d81c26 100644
--- a/doc/server-config.md
+++ b/doc/server-config.md
@@ -94,7 +94,7 @@ At least one of these must be set:
* `blobPath`: local disk path to store blobs. (valid for diskpacked too).
* `s3`: "`key:secret:bucket[/optional/dir]`" or
"`key:secret:bucket[/optional/dir]:hostname`" (with colons, but no quotes).
-* `b2`: "`account_id:application_key:bucket[/optional/dir]`".
+* `b2`: "`account_id:application_key:bucket[/optional/dir]:endpoint`"
* `googlecloudstorage`: "`clientId:clientSecret:refreshToken:bucketName[/optional/dir]`"

The `s3` storage option's `hostname` value may be set to use an S3-compatible
@@ -102,8 +102,6 @@ endpoint instead of AWS S3, such as `my-minio-server.example.com`. A specific
region may be specified by using [Low-level Configuration](#lowlevel), though
the bucket's region will generally be detected automatically.

-The `s3` storage option can be used for `b2` since it's possible to use AWS go SDK [backblaze article](https://help.backblaze.com/hc/en-us/articles/360047629713-Using-the-AWS-Go-SDK-with-B2). However since it requires the region to be provided, using [Low-level Configuration](#lowlevel) is preferred.
-
Additionally, there are two mutually exclusive options:

* `packRelated`: if true, blobs are automatically repacked for fast read access.
diff --git a/doc/storage-examples.md b/doc/storage-examples.md
index 181876e..369ed74 100644
--- a/doc/storage-examples.md
+++ b/doc/storage-examples.md
@@ -68,12 +68,12 @@ It is advisable to create a dedicated Application Key for Perkeep:

Finally, add the b2 config line to your perkeep `server-config.json`:
```
-"b2": "keyID:applicationKey:bucket"
+"b2": "keyID:applicationKey:bucket:endpoint"
```

-It is also possible to use `s3` configuration to interact with `b2` API.
+- `endpoint` can be found on the backblaze buckets' interface https://secure.backblaze.com/b2_buckets.htm.

-This is an example of low-level configuration file:
+Using low-level configuration:

```json
"/bs2/": {
diff --git a/go.mod b/go.mod
index 8e10f9e..e5c5c81 100644
--- a/go.mod
+++ b/go.mod
@@ -9,7 +9,6 @@ require (
cloud.google.com/go/logging v1.3.0
cloud.google.com/go/storage v1.10.0
filippo.io/age v1.0.0
- github.com/FiloSottile/b2 v0.0.0-20170207175032-b197f7a2c317 // indirect
github.com/aws/aws-sdk-go v1.14.31
github.com/bradfitz/latlong v0.0.0-20170410180902-f3db6d0dff40
github.com/facebookgo/ensure v0.0.0-20160127193407-b4ab57deab51 // indirect
@@ -32,7 +31,6 @@ require (
github.com/mattn/go-mastodon v0.0.5-0.20190517015615-8f6192e26b66
github.com/miekg/dns v1.1.43
github.com/nf/cr2 v0.0.0-20140528043846-05d46fef4f2f
- github.com/perkeep/b2 v0.0.0-20180913003434-1ae8d9b78db9
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/sftp v1.13.2
github.com/plaid/plaid-go v0.0.0-20161222051224-02b6af68061b
diff --git a/pkg/blobserver/b2/b2.go b/pkg/blobserver/b2/b2.go
deleted file mode 100644
index e8767f3..0000000
--- a/pkg/blobserver/b2/b2.go
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-Copyright 2016 The Perkeep Authors
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package b2
-
-import (
- "bytes"
- "context"
- "errors"
- "fmt"
- "io"
- "net/http"
- "os"
- "path"
- "strings"
-
- "perkeep.org/pkg/blob"
- "perkeep.org/pkg/blobserver"
- "perkeep.org/pkg/blobserver/memory"
- "perkeep.org/pkg/constants"
-
- "github.com/perkeep/b2"
- "go4.org/jsonconfig"
- "go4.org/syncutil"
-)
-
-type Storage struct {
- cl *b2.Client
- b *b2.BucketInfo
- // optional "directory" where the blobs are stored, instead of at the root of the bucket.
- // b2 is actually flat, which in effect just means that all the objects should have this
- // dirPrefix as a prefix of their key.
- // If non empty, it should be a slash separated path with a trailing slash and no starting
- // slash.
- dirPrefix string
- cache *memory.Storage // or nil for no cache
-}
-
-func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (blobserver.Storage, error) {
- var (
- auth = config.RequiredObject("auth")
- bucket = config.RequiredString("bucket")
- cacheSize = config.OptionalInt64("cacheSize", 32<<20)
-
- accountID = auth.RequiredString("account_id")
- appKey = auth.RequiredString("application_key")
- )
-
- if err := config.Validate(); err != nil {
- return nil, err
- }
- if err := auth.Validate(); err != nil {
- return nil, err
- }
-
- var dirPrefix string
- if parts := strings.SplitN(bucket, "/", 2); len(parts) > 1 {
- dirPrefix = parts[1]
- bucket = parts[0]
- }
- if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") {
- dirPrefix += "/"
- }
-
- t := http.DefaultTransport.(*http.Transport).Clone()
- t.MaxIdleConnsPerHost = 50 // we do delete bursts
- httpClient := &http.Client{Transport: t}
- cl, err := b2.NewClient(accountID, appKey, httpClient)
- if err != nil {
- return nil, err
- }
- b, err := cl.BucketByName(bucket, true)
- if err != nil {
- return nil, err
- }
-
- s := &Storage{
- cl: cl, b: b,
- dirPrefix: dirPrefix,
- }
-
- if cacheSize != 0 {
- s.cache = memory.NewCache(cacheSize)
- }
-
- return s, nil
-}
-
-func (s *Storage) EnumerateBlobs(ctx context.Context, dest chan<- blob.SizedRef, after string, limit int) error {
- defer close(dest)
- l := s.b.ListFiles(s.dirPrefix + after)
- l.SetPageCount(limit)
- for i := 0; i < limit && l.Next(); i++ {
- fi := l.FileInfo()
- dir, file := path.Split(fi.Name)
- if dir != s.dirPrefix {
- continue
- }
- if file == after {
- i--
- continue // ListFiles starting point is *included*
- }
- br, ok := blob.Parse(file)
- if !ok {
- return fmt.Errorf("b2: non-Perkeep object named %q found in bucket", file)
- }
- select {
- case dest <- blob.SizedRef{Ref: br, Size: uint32(fi.ContentLength)}:
- case <-ctx.Done():
- return ctx.Err()
- }
- }
- return l.Err()
-}
-
-func (s *Storage) ReceiveBlob(ctx context.Context, br blob.Ref, source io.Reader) (blob.SizedRef, error) {
- // TODO: pass ctx to b2 library, once github.com/FiloSottile/b2 supports it.
- var buf bytes.Buffer
- size, err := io.Copy(&buf, source)
- if err != nil {
- return blob.SizedRef{}, err
- }
-
- b := bytes.NewReader(buf.Bytes())
- fi, err := s.b.Upload(b, s.dirPrefix+br.String(), "")
- if err != nil {
- return blob.SizedRef{}, err
- }
-
- if int64(fi.ContentLength) != size {
- return blob.SizedRef{}, fmt.Errorf("b2: expected ContentLength %d, got %d", size, fi.ContentLength)
- }
- if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
- return blob.SizedRef{}, fmt.Errorf("b2: expected ContentSHA1 %s, got %s", br.Digest(), fi.ContentSHA1)
- }
-
- if s.cache != nil {
- // NoHash because it's already verified if we read it without
- // errors from the source, and uploaded it without mismatch.
- blobserver.ReceiveNoHash(ctx, s.cache, br, &buf)
- }
- return blob.SizedRef{Ref: br, Size: uint32(size)}, nil
-}
-
-func (s *Storage) StatBlobs(ctx context.Context, blobs []blob.Ref, fn func(blob.SizedRef) error) error {
- // TODO: use cache
- gate := syncutil.NewGate(5) // arbitrary cap
- return blobserver.StatBlobsParallelHelper(ctx, blobs, fn, gate, func(br blob.Ref) (sb blob.SizedRef, err error) {
- fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
- if err == b2.FileNotFoundError {
- return sb, nil
- }
- if err != nil {
- return sb, err
- }
- if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
- return sb, errors.New("b2: remote ContentSHA1 mismatch")
- }
- size := fi.ContentLength
- if size > constants.MaxBlobSize {
- return sb, fmt.Errorf("blob %s stat size too large (%d)", br, size)
- }
- return blob.SizedRef{Ref: br, Size: uint32(size)}, nil
- })
-}
-
-func (s *Storage) Fetch(ctx context.Context, br blob.Ref) (rc io.ReadCloser, size uint32, err error) {
- if s.cache != nil {
- if rc, size, err = s.cache.Fetch(ctx, br); err == nil {
- return
- }
- }
- // TODO: pass ctx to b2 library, once github.com/FiloSottile/b2 supports it.
- r, fi, err := s.cl.DownloadFileByName(s.b.Name, s.dirPrefix+br.String())
- if err, ok := b2.UnwrapError(err); ok && err.Status == 404 {
- return nil, 0, os.ErrNotExist
- }
- if err != nil {
- return nil, 0, err
- }
-
- if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
- return nil, 0, errors.New("b2: remote ContentSHA1 mismatch")
- }
-
- if int64(fi.ContentLength) >= int64(1<<32) {
- r.Close()
- return nil, 0, errors.New("object larger than a uint32")
- }
- size = uint32(fi.ContentLength)
- if size > constants.MaxBlobSize {
- r.Close()
- return nil, size, errors.New("object too big")
- }
- return r, size, nil
-}
-
-func (s *Storage) RemoveBlobs(ctx context.Context, blobs []blob.Ref) error {
- if s.cache != nil {
- s.cache.RemoveBlobs(ctx, blobs)
- }
- gate := syncutil.NewGate(5) // arbitrary
- var grp syncutil.Group
- for i := range blobs {
- gate.Start()
- br := blobs[i]
- grp.Go(func() error {
- defer gate.Done()
- fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
- if err == b2.FileNotFoundError {
- return nil
- }
- if err != nil {
- return err
- }
- if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
- return errors.New("b2: remote ContentSHA1 mismatch")
- }
- return s.cl.DeleteFile(fi.ID, fi.Name)
- })
- }
- return grp.Err()
-}
-
-func init() {
- blobserver.RegisterStorageConstructor("b2", blobserver.StorageConstructor(newFromConfig))
-}
diff --git a/pkg/blobserver/b2/b2_test.go b/pkg/blobserver/b2/b2_test.go
deleted file mode 100644
index bdb1361..0000000
--- a/pkg/blobserver/b2/b2_test.go
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
-Copyright 2016 The Perkeep Authors
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package b2
-
-import (
- "context"
- "flag"
- "fmt"
- "log"
- "math/rand"
- "path"
- "strings"
- "testing"
- "time"
-
- "perkeep.org/pkg/blob"
- "perkeep.org/pkg/blobserver"
- "perkeep.org/pkg/blobserver/storagetest"
-
- "go4.org/jsonconfig"
-)
-
-var (
- accountID = flag.String("account-id", "", "B2 Account ID for testing")
- appKey = flag.String("application-key", "", "B2 Application Key for testing")
-)
-
-func TestStorage(t *testing.T) {
- testStorage(t, "")
-}
-
-func TestStorageWithBucketDir(t *testing.T) {
- testStorage(t, "/bl/obs/")
-}
-
-func testStorage(t *testing.T, bucketDir string) {
- ctx := context.Background()
- if *accountID == "" && *appKey == "" {
- t.Skip("Skipping test without --account-id or --application-key flag")
- }
-
- rn := rand.New(rand.NewSource(time.Now().UnixNano())).Intn(1000000)
- bucket := fmt.Sprintf("camli-test-%d", rn)
- bucketWithDir := path.Join(bucket, bucketDir)
-
- storagetest.TestOpt(t, storagetest.Opts{
- New: func(t *testing.T) (sto blobserver.Storage, cleanup func()) {
- sto, err := newFromConfig(nil, jsonconfig.Obj{
- "bucket": bucketWithDir,
- "auth": map[string]interface{}{
- "account_id": *accountID,
- "application_key": *appKey,
- },
- })
- if err != nil {
- t.Fatal(err)
- }
- if !testing.Short() {
- log.Printf("Warning: this test does many serial operations. Without the go test -short flag, this test will be very slow.")
- }
- if bucketWithDir != bucket {
- // Adding "a", and "c" objects in the bucket to make sure objects out of the
- // "directory" are not touched and have no influence.
- for _, key := range []string{"a", "c"} {
- if _, err := sto.(*Storage).b.Upload(strings.NewReader(key), key, ""); err != nil {
- t.Fatalf("could not insert object %s in bucket %v: %v", key, sto.(*Storage).b.Name, err)
- }
- }
- }
-
- clearBucket := func(beforeTests bool) func() {
- return func() {
- var all []blob.Ref
- blobserver.EnumerateAll(ctx, sto, func(sb blob.SizedRef) error {
- t.Logf("Deleting: %v", sb.Ref)
- all = append(all, sb.Ref)
- return nil
- })
- if err := sto.RemoveBlobs(ctx, all); err != nil {
- t.Fatalf("Error removing blobs during cleanup: %v", err)
- }
- if beforeTests {
- return
- }
- if bucketWithDir != bucket {
- // checking that "a" and "c" at the root were left untouched.
- for _, key := range []string{"a", "c"} {
- fi, err := sto.(*Storage).b.GetFileInfoByName(key)
- if err != nil {
- t.Fatalf("could not remove object %s after tests: %v", key, err)
- }
- if err := sto.(*Storage).cl.DeleteFile(fi.ID, fi.Name); err != nil {
- t.Fatalf("could not remove object %s after tests: %v", key, err)
- }
-
- }
- }
- if err := sto.(*Storage).b.Delete(); err != nil {
- t.Fatalf("could not remove5D bucket %s after tests: %v", sto.(*Storage).b.Name, err)
- }
- }
- }
- clearBucket(true)()
- return sto, clearBucket(false)
- },
- })
-}
diff --git a/pkg/blobserver/s3/s3.go b/pkg/blobserver/s3/s3.go
index b0da9c8..5fc1276 100644
--- a/pkg/blobserver/s3/s3.go
+++ b/pkg/blobserver/s3/s3.go
@@ -168,6 +168,7 @@ func newFromConfigWithTransport(_ blobserver.Loader, config jsonconfig.Obj, tran

func init() {
blobserver.RegisterStorageConstructor("s3", blobserver.StorageConstructor(newFromConfig))
+ blobserver.RegisterStorageConstructor("b12", blobserver.StorageConstructor(newFromConfig))
}

// isNotFound checks for s3 errors which indicate the object doesn't exist.
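
The hunk above registers the s3 constructor a second time under the `b2` type name. A minimal standalone sketch of that aliasing pattern, with simplified types (this is not perkeep's actual `blobserver` package):

```go
package main

import "fmt"

// StorageConstructor mirrors the shape of a blobserver constructor
// (hypothetical, simplified signature for illustration).
type StorageConstructor func(config map[string]interface{}) (interface{}, error)

var constructors = map[string]StorageConstructor{}

// Register installs a constructor under a storage type name. Registering
// the same function under two names is how "b2" becomes an alias for "s3".
func Register(name string, fn StorageConstructor) {
	if _, dup := constructors[name]; dup {
		panic("duplicate storage type: " + name)
	}
	constructors[name] = fn
}

func newS3(config map[string]interface{}) (interface{}, error) {
	return fmt.Sprintf("s3-compatible storage for bucket %v", config["bucket"]), nil
}

func main() {
	Register("s3", newS3)
	Register("b2", newS3) // same constructor, second type name
	st, err := constructors["b2"](map[string]interface{}{"bucket": "demo"})
	if err != nil {
		panic(err)
	}
	fmt.Println(st)
}
```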
diff --git a/pkg/blobserver/s3/s3_preflight.go b/pkg/blobserver/s3/s3_preflight.go
index af5a7ca..298d4f3 100644
--- a/pkg/blobserver/s3/s3_preflight.go
+++ b/pkg/blobserver/s3/s3_preflight.go
@@ -77,7 +77,7 @@ func normalizeBucketLocation(ctx context.Context, cfg client.ConfigProvider, end
if err != nil {
return bucketInfo{}, err
}
- // if isAWS is false, this is b2 related, region should be provided
+ // if isAWS is false, this is b2 related
if !isAWS {
return bucketInfo{
endpoint: endpoint,
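
With the native b2 backend gone, the preflight code only has to decide whether an endpoint is AWS proper (where the region can be normalized through AWS APIs) or an S3-compatible service such as B2 (where the endpoint is used as given). A hedged sketch of that kind of check — assumed logic for illustration, not perkeep's exact code:

```go
package main

import (
	"fmt"
	"strings"
)

// isAWSEndpoint guesses whether an endpoint belongs to AWS itself.
// This heuristic is an assumption; perkeep's detection may differ.
func isAWSEndpoint(endpoint string) bool {
	return endpoint == "" || strings.HasSuffix(endpoint, ".amazonaws.com")
}

func main() {
	fmt.Println(isAWSEndpoint("s3.us-east-1.amazonaws.com"))     // true: AWS, region can be resolved
	fmt.Println(isAWSEndpoint("s3.us-west-004.backblazeb2.com")) // false: b2 related, endpoint kept as-is
}
```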
diff --git a/pkg/serverinit/genconfig.go b/pkg/serverinit/genconfig.go
index 54a28d1..811092e 100644
--- a/pkg/serverinit/genconfig.go
+++ b/pkg/serverinit/genconfig.go
@@ -619,76 +619,7 @@ func (b *lowBuilder) addS3Config(s3 string) error {
}

func (b *lowBuilder) addB2Config(b2 string) error {
- f := strings.SplitN(b2, ":", 3)
- if len(f) < 3 {
- return errors.New(`genconfig: expected "b2" field to be of form "account_id:application_key:bucket[/optional/dir]"`)
- }
- account, key, bucket := f[0], f[1], f[2]
- isReplica := b.hasPrefix("/bs/")
- b2Prefix := "/bs/"
- if isReplica {
- b2Prefix = "/sto-b2/"
- }
-
- b2Args := func(bucket string) args {
- a := args{
- "bucket": bucket,
- "auth": map[string]interface{}{
- "account_id": account,
- "application_key": key,
- },
- }
- return a
- }
-
- if !b.high.PackRelated {
- b.addPrefix(b2Prefix, "storage-b2", b2Args(bucket))
- } else {
- bsLoose := "/bs-loose/"
- bsPacked := "/bs-packed/"
- if isReplica {
- bsLoose = "/sto-b2-bs-loose/"
- bsPacked = "/sto-b2-bs-packed/"
- }
-
- b.addPrefix(bsLoose, "storage-b2", b2Args(path.Join(bucket, "loose")))
- b.addPrefix(bsPacked, "storage-b2", b2Args(path.Join(bucket, "packed")))
-
- // If index is DBMS, then blobPackedIndex is in DBMS too.
- // Otherwise blobPackedIndex is same file-based DB as the index,
- // in same dir, but named packindex.dbtype.
- blobPackedIndex, err := b.sortedStorageAt(dbBlobpackedIndex, filepath.Join(b.indexFileDir(), "packindex"))
- if err != nil {
- return err
- }
- b.addPrefix(b2Prefix, "storage-blobpacked", args{
- "smallBlobs": "/bs-loose/",
- "largeBlobs": "/bs-packed/",
- "metaIndex": blobPackedIndex,
- })
- }
-
- if isReplica {
- if b.high.BlobPath == "" && !b.high.MemoryStorage {
- panic("unexpected empty blobpath with sync-to-b2")
- }
- b.addPrefix("/sync-to-b2/", "sync", args{
- "from": "/bs/",
- "to": b2Prefix,
- "queue": b.thatQueueUnlessMemory(
- map[string]interface{}{
- "type": b.kvFileType(),
- "file": filepath.Join(b.high.BlobPath, "sync-to-b2-queue."+b.kvFileType()),
- }),
- })
- return nil
- }
-
- b.addPrefix("/cache/", "storage-filesystem", args{
- "path": filepath.Join(tempDir(), "camli-cache"),
- })
-
- return nil
+ return b.addS3Config(b2)
}

func (b *lowBuilder) addGoogleDriveConfig(v string) error {
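
`addB2Config` now simply delegates to `addS3Config`, so the b2 high-level string must split the same way as the s3 one: credentials, bucket (with optional directory), and endpoint. A self-contained sketch of that parsing — `parseB2Line` is a hypothetical helper for illustration; the real parsing happens inside `addS3Config`:

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// parseB2Line splits an "account_id:application_key:bucket[/optional/dir]:endpoint"
// configuration value. Hypothetical helper; addS3Config does the real work.
func parseB2Line(v string) (key, secret, bucket, endpoint string, err error) {
	f := strings.SplitN(v, ":", 4)
	if len(f) < 4 {
		return "", "", "", "", errors.New(`expected "account_id:application_key:bucket[/optional/dir]:endpoint"`)
	}
	return f[0], f[1], f[2], f[3], nil
}

func main() {
	key, secret, bucket, endpoint, err := parseB2Line(
		"0012345abcde:K001exampleApplicationKey:my-bucket/photos:s3.us-west-004.backblazeb2.com")
	if err != nil {
		panic(err)
	}
	fmt.Println(key, secret, bucket, endpoint)
}
```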
diff --git a/server/perkeepd/perkeepd.go b/server/perkeepd/perkeepd.go
index 246fa72..5350074 100644
--- a/server/perkeepd/perkeepd.go
+++ b/server/perkeepd/perkeepd.go
@@ -50,7 +50,6 @@ import (

// Storage options:
_ "perkeep.org/pkg/blobserver/azure"
- _ "perkeep.org/pkg/blobserver/b2"
"perkeep.org/pkg/blobserver/blobpacked"
_ "perkeep.org/pkg/blobserver/cond"
_ "perkeep.org/pkg/blobserver/diskpacked"