When the ref is 40 hexadecimal digits, compute the oid of the incoming
data and make sure it matches the ref. When it doesn't, for now, just
throw. This prevents the remote from being able to trick us,
accidentally or intentionally.
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
Proposed for main.
lib/bup/git.py | 6 +++++-
lib/bup/repo/remote.py | 23 ++++++++++++++++++++---
note/main.md | 5 +++++
3 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/lib/bup/git.py b/lib/bup/git.py
index d5665436..37d38f36 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -207,10 +207,14 @@ def demangle_name(name, mode):
return (name, BUP_NORMAL)
+def start_sha1(kind, size):
+ assert kind in (b'tree', b'commit', b'blob', b'tag'), kind
+ return Sha1(b'%s %d\0' % (kind, size))
+
def calc_hash(type, content):
"""Calculate some content's hash in the Git fashion."""
header = b'%s %d\0' % (type, len(content))
- sum = Sha1(header)
+ sum = start_sha1(type, len(content))
sum.update(content)
return sum.digest()
diff --git a/lib/bup/repo/remote.py b/lib/bup/repo/remote.py
index 9545eb23..70218d4b 100644
--- a/lib/bup/repo/remote.py
+++ b/lib/bup/repo/remote.py
@@ -1,8 +1,13 @@
-from bup import client
+from binascii import hexlify
+import re
+
+from bup import client, git
from bup.repo.base import _make_base, RepoProtocol
+_hex_rx = re.compile(br'[0-9a-f]+')  # whole-ref match; lowercase only, since it is compared to hexlify() output
+
class RemoteRepo(RepoProtocol):
def __init__(self, address, create=False, compression_level=None,
max_pack_size=None, max_pack_objects=None):
@@ -58,11 +63,23 @@ class RemoteRepo(RepoProtocol):
# Yield all the data here so that we don't finish the
# cat_batch iterator (triggering its cleanup) until all of the
# data has been read. Otherwise we'd be out of sync with the
- # server.
+ # server. If the ref is 40 hex digits, then assume it's an
+ # oid, and verify that the data provided by the remote
+ # actually has that oid. If not, throw.
items = self.client.cat_batch((ref,))
oidx, typ, size, it = info = next(items)
yield info[:-1]
- if oidx: yield from it
+ if oidx:
+ if len(ref) != 40 or not _hex_rx.fullmatch(ref):
+ yield from it
+ else:
+ actual_oid = git.start_sha1(typ, size)
+ for data in it:
+ actual_oid.update(data)
+ yield data
+ actual_oid = actual_oid.digest()
+ if hexlify(actual_oid) != ref:
+ raise Exception(f'received {actual_oid.hex()}, expected oid {ref}')
assert not next(items, None)
def write_commit(self, tree, parent,
diff --git a/note/main.md b/note/main.md
index 797c3979..6ec8f0c1 100644
--- a/note/main.md
+++ b/note/main.md
@@ -119,6 +119,11 @@ General
example, replacing paths with missing contents with synthesized
"repair files". See `bup-get`(1) for additional information.
+* `bup` now verifies the hash of the incoming remote data when
+ requesting a specific hash (rather than an arbitrary "ref").
+ Combined with `--source-url`, this further decreases the trust
+ required in a remote.
+
* The default pack compression level can now be configured via either
`pack.compression` or `core.compression`. See `bup-config`(5) for
additional information.
--
2.47.3