Fix CatPipe.get / walk_object with include_data=False

1 view
Skip to first unread message

Rob Browning

unread,
Aug 18, 2025, 2:35:45 PM
to bup-...@googlegroups.com
When include_data=False, make sure we return an integer size from
get(), and always drain the blob iterator in walk_object.

Pushed to main.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Aug 18, 2025, 2:35:46 PM
to bup-...@googlegroups.com
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/git.py | 2 +-
test/int/test_git.py | 15 +++++++++++----
2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index 2b13be06..3a3a2b49 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -1429,13 +1429,13 @@ class CatPipe:
if len(info) != 3 or len(info[0]) != 40:
raise GitError('expected object (id, type, size), got %r' % info)
oidx, typ, size = info
+ size = int(size)

if not include_data:
self.inprogress = None
yield oidx, typ, size
return

- size = int(size)
try:
it = chunkyreader(p.stdout, size)
yield oidx, typ, size
diff --git a/test/int/test_git.py b/test/int/test_git.py
index 83888621..1230ce16 100644
--- a/test/int/test_git.py
+++ b/test/int/test_git.py
@@ -403,11 +403,18 @@ def test_cat_pipe(tmpdir):
b'cat-file', b'-t', b'src').strip()
size = int(exo(b'git', b'--git-dir', bupdir,
b'cat-file', b'-s', b'src'))
+
it = git.cp().get(b'src')
- get_info = next(it)
- for buf in next(it):
- pass
- WVPASSEQ((oidx, typ, size), get_info)
+ assert (oidx, typ, size) == next(it)
+ data = b''.join(it)
+ assert data.startswith(b'tree ')
+ assert b'\nauthor ' in data
+ assert b'\ncommitter ' in data
+
+ it = git.cp().get(b'src', include_data=False)
+ assert (oidx, typ, size) == next(it)
+ assert b'' == b''.join(it)
+

def _create_idx(d, i):
idx = git.PackIdxV2Writer()
--
2.47.2

Rob Browning

unread,
Aug 18, 2025, 2:35:47 PM
to bup-...@googlegroups.com
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/git.py | 1 -
1 file changed, 1 deletion(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index 3a3a2b49..e9647f36 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -1437,7 +1437,6 @@ class CatPipe:
return

try:
- it = chunkyreader(p.stdout, size)
yield oidx, typ, size
for blob in chunkyreader(p.stdout, size):
yield blob
--
2.47.2

Rob Browning

unread,
Aug 18, 2025, 2:35:47 PM
to bup-...@googlegroups.com
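When we don't have an expected type, we have to request the data from
get_ref, even if the caller passed a false include_data. Previously,
when the object then turned out to be a blob, we never drained its
data iterator, since we don't otherwise need to read the blob's data.
Drain it in that case.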

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/git.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index e9647f36..be30abca 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -1539,10 +1539,9 @@ def walk_object(get_ref, oidx, *, stop_at=None, include_data=None,
yield [*parents, item] if result == 'path' else item
continue

- if exp_typ in (b'commit', b'tree', None): # must have the data
- item_it = get_ref(oidx, include_data=True)
- else:
- item_it = get_ref(oidx, include_data=include_data)
+ # must have data for commits, trees, or unknown
+ got_data = (exp_typ in (b'commit', b'tree', None)) or include_data
+ item_it = get_ref(oidx, include_data=got_data)
get_oidx, typ, _ = next(item_it)
if not get_oidx:
item = WalkItem(oid=unhexlify(oidx), type=exp_typ, name=name,
@@ -1556,6 +1555,8 @@ def walk_object(get_ref, oidx, *, stop_at=None, include_data=None,

# FIXME: set the mode based on the type when the mode is None
if typ == b'blob' and not include_data:
+ if got_data: # i.e. exp_typ was None
+ for _ in item_it: pass
data = None
else:
data = b''.join(item_it)
--
2.47.2

Rob Browning

unread,
Aug 18, 2025, 3:52:10 PM
to bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

> When include_data=False, make sure we return an integer size from
> get(), and always drain the blob iterator in walk_object.
>
> Pushed to main.

Also cherry-picked the CatPipe.get fixes to 0.33.x.
Reply all
Reply to author
Forward
0 new messages