[PATCH 1/1] prune-older: handle duplicate save names

1 view
Skip to first unread message

Rob Browning

unread,
Jul 19, 2025, 6:09:39 PM
to bup-...@googlegroups.com
Don't fail with "error: cannot access SAVE in SAVE" when a branch has
duplicate save names. Add vfs.save_names_for_commit_utcs() to
generate the correct save names for a series of commit times, and rely
on it in prune-older. Ensure the prune-older tests cover this case.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Proposed for main and 0.33.x.

lib/bup/cmd/prune_older.py | 28 +++++++++++++++-------------
lib/bup/vfs.py | 11 ++++++-----
note/main.md | 5 +++++
test/ext/test_prune_older.py | 12 ++++++++----
4 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/lib/bup/cmd/prune_older.py b/lib/bup/cmd/prune_older.py
index 2eef948f..f439f293 100644
--- a/lib/bup/cmd/prune_older.py
+++ b/lib/bup/cmd/prune_older.py
@@ -12,6 +12,7 @@ from bup.helpers import die_if_errors, log, partition, period_as_secs
from bup.io import byte_stream
from bup.repo import LocalRepo
from bup.rm import bup_rm
+from bup.vfs import save_names_for_commit_utcs


def branches(refnames=tuple()):
@@ -19,14 +20,11 @@ def branches(refnames=tuple()):
in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
limit_to_heads=True))

-def save_name(branch, utc):
- return branch + b'/' \
- + strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
-
def classify_saves(saves, period_start):
- """For each (utc, id) in saves, yield (True, (utc, id)) if the save
- should be kept and (False, (utc, id)) if the save should be removed.
- The ids are binary hashes.
+ """For each (utc, ...) in saves, yield (True, (utc, ...)) if the
+ save should be kept and (False, (utc, ...)) if the save should be
+ removed.
+
"""

def retain_newest_in_region(region):
@@ -139,16 +137,20 @@ def main(argv):
removals = []
for branch, branch_id in branches(roots):
die_if_errors()
- saves = ((utc, unhexlify(oidx)) for (oidx, utc) in
- git.rev_list(branch_id, format=b'%at', parse=parse_info))
- for keep_save, (utc, id) in classify_saves(saves, period_start):
+ # At the moment, oids are irrelevant; the save name is crucial
+ revs = list(git.rev_list(branch_id, format=b'%at', parse=parse_info))
+ saves = ((utc, unhexlify(oidx), save_name) \
+ for ((oidx, utc), save_name) \
+ in zip(revs, save_names_for_commit_utcs(x[1] for x in revs)))
+ for keep_save, (utc, oid_, save_name) \
+ in classify_saves(saves, period_start):
assert(keep_save in (False, True))
# FIXME: base removals on hashes
if opt.pretend:
- out.write((b'+ ' if keep_save else b'- ')
- + save_name(branch, utc) + b'\n')
+ out.write(b'%s %s/%s\n' % (b'+ ' if keep_save else b'- ',
+ branch, save_name))
elif not keep_save:
- removals.append(save_name(branch, utc))
+ removals.append(b'%s/%s' % (branch, save_name))

if not opt.pretend:
die_if_errors()
diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
index 10aa0806..b6a3b587 100644
--- a/lib/bup/vfs.py
+++ b/lib/bup/vfs.py
@@ -722,16 +722,17 @@ def _reverse_suffix_duplicates(strs):
seen[name][0] -= 1
del seen

+def save_names_for_commit_utcs(utcs):
+ names = (strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
+ for utc in utcs)
+ return _reverse_suffix_duplicates(names)
+
def parse_rev(f):
items = f.readline().split(None)
assert len(items) == 2
tree, auth_sec = items
return unhexlify(tree), int(auth_sec)

-def _name_for_rev(rev):
- commit_oidx, (tree_oid, utc) = rev
- return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
-
def _item_for_rev(rev):
commit_oidx, (tree_oid, utc) = rev
coid = unhexlify(commit_oidx)
@@ -757,7 +758,7 @@ def cache_commit(repo, oid, require_meta=True):
parse=parse_rev)
rev_items, rev_names = tee(revs)
revs = None # Don't disturb the tees
- rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
+ rev_names = save_names_for_commit_utcs(x[1][1] for x in rev_names)
rev_items = (_item_for_rev(x) for x in rev_items)
tip = None
for name, item in zip(rev_names, rev_items):
diff --git a/note/main.md b/note/main.md
index abf02b5d..7f85cdf0 100644
--- a/note/main.md
+++ b/note/main.md
@@ -162,6 +162,11 @@ Bugs
when they were directly related. Now it appends across all
duplicates.

+* `bup prune-older` should no longer be confused by duplicate save
+ names, i.e. commits with duplicate commit times (author
+ dates). Previously it would fail with a message like "error: cannot
+ access SAVE in SAVE".
+
* When run on an existing repository, `bup init` will no longer change
existing `core.logAllRefUpdates` settings.

diff --git a/test/ext/test_prune_older.py b/test/ext/test_prune_older.py
index 39f5c1ad..06a46b32 100644
--- a/test/ext/test_prune_older.py
+++ b/test/ext/test_prune_older.py
@@ -23,13 +23,17 @@ def create_older_random_saves(n, start_utc, end_utc):
with open(b'foo', 'wb') as f:
pass
ex([b'git', b'add', b'foo'])
- utcs = set()
+ utcs = []
while len(utcs) != n:
- utcs.add(randint(start_utc, end_utc))
+ utc = randint(start_utc, end_utc)
+ utcs.append(utc)
+ if n > 1: # ensure we have some duplicates
+ for i in range(min(10, max(1, n // 3))):
+ utcs[i] = utcs[-i]
utcs = sorted(utcs)
- for utc in utcs:
+ for i, utc in enumerate(utcs):
with open(b'foo', 'wb') as f:
- f.write(b'%d\n' % utc)
+ f.write(b'%d\n' % i)
ex([b'git', b'commit', b'--date', b'%d' % utc, b'-qam', b'%d' % utc])
ex([b'git', b'gc', b'--aggressive'])
return utcs
--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 3:48:30 PM
to bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

> Don't fail with "error: cannot access SAVE in SAVE" when a branch has
> duplicate save names. Provide vfs.save_names_for_commit_utcs() to
> provide the correct save names for a series of commit times and rely
> on it in prune older. Ensure the prune-older tests cover this case.
>
> Signed-off-by: Rob Browning <r...@defaultvalue.org>
> Tested-by: Rob Browning <r...@defaultvalue.org>
> ---
>
> Proposed for main and 0.33.x.

Pushed to main.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Jul 20, 2025, 5:11:43 PM
to bup-...@googlegroups.com
Don't fail with "error: cannot access SAVE in SAVE" when a branch has
duplicate save names. Add vfs.save_names_for_commit_utcs() to
generate the correct save names for a series of commit times, and rely
on it in prune-older. Ensure the prune-older tests cover this case.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
(cherry picked from commit bb9e7e3bce755b882735ae5ea762d2fbd0a0c837)
---
lib/bup/cmd/prune_older.py | 28 +++++++++++++++-------------
lib/bup/vfs.py | 11 ++++++-----
note/0.33.x.md | 5 +++++
test/ext/test_prune_older.py | 12 ++++++++----
4 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/lib/bup/cmd/prune_older.py b/lib/bup/cmd/prune_older.py
index 900abd9a..b7962190 100644
--- a/lib/bup/cmd/prune_older.py
+++ b/lib/bup/cmd/prune_older.py
@@ -13,6 +13,7 @@ from bup.helpers import die_if_errors, log, partition, period_as_secs
from bup.io import byte_stream
from bup.repo import LocalRepo
from bup.rm import bup_rm
+from bup.vfs import save_names_for_commit_utcs


def branches(refnames=tuple()):
@@ -20,14 +21,11 @@ def branches(refnames=tuple()):
in git.list_refs(patterns=(b'refs/heads/' + n for n in refnames),
limit_to_heads=True))

-def save_name(branch, utc):
- return branch + b'/' \
- + strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
-
def classify_saves(saves, period_start):
- """For each (utc, id) in saves, yield (True, (utc, id)) if the save
- should be kept and (False, (utc, id)) if the save should be removed.
- The ids are binary hashes.
+ """For each (utc, ...) in saves, yield (True, (utc, ...)) if the
+ save should be kept and (False, (utc, ...)) if the save should be
+ removed.
+
"""

def retain_newest_in_region(region):
@@ -140,16 +138,20 @@ def main(argv):
index 781733f2..8c74fe42 100644
--- a/lib/bup/vfs.py
+++ b/lib/bup/vfs.py
@@ -750,16 +750,17 @@ def _reverse_suffix_duplicates(strs):
seen[name][0] -= 1
del seen

+def save_names_for_commit_utcs(utcs):
+ names = (strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
+ for utc in utcs)
+ return _reverse_suffix_duplicates(names)
+
def parse_rev(f):
items = f.readline().split(None)
assert len(items) == 2
tree, auth_sec = items
return unhexlify(tree), int(auth_sec)

-def _name_for_rev(rev):
- commit_oidx, (tree_oid, utc) = rev
- return strftime('%Y-%m-%d-%H%M%S', localtime(utc)).encode('ascii')
-
def _item_for_rev(rev):
commit_oidx, (tree_oid, utc) = rev
coid = unhexlify(commit_oidx)
@@ -785,7 +786,7 @@ def cache_commit(repo, oid, require_meta=True):
parse=parse_rev)
rev_items, rev_names = tee(revs)
revs = None # Don't disturb the tees
- rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
+ rev_names = save_names_for_commit_utcs(x[1][1] for x in rev_names)
rev_items = (_item_for_rev(x) for x in rev_items)
tip = None
for name, item in zip(rev_names, rev_items):
diff --git a/note/0.33.x.md b/note/0.33.x.md
index b11245b6..f9334c0f 100644
--- a/note/0.33.x.md
+++ b/note/0.33.x.md
@@ -32,6 +32,11 @@ Bugs
when they were directly related. Now it appends across all
duplicates.

+* `bup prune-older` should no longer be confused by duplicate save
+ names, i.e. commits with duplicate commit times (author
+ dates). Previously it would fail with a message like "error: cannot
+ access SAVE in SAVE".
+
Thanks to (at least)
====================

diff --git a/test/ext/test_prune_older.py b/test/ext/test_prune_older.py
index 36e19633..1e656b27 100644
--- a/test/ext/test_prune_older.py
+++ b/test/ext/test_prune_older.py
@@ -31,13 +31,17 @@ def create_older_random_saves(n, start_utc, end_utc):
Reply all
Reply to author
Forward
0 new messages