Fix f*sync and duplicate save name issues in 0.33.x too

2 views
Skip to first unread message

Rob Browning

unread,
Jul 20, 2025, 5:11:43 PMJul 20
to bup-...@googlegroups.com
Pushed to 0.33.x

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Jul 20, 2025, 5:11:44 PMJul 20
to bup-...@googlegroups.com, Johannes Berg
From: Johannes Berg <joha...@sipsolutions.net>

Save names are YYYY-MM-DD-HHMMSS formatted commit timestamp. When two
commits are made one after the other with the same timestamp, the VFS
also appends "-D" with as many digits as needed and numbers to
disambiguate. This is mostly fine, but if you "go back in
time" (e.g. change/fix clock), the repo can have the same commit time
for two commits that are *not* adjacent, e.g. by "save 1, save 2, roll
back clock, save 3". In this case, "save 1" and "save 3" might have
identical VFS names, causing collisions and hidden saves in
ls/web/etc.

This problem could also be created by any relevant rearrangement of
commits, say by bup get --pick.

Fix this by populating a map for all names to see how many we have,
and then counting down globally over the whole list rather than just
adjacent runs of the same time.

Signed-off-by: Johannes Berg <joha...@sipsolutions.net>
Reviewed-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
(cherry picked from commit aea7d6566643704c771192b1e5178f5bb00e2b0e)
---
lib/bup/vfs.py | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
index bbe5ba08..781733f2 100644
--- a/lib/bup/vfs.py
+++ b/lib/bup/vfs.py
@@ -50,7 +50,7 @@ from __future__ import absolute_import, print_function
from binascii import hexlify, unhexlify
from collections import namedtuple
from errno import EINVAL, ELOOP, ENOTDIR
-from itertools import chain, groupby, tee
+from itertools import chain, tee
from random import randrange
from stat import S_IFDIR, S_IFLNK, S_IFREG, S_ISDIR, S_ISLNK, S_ISREG
from time import localtime, strftime
@@ -726,20 +726,29 @@ def tree_items_with_meta(repo, oid, tree_data, names):
_save_name_rx = re.compile(br'^\d\d\d\d-\d\d-\d\d-\d{6}(-\d+)?$')

def _reverse_suffix_duplicates(strs):
- """Yields the elements of strs, with any runs of duplicate values
+ """Yields the elements of strs, with any duplicate values
suffixed with -N suffixes, where the zero padded integer N
decreases to 0 by 1 (e.g. 10, 09, ..., 00).

"""
- for name, duplicates in groupby(strs):
- ndup = len(tuple(duplicates))
+ seen = {}
+ strs = list(strs)
+ for name in strs:
+ if name in seen:
+ seen[name][0] += 1
+ seen[name][1] += 1
+ else:
+ seen[name] = [1, 1]
+ for name in strs:
+ curdup, ndup = seen[name]
if ndup == 1:
yield name
else:
ndig = len(str(ndup - 1))
fmt = b'%s-' + b'%0' + (b'%d' % ndig) + b'd'
- for i in range(ndup - 1, -1, -1):
- yield fmt % (name, i)
+ yield fmt % (name, curdup - 1)
+ seen[name][0] -= 1
+ del seen

def parse_rev(f):
items = f.readline().split(None)
@@ -779,9 +788,9 @@ def cache_commit(repo, oid, require_meta=True):
rev_names = _reverse_suffix_duplicates(_name_for_rev(x) for x in rev_names)
rev_items = (_item_for_rev(x) for x in rev_items)
tip = None
- for item in rev_items:
- name = next(rev_names)
+ for name, item in zip(rev_names, rev_items):
tip = tip or (name, item)
+ assert not name in entries
entries[name] = item
entries[b'latest'] = FakeLink(meta=default_symlink_mode, target=tip[0])
revlist_key = b'rvl:' + tip[1].coid
--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 5:11:44 PMJul 20
to bup-...@googlegroups.com
Also include a release note.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
(cherry picked from commit eb9e639ecddacb36b8937fa8480ba800e66f3cde)
---
note/0.33.x.md | 7 +++++++
test/int/test_vfs.py | 4 ++++
2 files changed, 11 insertions(+)

diff --git a/note/0.33.x.md b/note/0.33.x.md
index 737c82c3..b11245b6 100644
--- a/note/0.33.x.md
+++ b/note/0.33.x.md
@@ -25,6 +25,13 @@ Bugs
notes](0.33.4-from-0.33.3.md) for additional information. `bup` now
uses hardlinks instead.

+* Saves with identical dates won't end up with the same name
+ (e.g. 2025-01-08-135615 in "bup ls BRANCH") when they're not
+ adjacent -- when one isn't the other's direct parent. Previously bup
+ appended an increasing "-N" to disambiguate duplicates, but only
+ when they were directly related. Now it appends across all
+ duplicates.
+
Thanks to (at least)
====================

diff --git a/test/int/test_vfs.py b/test/int/test_vfs.py
index 55270876..aa8c0835 100644
--- a/test/int/test_vfs.py
+++ b/test/int/test_vfs.py
@@ -146,6 +146,10 @@ def test_reverse_suffix_duplicates():
wvpasseq((b'x-1', b'x-0', b'y'), suffix((b'x', b'x', b'y')))
wvpasseq((b'x', b'y-1', b'y-0'), suffix((b'x', b'y', b'y')))
wvpasseq((b'x', b'y-1', b'y-0', b'z'), suffix((b'x', b'y', b'y', b'z')))
+ wvpasseq((b'x', b'y-2', b'y-1', b'z', b'y-0'), suffix((b'x', b'y', b'y', b'z', b'y')))
+ wvpasseq((b'y-2', b'x', b'y-1', b'y-0', b'z'), suffix((b'y', b'x', b'y', b'y', b'z')))
+ wvpasseq((b'w', b'y-2', b'x', b'y-1', b'y-0', b'z'),
+ suffix((b'w', b'y', b'x', b'y', b'y', b'z')))

def test_misc(tmpdir):
bup_dir = tmpdir + b'/bup'
--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 5:11:44 PMJul 20
to bup-...@googlegroups.com
Always rename durably. See

2ff4a75b166e1a54c9fbdaa2066e7c1c08620ec2
atomically_replaced_file: make replacement durable by default

for a subsequent elaboration.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 3 +++
1 file changed, 3 insertions(+)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index 02fb731a..cf00f8a8 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -800,6 +800,9 @@ class atomically_replaced_file:
with self.cleanup:
if not (self.canceled or exc_type):
os.rename(self.tmp_path, self.path)
+ parent = os.path.dirname(self.path) or b'.'
+ with finalized(os.open(parent, os.O_RDONLY), os.close) as fd:
+ fsync(fd)
def cancel(self):
self.canceled = True

--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 5:11:44 PMJul 20
to bup-...@googlegroups.com
Thanks to Anton Khirnov for reporting the problem.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
(cherry picked from commit b841841428f902225265e3b4bffa36f477c2a395)
---
lib/bup/client.py | 4 +++-
lib/bup/cmd/midx.py | 5 ++++-
lib/bup/hlinkdb.py | 4 +++-
lib/bup/index.py | 2 ++
4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/bup/client.py b/lib/bup/client.py
index bda49d79..621c8b6f 100644
--- a/lib/bup/client.py
+++ b/lib/bup/client.py
@@ -9,7 +9,7 @@ import socket
from bup import git, ssh, vfs
from bup.compat import environ, pending_raise
from bup.helpers import (Conn, atomically_replaced_file, chunkyreader, debug1,
- debug2, linereader, lines_until_sentinel,
+ debug2, fsync, linereader, lines_until_sentinel,
mkdirp, nullcontext_if_not, progress, qprogress, DemuxConn)
from bup.io import path_msg
from bup.vint import write_bvec
@@ -282,6 +282,8 @@ class Client:
count += len(b)
qprogress('Receiving index from server: %d/%d\r' % (count, n))
progress('Receiving index from server: %d/%d, done.\n' % (count, n))
+ f.flush()
+ fsync(f.fileno())
self.check_ok()

def _make_objcache(self):
diff --git a/lib/bup/cmd/midx.py b/lib/bup/cmd/midx.py
index 947ade31..aadea8aa 100644
--- a/lib/bup/cmd/midx.py
+++ b/lib/bup/cmd/midx.py
@@ -5,7 +5,8 @@ import glob, os, math, resource, struct, sys

from bup import options, git, midx, _helpers, xstat
from bup.compat import ExitStack, argv_bytes, hexstr
-from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1, fdatasync,
+from bup.helpers import (Sha1, add_error, atomically_replaced_file, debug1,
+ fdatasync, fsync,
log, mmap_readwrite, qprogress,
saved_errors, unlink)
from bup.io import byte_stream, path_msg
@@ -169,6 +170,8 @@ def _do_midx(outdir, outfilename, infilenames, prefixstr,
count = merge_into(fmap, bits, total, inp)
f.seek(0, os.SEEK_END)
f.write(b'\0'.join(allfilenames))
+ f.flush()
+ fsync(f.fileno())

# This is just for testing (if you enable this, don't clear inp above)
# if 0:
diff --git a/lib/bup/hlinkdb.py b/lib/bup/hlinkdb.py
index f7e5d721..4f16da0a 100644
--- a/lib/bup/hlinkdb.py
+++ b/lib/bup/hlinkdb.py
@@ -2,7 +2,7 @@
from contextlib import ExitStack
import os, pickle

-from bup.helpers import atomically_replaced_file, unlink
+from bup.helpers import atomically_replaced_file, fsync, unlink


def pickle_load(filename):
@@ -44,6 +44,8 @@ class HLinkDB:
buffering=65536)
with self._cleanup.enter_context(self._pending_save) as f:
pickle.dump(self._node_paths, f, 2)
+ f.flush()
+ fsync(f.fileno())
else: # No data
self._cleanup.callback(lambda: unlink(self._filename))
self._cleanup = self._cleanup.pop_all()
diff --git a/lib/bup/index.py b/lib/bup/index.py
index 448155f9..6788eec1 100644
--- a/lib/bup/index.py
+++ b/lib/bup/index.py
@@ -7,6 +7,7 @@ from bup._helpers import UINT_MAX, bytescmp
from bup.compat import pending_raise
from bup.helpers import (add_error,
atomically_replaced_file,
+ fsync,
log, merge_iter, mmap_readwrite,
progress, qprogress, resolve_parent, slashappend)

@@ -588,6 +589,7 @@ class Writer:
self.pending_index.cancel()
else:
self.flush()
+ fsync(self.f.fileno())

def __del__(self):
assert self.closed
--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 5:11:44 PMJul 20
to bup-...@googlegroups.com
Provide fsync in addition to fdatasync, and replace os.fsync with it
so macos will be right. See also:

a6f5302d00ca5c8d53f942129d1d5a3bc37ddd7a
Simplify fsync/fdatasync when not on macos; use them everywhere

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index cccddf52..02fb731a 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -101,7 +101,7 @@ except AttributeError:
if sys.platform.startswith('darwin'):
# Apparently os.fsync on OS X doesn't guarantee to sync all the way down
import fcntl
- def fdatasync(fd):
+ def fsync(fd):
try:
return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
except IOError as e:
@@ -110,8 +110,10 @@ if sys.platform.startswith('darwin'):
return _fdatasync(fd)
else:
raise
+ fdatasync = fsync
else:
- fdatasync = _fdatasync
+ fdatasync = getattr(os, 'fdatasync', os.fsync) # currently always fdatasync
+ fsync = os.fsync


def partition(predicate, stream):
--
2.47.2

Reply all
Reply to author
Forward
0 new messages