Fix a number of durability issues

1 view
Skip to first unread message

Rob Browning

unread,
Jul 18, 2025, 2:12:18 PMJul 18
to bup-...@googlegroups.com
Proposed for main. I may backport the key bits to 0.33.x later.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Jul 18, 2025, 2:12:18 PMJul 18
to bup-...@googlegroups.com
split('x') returns '' for the parent, replace it with '.'.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index 2d252efc..d4110544 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -789,12 +789,16 @@ class atomically_replaced_file:
self.buffering = buffering
self.canceled = False
self.tmp_path = None
+ self._path_parent, self._path_base = os.path.split(self.path)
+ if not self._path_parent:
+ self._path_parent = '.'
+ assert self._path_base, f'{self._path_base} is a directory'
self.cleanup = ExitStack()
def __enter__(self):
with self.cleanup:
- parent, name = os.path.split(self.path)
- tmpdir = self.cleanup.enter_context(temp_dir(dir=parent,
- prefix=name + b'-'))
+ tmpdir = temp_dir(dir=self._path_parent,
+ prefix=self._path_base + b'-')
+ tmpdir = self.cleanup.enter_context(tmpdir)
self.tmp_path = tmpdir + b'/pending'
f = open(self.tmp_path, mode=self.mode, buffering=self.buffering)
f = self.cleanup.enter_context(f)
--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:12:18 PMJul 18
to bup-...@googlegroups.com
Switch Reader to tmp_path since file.name is just the basename when
you open via dir_fd.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 16 ++++++++++++++--
lib/bup/index.py | 2 +-
2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index cc2f7c6a..08fc6b53 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -77,6 +77,13 @@ def temp_dir(*args, **kwargs):
return finalized(mkdtemp(*args, **kwargs), lambda x: rmtree(x))


+def open_in(fd, path, *args, **kwargs):
+ """Open path with dir_fd set to fd via open()'s opener."""
+ assert 'opener' not in kwargs
+ def opener(name, mode):
+ return os.open(name, mode, dir_fd=fd)
+ return open(path, *args, opener=opener, **kwargs)
+
if hasattr(os, 'O_PATH'):
def open_path_fd(path): return os.open(path, os.O_PATH)
else:
@@ -801,6 +808,7 @@ class atomically_replaced_file:
self.buffering = buffering
self.canceled = False
self.tmp_path = None
+ self._tmp_dir_fd = None
self._path_parent_fd = None
self._path_parent, self._path_base = os.path.split(self.path)
if not self._path_parent:
@@ -823,15 +831,19 @@ class atomically_replaced_file:
tmpdir = temp_dir(dir=self._path_parent,
prefix=self._path_base + b'-')
tmpdir = self._cleanup.enter_context(tmpdir)
+ tmpdir_ctx = os_closed(open_path_fd(tmpdir))
+ self._tmp_dir_fd = self._cleanup.enter_context(tmpdir_ctx)
self.tmp_path = tmpdir + b'/pending'
- f = open(self.tmp_path, mode=self.mode, buffering=self.buffering)
+ f = open_in(self._tmp_dir_fd, b'pending', mode=self.mode,
+ buffering=self.buffering)
f = self._cleanup.enter_context(f)
self._cleanup = self._cleanup.pop_all()
return f
def __exit__(self, exc_type, exc_value, traceback):
with self._cleanup:
if not (self.canceled or exc_type):
- os.rename(self.tmp_path, self._path_base,
+ os.rename(b'pending', self._path_base,
+ src_dir_fd=self._tmp_dir_fd,
dst_dir_fd=self._path_parent_fd)
def cancel(self):
self.canceled = True
diff --git a/lib/bup/index.py b/lib/bup/index.py
index f59e31d5..b57d2b22 100644
--- a/lib/bup/index.py
+++ b/lib/bup/index.py
@@ -617,7 +617,7 @@ class Writer:

def new_reader(self):
self.flush()
- return Reader(self.f.name)
+ return Reader(self.pending_index.tmp_path)


def _slashappend_or_add_error(p, caller):
--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:12:19 PMJul 18
to bup-...@googlegroups.com
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/git.py | 2 +-
lib/bup/helpers.py | 29 ++++++++++++++++-------------
2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index 318cab57..b734b190 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -985,7 +985,7 @@ class LocalPackStore():
b'objects/pack/pack-' + hexlify(packbin))
os.rename(tmpdir + b'/pack', nameprefix + b'.pack')
os.rename(tmpdir + b'/idx', nameprefix + b'.idx')
- os.fsync(pfd)
+ fdatasync(pfd)
if self._on_pack_finish:
self._on_pack_finish(nameprefix)
if self._run_midx:
diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index affa336d..2d252efc 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -102,25 +102,28 @@ def last(iterable):
pass
return result

-try:
- _fdatasync = os.fdatasync
-except AttributeError:
- _fdatasync = os.fsync

-if sys.platform.startswith('darwin'):
- # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
+if not sys.platform.startswith('darwin'):
+ fsync = os.fsync
+ fdatasync = getattr(os, 'fdatasync', os.fsync) # currently always fdatasync
+else:
+ # macos doesn't guarantee to sync all the way down (see fsync(2))
import fcntl
- def fdatasync(fd):
+ def _fullsync(fd):
try:
- return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
+ # Ignore result - errors will throw, other values undocumented
+ fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
+ return True
except IOError as e:
# Fallback for file systems (SMB) that do not support F_FULLFSYNC
- if e.errno == errno.ENOTSUP:
- return _fdatasync(fd)
- else:
+ if e.errno != errno.ENOTSUP:
raise
-else:
- fdatasync = _fdatasync
+ return False
+ def fsync(fd): return _fullsync(fd) or os.fsync(fd)
+ if getattr(os, 'fdatasync', None): # ...in case it's added someday
+ def fdatasync(fd): return _fullsync(fd) or os.fdatasync(fd)
+ else:
+ def fdatasync(fd): return _fullsync(fd) or os.fsync(fd)


def partition(predicate, stream):
--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:12:19 PMJul 18
to bup-...@googlegroups.com
Assume that if O_PATH exists, it's what we want.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index 8fd9037c..cc2f7c6a 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -76,6 +76,15 @@ def temp_dir(*args, **kwargs):
# https://github.com/python/cpython/issues/88458
return finalized(mkdtemp(*args, **kwargs), lambda x: rmtree(x))

+
+if hasattr(os, 'O_PATH'):
+ def open_path_fd(path): return os.open(path, os.O_PATH)
+else:
+ def open_path_fd(path): return os.open(path, os.O_RDONLY)
+
+def os_closed(x): return finalized(x, os.close)
+
+
# singleton used when we don't care where the object is
OBJECT_EXISTS = None

@@ -792,6 +801,7 @@ class atomically_replaced_file:
self.buffering = buffering
self.canceled = False
self.tmp_path = None
+ self._path_parent_fd = None
self._path_parent, self._path_base = os.path.split(self.path)
if not self._path_parent:
self._path_parent = '.'
@@ -803,6 +813,8 @@ class atomically_replaced_file:
ctx.enter_context(finalized(set_closed))
self._closed = False
# Anything requiring cleanup must be after this and guarded by ctx
+ ctx = os_closed(open_path_fd(self._path_parent))
+ self._path_parent_fd = self._cleanup.enter_context(ctx)
self._cleanup = self._cleanup.pop_all()
def __del__(self):
assert self._closed
@@ -819,7 +831,8 @@ class atomically_replaced_file:
def __exit__(self, exc_type, exc_value, traceback):
with self._cleanup:
if not (self.canceled or exc_type):
- os.rename(self.tmp_path, self.path)
+ os.rename(self.tmp_path, self._path_base,
+ dst_dir_fd=self._path_parent_fd)
def cancel(self):
self.canceled = True

--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:12:20 PMJul 18
to bup-...@googlegroups.com
Note that atomically_replaced_file only (optionally) makes the
replacement (rename) durable. It's currently up to the caller to sync
the file itself in whatever way desirect. That's not optional at the
moment because some callers like hlinkdb (for its "two phase commmit")
assume they can close the file whenever they like.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 23 ++++++++++++++++-------
test/int/test_helpers.py | 12 ++++++++----
2 files changed, 24 insertions(+), 11 deletions(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index 08fc6b53..d9ef85d2 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -777,7 +777,7 @@ def chunkyreader(f, count = None):


class atomically_replaced_file:
- def __init__(self, path, mode='w', buffering=-1):
+ def __init__(self, path, mode='w', buffering=-1, sync=True):
"""Return a context manager supporting the atomic replacement of a file.

The context manager yields an open file object that has been
@@ -786,7 +786,10 @@ class atomically_replaced_file:
the target path (atomically if the platform allows it) if
there are no exceptions, and the temporary directory will
always be removed. Calling cancel() will prevent the
- replacement.
+ replacement. When sync is true (the default), the replacement
+ will be made durable in the fsync sense, otherwise, it's up to
+ the caller to fsync the path parent. It's always up to the
+ caller to sync the returned file itself, if desired.

The file object will have a name attribute containing the
file's path, and the mode and buffering arguments will be
@@ -797,6 +800,8 @@ class atomically_replaced_file:

with atomically_replaced_file('foo.txt', 'w') as f:
f.write('hello jack.')
+ f.flush()
+ os.fsync(f.fileno())

"""
# Anything requiring cleanup must come after _closed is set to
@@ -808,6 +813,7 @@ class atomically_replaced_file:
self.buffering = buffering
self.canceled = False
self.tmp_path = None
+ self._sync = sync
self._tmp_dir_fd = None
self._path_parent_fd = None
self._path_parent, self._path_base = os.path.split(self.path)
@@ -821,7 +827,7 @@ class atomically_replaced_file:
ctx.enter_context(finalized(set_closed))
self._closed = False
# Anything requiring cleanup must be after this and guarded by ctx
- ctx = os_closed(open_path_fd(self._path_parent))
+ ctx = os_closed(os.open(self._path_parent, os.O_RDONLY))
self._path_parent_fd = self._cleanup.enter_context(ctx)
self._cleanup = self._cleanup.pop_all()
def __del__(self):
@@ -841,10 +847,13 @@ class atomically_replaced_file:
return f
def __exit__(self, exc_type, exc_value, traceback):
with self._cleanup:
- if not (self.canceled or exc_type):
- os.rename(b'pending', self._path_base,
- src_dir_fd=self._tmp_dir_fd,
- dst_dir_fd=self._path_parent_fd)
+ if self.canceled or exc_type:
+ return
+ os.rename(b'pending', self._path_base,
+ src_dir_fd=self._tmp_dir_fd,
+ dst_dir_fd=self._path_parent_fd)
+ if self._sync:
+ fdatasync(self._path_parent_fd)
def cancel(self):
self.canceled = True

diff --git a/test/int/test_helpers.py b/test/int/test_helpers.py
index 8964b7bf..287d131d 100644
--- a/test/int/test_helpers.py
+++ b/test/int/test_helpers.py
@@ -145,17 +145,20 @@ def test_batchpipe():
WVPASSEQ(next(batches, None), None)


-def test_atomically_replaced_file(tmpdir):
+...@pytest.mark.parametrize('sync_atomic_replace', (True, False))
+def test_atomically_replaced_file(sync_atomic_replace, tmpdir):
target_file = os.path.join(tmpdir, b'test-atomic-write')

- with atomically_replaced_file(target_file, mode='w') as f:
+ with atomically_replaced_file(target_file, mode='w',
+ sync=sync_atomic_replace) as f:
f.write('asdf')
WVPASSEQ(f.mode, 'w')
f = open(target_file, 'r')
WVPASSEQ(f.read(), 'asdf')

try:
- with atomically_replaced_file(target_file, mode='w') as f:
+ with atomically_replaced_file(target_file, mode='w',
+ sync=sync_atomic_replace) as f:
f.write('wxyz')
raise Exception()
except:
@@ -163,7 +166,8 @@ def test_atomically_replaced_file(tmpdir):
with open(target_file) as f:
WVPASSEQ(f.read(), 'asdf')

- with atomically_replaced_file(target_file, mode='wb') as f:
+ with atomically_replaced_file(target_file, mode='wb',
+ sync=sync_atomic_replace) as f:
f.write(os.urandom(20))
WVPASSEQ(f.mode, 'wb')

--
2.47.2

Nix

unread,
Jul 19, 2025, 8:34:24 AMJul 19
to Rob Browning, bup-...@googlegroups.com
On 18 Jul 2025, Rob Browning spake thusly:
Syncing directory changes on Linux also requires more than an fdatasync
on some filesystems: you must fsync an fd to the directory as well.
(But if this is only metadata changes, the fdatasync is fine.)

--
NULL && (void)

Greg Troxel

unread,
Jul 19, 2025, 10:28:37 AMJul 19
to Nix, Rob Browning, bup-...@googlegroups.com
Nix <n...@esperi.org.uk> writes:

> Syncing directory changes on Linux also requires more than an fdatasync
> on some filesystems: you must fsync an fd to the directory as well.
> (But if this is only metadata changes, the fdatasync is fine.)

I haven't looked at this yet, but a higher-level point is that what you
have to do on FooOS isn't the issue; it's what POSIX says must be
sufficient. The POSIX text appears subtle:


https://pubs.opengroup.org/onlinepubs/9799919799/functions/fsync.html

https://pubs.opengroup.org/onlinepubs/9799919799/functions/fdatasync.html

Rob Browning

unread,
Jul 19, 2025, 5:38:54 PMJul 19
to Nix, bup-...@googlegroups.com
Nix <n...@esperi.org.uk> writes:

> Syncing directory changes on Linux also requires more than an fdatasync
> on some filesystems: you must fsync an fd to the directory as well.
> (But if this is only metadata changes, the fdatasync is fine.)

My current impression is that to have the best chance of durably
renaming you have to (ignoring macos):

- make sure the buffers are flushed (fflush, f.flush(), etc.)
- fsync or fdatasync the fd (depending on what you need) and close
- rename()
- fdatasync the destination parent dir

Does that also match up with your impression?

Thanks

Rob Browning

unread,
Jul 19, 2025, 5:46:14 PMJul 19
to Greg Troxel, Nix, bup-...@googlegroups.com
Right, my impression is that POSIX doesn't really provide the definitive
promises we need, so in the end, assuming the two aren't at odds, I
think we want to make sure we're compatible with what POSIX
promises/requires and then make sure within that, we're doing something
that also actually works in all the relevant situations.

Hence having to work around macos' behvavior, regardless (unless we
decide to stop supporting macos).

Greg Troxel

unread,
Jul 19, 2025, 6:25:46 PMJul 19
to Rob Browning, Nix, bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

>> The POSIX text appears subtle:
>>
>> https://pubs.opengroup.org/onlinepubs/9799919799/functions/fsync.html
>>
>> https://pubs.opengroup.org/onlinepubs/9799919799/functions/fdatasync.html
>
> Right, my impression is that POSIX doesn't really provide the definitive
> promises we need, so in the end, assuming the two aren't at odds, I
> think we want to make sure we're compatible with what POSIX
> promises/requires and then make sure within that, we're doing something
> that also actually works in all the relevant situations.

That seems reasonable. I just like to try to be POSIX-first and then
accomodate.

> Hence having to work around macos' behvavior, regardless (unless we
> decide to stop supporting macos).

There are so many macs out there that desupporting macos would be a
really unfortunate outcome.

If you just mean a doc note that f*sync on macOS doesn't do what POSIX
expects, and thus when running on macOS the safety guarantees are not
met, that's fine.


> My current impression is that to have the best chance of durably
> renaming you have to (ignoring macos):
>
> - make sure the buffers are flushed (fflush, f.flush(), etc.)
> - fsync or fdatasync the fd (depending on what you need) and close
> - rename()
> - fdatasync the destination parent dir

That seems ok to me. Maybe you should fsync the parent dir, on the
theory that the odds of unwritten metadata that is costly to sync is
low, and that might work better.


I had no idea about macOS, but I guess:

https://transactional.blog/blog/2022-darwins-deceptive-durability

https://www.phoronix.com/forums/forum/software/bsd-mac-os-x-hurd-others/1309928-turns-out-macos-has-been-benchmark-cheating-in-writes-by-not-doing-proper-fsync


I guess it's a SMOP that somebody who uses macs can provide, to emit the
right incantations instead of the POSIX ones.

Rob Browning

unread,
Jul 19, 2025, 7:00:17 PMJul 19
to Greg Troxel, Nix, bup-...@googlegroups.com
Greg Troxel <g...@lexort.com> writes:

> If you just mean a doc note that f*sync on macOS doesn't do what POSIX
> expects, and thus when running on macOS the safety guarantees are not
> met, that's fine.

No I meant that we just continue to go ahead and do what's required on
macos (see F_FULLSYNC in helpers).

> That seems ok to me. Maybe you should fsync the parent dir, on the
> theory that the odds of unwritten metadata that is costly to sync is
> low, and that might work better.

Yeah, I've been mulling that over too with these changes. I *think*
fdatasync is probably sufficient, but my guess is that fsync on a dir
probably doesn't cost enough more to matter, so "why not"...

> I guess it's a SMOP that somebody who uses macs can provide, to emit
> the right incantations instead of the POSIX ones.

Hah - already done (we hope).

Thanks

Nix

unread,
Jul 20, 2025, 8:30:28 AMJul 20
to Rob Browning, bup-...@googlegroups.com
On 19 Jul 2025, Rob Browning outgrape:

> Nix <n...@esperi.org.uk> writes:
>
>> Syncing directory changes on Linux also requires more than an fdatasync
>> on some filesystems: you must fsync an fd to the directory as well.
>> (But if this is only metadata changes, the fdatasync is fine.)
>
> My current impression is that to have the best chance of durably
> renaming you have to (ignoring macos):
>
> - make sure the buffers are flushed (fflush, f.flush(), etc.)
> - fsync or fdatasync the fd (depending on what you need) and close
> - rename()
> - fdatasync the destination parent dir
>
> Does that also match up with your impression?

Yes. Article from 16 years ago, still relevant (not much has changed
since then except that xfs gained better journalling about decade ago
and no longer randomly fills files with NULs on crash):

https://lwn.net/Articles/351422/

(You should fsync the file's fd instead of fdatasyncing if you care
about dates and times or other inode-bound info being updated, or if
you've extended the file or filled in holes in it. I wish I didn't have
to put the last bit in, but I *just this week* dicovered that Solaris
11.4 gets this wrong, unlike 11.3, which is fine :( )

Johannes Berg

unread,
Jul 20, 2025, 8:48:07 AMJul 20
to Nix, Rob Browning, bup-...@googlegroups.com
On Sun, 2025-07-20 at 13:30 +0100, Nix wrote:
>
> (You should fsync the file's fd instead of fdatasyncing if you care
> about dates and times or other inode-bound info being updated, or if
> you've extended the file or filled in holes in it. I wish I didn't have
> to put the last bit in, but I *just this week* dicovered that Solaris
> 11.4 gets this wrong, unlike 11.3, which is fine :( )

Huh, does that mean basically - on Solaris - you'd always have to, since
writing a completely new file is "extending" in a sense?

johannes

Rob Browning

unread,
Jul 20, 2025, 1:43:21 PMJul 20
to Nix, bup-...@googlegroups.com
Nix <n...@esperi.org.uk> writes:

> (You should fsync the file's fd instead of fdatasyncing if you care
> about dates and times or other inode-bound info being updated, or if
> you've extended the file or filled in holes in it. I wish I didn't have
> to put the last bit in, but I *just this week* dicovered that Solaris
> 11.4 gets this wrong, unlike 11.3, which is fine :( )

Right, I expect that general difference there between fdatasync and
fsync (but certainly not the issue with extensions/holes).

And yeah, I think I may go ahead and fsync() the rename parent dir
instead -- unlikely to hurt much, might help.

Nix

unread,
Jul 20, 2025, 2:36:34 PMJul 20
to Johannes Berg, Rob Browning, bup-...@googlegroups.com
On 20 Jul 2025, Johannes Berg verbalised:
Well, yes, but you'll only really notice if you're unlucky enough to
crash at the wrong time. (This is probably a bug in 11.4.)

--
NULL && (void)

Rob Browning

unread,
Jul 20, 2025, 3:47:31 PMJul 20
to bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

> Proposed for main. I may backport the key bits to 0.33.x later.

Pushed to main, after adding a commit to switch all the fdatasyncs to
fsync, since the performance difference shoudn't matter for our uses.

Thanks
Reply all
Reply to author
Forward
0 new messages