Simplfy drecurse and don't chdir during traversal

3 views
Skip to first unread message

Rob Browning

unread,
Mar 15, 2026, 6:25:34 PM (9 days ago) Mar 15
to bup-...@googlegroups.com
Proposed for main.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Mar 15, 2026, 6:25:35 PM (9 days ago) Mar 15
to bup-...@googlegroups.com
Pass the path to _dirlist() so we can include the full path in error
messages, include the path in the repository exclusion message,
simplify and make some messages more consistent.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/drecurse.py | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
index b095f3f5..bb177237 100644
--- a/lib/bup/drecurse.py
+++ b/lib/bup/drecurse.py
@@ -3,6 +3,7 @@ from os import O_DIRECTORY, O_NOFOLLOW, fsencode
from stat import S_ISDIR
import stat, os

+from bup import xstat
from bup._helpers import open_noatime, openat_noatime
from bup.helpers \
import (add_error,
@@ -10,17 +11,16 @@ from bup.helpers \
finalized,
resolve_parent,
should_rx_exclude_path)
-from bup import xstat
-from bup.io import path_msg
+from bup.io import path_msg as pm


-def _dirlist(fd):
+def _dirlist(fd, path):
l = []
for n in os.listdir(fd):
try:
st = xstat.lstat(n, dir_fd=fd)
except OSError as e:
- add_error(Exception('%s: %s' % (resolve_parent(n), str(e))))
+ add_error(Exception(f'{pm(resolve_parent(path))}/{n}: {e}'))
continue
l.append((fsencode(n + '/' if S_ISDIR(st.st_mode) else n), st))
l.sort(reverse=True)
@@ -31,13 +31,13 @@ def _recursive_dirlist(prepend, dir_fd, xdev,
excluded_paths=None,
exclude_rxs=None,
xdev_exceptions=frozenset()):
- for name, pst in _dirlist(dir_fd):
+ for name, pst in _dirlist(dir_fd, prepend):
path = prepend + name
npath = None
if excluded_paths:
npath = os.path.normpath(path)
if npath in excluded_paths:
- debug1('Skipping %r: excluded.\n' % path_msg(path))
+ debug1('Excluding path {pm(path)}')
continue
if exclude_rxs and should_rx_exclude_path(path, exclude_rxs):
continue
@@ -45,18 +45,17 @@ def _recursive_dirlist(prepend, dir_fd, xdev,
yield path, pst
continue
if bup_dir is not None and (npath or os.path.normpath(path)) == bup_dir:
- debug1('Skipping BUP_DIR.\n')
+ debug1(f'Excluding repository {pm(bup_dir)}\n')
continue
if xdev is not None and pst.st_dev != xdev \
and path not in xdev_exceptions:
- debug1('Skipping contents of %r: different filesystem.\n'
- % path_msg(path))
+ debug1(f'Excluding filesystem {pm(path)}\n')
yield path, pst
continue
try:
sub_fd = openat_noatime(dir_fd, name, O_NOFOLLOW | O_DIRECTORY)
except OSError as e:
- add_error('%s: %s' % (prepend, e))
+ add_error(Exception(f'{pm(prepend)}/{pm(name)}: {e}'))
yield path, pst
continue
with finalized(sub_fd, os.close):
@@ -69,7 +68,6 @@ def _recursive_dirlist(prepend, dir_fd, xdev,
xdev_exceptions=xdev_exceptions)
yield path, pst

-
def recursive_dirlist(paths, xdev, bup_dir=None,
excluded_paths=None,
exclude_rxs=None,
@@ -80,7 +78,7 @@ def recursive_dirlist(paths, xdev, bup_dir=None,
try:
pst = xstat.lstat(path)
except OSError as e:
- add_error('recursive_dirlist: %s' % e)
+ add_error(Exception(f'{pm(path)}: {e}'))
continue
if not stat.S_ISDIR(pst.st_mode):
yield path, pst
--
2.47.3

Rob Browning

unread,
Mar 15, 2026, 6:25:35 PM (9 days ago) Mar 15
to bup-...@googlegroups.com
This should be preferable in general, but the main purpose is to allow
drecurse to eventually leave the working directory alone (i.e. no
chdir()).

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/drecurse.py | 55 ++++++++++++++++++++++++---------------------
1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
index 3d1950d7..b26c7dc9 100644
--- a/lib/bup/drecurse.py
+++ b/lib/bup/drecurse.py
@@ -1,8 +1,9 @@

-from os import O_DIRECTORY, O_NOFOLLOW
+from os import O_DIRECTORY, O_NOFOLLOW, fsencode
+from stat import S_ISDIR
import stat, os

-from bup._helpers import open_noatime
+from bup._helpers import open_noatime, openat_noatime
from bup.helpers \
import (add_error,
debug1,
@@ -18,25 +19,24 @@ from bup.io import path_msg
# - avoid race conditions caused by doing listdir() on a changing symlink


-def _dirlist():
+def _dirlist(fd):
l = []
- for n in os.listdir(b'.'):
+ for n in os.listdir(fd):
try:
- st = xstat.lstat(n)
+ st = xstat.lstat(n, dir_fd=fd)
except OSError as e:
add_error(Exception('%s: %s' % (resolve_parent(n), str(e))))
continue
- if stat.S_ISDIR(st.st_mode):
- n += b'/'
- l.append((n,st))
+ l.append((fsencode(n + '/' if S_ISDIR(st.st_mode) else n), st))
l.sort(reverse=True)
return l

-def _recursive_dirlist(prepend, xdev, bup_dir=None,
+def _recursive_dirlist(prepend, dir_fd, xdev,
+ bup_dir=None,
excluded_paths=None,
exclude_rxs=None,
xdev_exceptions=frozenset()):
- for (name,pst) in _dirlist():
+ for name, pst in _dirlist(dir_fd):
path = prepend + name
npath = None
if excluded_paths:
@@ -59,30 +59,33 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None,
yield path, pst
continue
try:
- fd = open_noatime(name, O_NOFOLLOW | O_DIRECTORY)
+ sub_fd = openat_noatime(dir_fd, name, O_NOFOLLOW | O_DIRECTORY)
except OSError as e:
add_error('%s: %s' % (prepend, e))
yield path, pst
continue
- with finalized(fd, os.close):
- os.fchdir(fd)
- yield from _recursive_dirlist(prepend=path, xdev=xdev,
- bup_dir=bup_dir,
- excluded_paths=excluded_paths,
- exclude_rxs=exclude_rxs,
- xdev_exceptions=xdev_exceptions)
+ with finalized(sub_fd, os.close):
+ os.fchdir(sub_fd)
+ yield from _recursive_dirlist(prepend=path,
+ dir_fd=sub_fd,
+ xdev=xdev,
+ bup_dir=bup_dir,
+ excluded_paths=excluded_paths,
+ exclude_rxs=exclude_rxs,
+ xdev_exceptions=xdev_exceptions)
os.chdir(b'..')
- yield (path, pst)
+ yield path, pst


def recursive_dirlist(paths, xdev, bup_dir=None,
excluded_paths=None,
exclude_rxs=None,
xdev_exceptions=frozenset()):
+ for path in paths:
+ assert isinstance(path, bytes), path
startdir = open_noatime(b'.', O_NOFOLLOW | O_DIRECTORY)
with finalized(startdir, os.close):
try:
- assert not isinstance(paths, str)
for path in paths:
try:
pst = xstat.lstat(path)
@@ -93,15 +96,17 @@ def recursive_dirlist(paths, xdev, bup_dir=None,
yield path, pst
continue
try:
- opened_pfile = open_noatime(path, O_NOFOLLOW | O_DIRECTORY)
+ path_fd = open_noatime(path, O_NOFOLLOW | O_DIRECTORY)
except OSError as e:
add_error(e)
continue
- with finalized(opened_pfile, os.close) as pfile:
+ with finalized(path_fd, os.close):
+ os.fchdir(path_fd)
xdev = pst.st_dev if xdev else None
- os.fchdir(pfile)
- prepend = os.path.join(path, b'')
- yield from _recursive_dirlist(prepend=prepend, xdev=xdev,
+ prepend = path if path[-1] == b'/'[0] else path + b'/'
+ yield from _recursive_dirlist(prepend=prepend,
+ dir_fd=path_fd,
+ xdev=xdev,
bup_dir=bup_dir,
excluded_paths=excluded_paths,
exclude_rxs=exclude_rxs,
--
2.47.3

Rob Browning

unread,
Mar 15, 2026, 6:25:35 PM (9 days ago) Mar 15
to bup-...@googlegroups.com
Now that all the relevant operations rely on a directory file
descriptor, stop changing the working directory during the traversal.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/drecurse.py | 62 ++++++++++++++++++---------------------------
1 file changed, 24 insertions(+), 38 deletions(-)

diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
index b26c7dc9..b095f3f5 100644
--- a/lib/bup/drecurse.py
+++ b/lib/bup/drecurse.py
@@ -14,11 +14,6 @@ from bup import xstat
from bup.io import path_msg


-# the use of fchdir() and lstat() is for two reasons:
-# - help out the kernel by not making it repeatedly look up the absolute path
-# - avoid race conditions caused by doing listdir() on a changing symlink
-
-
def _dirlist(fd):
l = []
for n in os.listdir(fd):
@@ -65,7 +60,6 @@ def _recursive_dirlist(prepend, dir_fd, xdev,
yield path, pst
continue
with finalized(sub_fd, os.close):
- os.fchdir(sub_fd)
yield from _recursive_dirlist(prepend=path,
dir_fd=sub_fd,
xdev=xdev,
@@ -73,7 +67,6 @@ def _recursive_dirlist(prepend, dir_fd, xdev,
excluded_paths=excluded_paths,
exclude_rxs=exclude_rxs,
xdev_exceptions=xdev_exceptions)
- os.chdir(b'..')
yield path, pst


@@ -83,35 +76,28 @@ def recursive_dirlist(paths, xdev, bup_dir=None,
xdev_exceptions=frozenset()):
for path in paths:
assert isinstance(path, bytes), path
- startdir = open_noatime(b'.', O_NOFOLLOW | O_DIRECTORY)
- with finalized(startdir, os.close):
+ for path in paths:
+ try:
+ pst = xstat.lstat(path)
+ except OSError as e:
+ add_error('recursive_dirlist: %s' % e)
+ continue
+ if not stat.S_ISDIR(pst.st_mode):
+ yield path, pst
+ continue
try:
- for path in paths:
- try:
- pst = xstat.lstat(path)
- except OSError as e:
- add_error('recursive_dirlist: %s' % e)
- continue
- if not stat.S_ISDIR(pst.st_mode):
- yield path, pst
- continue
- try:
- path_fd = open_noatime(path, O_NOFOLLOW | O_DIRECTORY)
- except OSError as e:
- add_error(e)
- continue
- with finalized(path_fd, os.close):
- os.fchdir(path_fd)
- xdev = pst.st_dev if xdev else None
- prepend = path if path[-1] == b'/'[0] else path + b'/'
- yield from _recursive_dirlist(prepend=prepend,
- dir_fd=path_fd,
- xdev=xdev,
- bup_dir=bup_dir,
- excluded_paths=excluded_paths,
- exclude_rxs=exclude_rxs,
- xdev_exceptions=xdev_exceptions)
- os.fchdir(startdir)
- yield (prepend,pst)
- finally:
- os.fchdir(startdir)
+ path_fd = open_noatime(path, O_NOFOLLOW | O_DIRECTORY)
+ except OSError as e:
+ add_error(e)
+ continue
+ with finalized(path_fd, os.close):
+ xdev = pst.st_dev if xdev else None
+ prepend = path if path[-1] == b'/'[0] else path + b'/'
+ yield from _recursive_dirlist(prepend=prepend,
+ dir_fd=path_fd,
+ xdev=xdev,
+ bup_dir=bup_dir,
+ excluded_paths=excluded_paths,
+ exclude_rxs=exclude_rxs,
+ xdev_exceptions=xdev_exceptions)
+ yield prepend, pst
--
2.47.3

Rob Browning

unread,
Mar 15, 2026, 6:25:35 PM (9 days ago) Mar 15
to bup-...@googlegroups.com
Switch to open_noatime so that we'll avoid changing the atime when
possible, and replace finalized_fd with finalized(fd, os.close).

Drop compat.MAYBE_LARGEFILE for now since open_noatime handles it and
nothing else needs it.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/compat.py | 4 +---
lib/bup/drecurse.py | 21 +++++++++------------
2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/lib/bup/compat.py b/lib/bup/compat.py
index 0ac7f44b..a4ef8644 100644
--- a/lib/bup/compat.py
+++ b/lib/bup/compat.py
@@ -5,15 +5,13 @@
from os import environb as environ
# pylint: enable=unused-import
from os import fsencode
-import dataclasses, os, sys, traceback
+import dataclasses, sys, traceback

import bup_main


ver = sys.version_info

-MAYBE_LARGEFILE = getattr(os, 'O_LARGEFILE', 0)
-
if (ver.major, ver.minor) >= (3, 10):
# pylint: disable=unused-import
# pylint: disable-next=unused-import
diff --git a/lib/bup/drecurse.py b/lib/bup/drecurse.py
index 800add4e..3d1950d7 100644
--- a/lib/bup/drecurse.py
+++ b/lib/bup/drecurse.py
@@ -1,8 +1,8 @@

-from os import O_NOFOLLOW
+from os import O_DIRECTORY, O_NOFOLLOW
import stat, os

-from bup.compat import MAYBE_LARGEFILE
+from bup._helpers import open_noatime
from bup.helpers \
import (add_error,
debug1,
@@ -18,11 +18,6 @@ from bup.io import path_msg
# - avoid race conditions caused by doing listdir() on a changing symlink


-def finalized_fd(path):
- fd = os.open(path, os.O_RDONLY|MAYBE_LARGEFILE|O_NOFOLLOW|os.O_NDELAY)
- return finalized(fd, os.close)
-
-
def _dirlist():
l = []
for n in os.listdir(b'.'):
@@ -64,12 +59,13 @@ def _recursive_dirlist(prepend, xdev, bup_dir=None,
yield path, pst
continue
try:
- with finalized_fd(name) as fd:
- os.fchdir(fd)
+ fd = open_noatime(name, O_NOFOLLOW | O_DIRECTORY)
except OSError as e:
add_error('%s: %s' % (prepend, e))
yield path, pst
continue
+ with finalized(fd, os.close):
+ os.fchdir(fd)
yield from _recursive_dirlist(prepend=path, xdev=xdev,
bup_dir=bup_dir,
excluded_paths=excluded_paths,
@@ -83,7 +79,8 @@ def recursive_dirlist(paths, xdev, bup_dir=None,
excluded_paths=None,
exclude_rxs=None,
xdev_exceptions=frozenset()):
- with finalized_fd(b'.') as startdir:
+ startdir = open_noatime(b'.', O_NOFOLLOW | O_DIRECTORY)
+ with finalized(startdir, os.close):
try:
assert not isinstance(paths, str)
for path in paths:
@@ -96,11 +93,11 @@ def recursive_dirlist(paths, xdev, bup_dir=None,
yield path, pst
continue
try:
- opened_pfile = finalized_fd(path)
+ opened_pfile = open_noatime(path, O_NOFOLLOW | O_DIRECTORY)
except OSError as e:
add_error(e)
continue
- with opened_pfile as pfile:
+ with finalized(opened_pfile, os.close) as pfile:
xdev = pst.st_dev if xdev else None
os.fchdir(pfile)
prepend = os.path.join(path, b'')
--
2.47.3

Reply all
Reply to author
Forward
0 new messages