[PATCH 1/1] Drop workaround for negative cygwin uid/gid

0 views
Skip to first unread message

Rob Browning

unread,
Mar 14, 2026, 4:10:44 PM (10 days ago) Mar 14
to bup-...@googlegroups.com
I suspect this has been long fixed, but if not, we can reintroduce a
workaround, but just for cygwin. Dropping it allows us to switch to
native stat_results (now that they always have nanosecond timestamps)
which notably increases the number of paths per second for a drecurse
traversal of a large directory.

The workaround was originally introduced in

8417c11948532ba890b862dcad9d37810d482616
index.py: handle uid/gid == -1 on cygwin

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Pushed to main.

lib/bup/index.py | 12 ++++----
lib/bup/metadata.py | 6 ++--
lib/bup/xstat.py | 67 ++++++++++++++++-----------------------------
note/main.md | 7 +++++
4 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/lib/bup/index.py b/lib/bup/index.py
index cd571c8d..8c0ec72e 100644
--- a/lib/bup/index.py
+++ b/lib/bup/index.py
@@ -218,13 +218,13 @@ class Entry:
def stale(self, st, check_device=True):
if self.size != st.st_size:
return True
- if self.mtime != st.st_mtime:
+ if self.mtime != st.st_mtime_ns:
return True
if self.sha == EMPTY_SHA:
return True
if not self.gitmode:
return True
- if self.ctime != st.st_ctime:
+ if self.ctime != st.st_ctime_ns:
return True
if self.ino != st.st_ino:
return True
@@ -242,9 +242,9 @@ class Entry:
self.dev = st.st_dev
self.ino = st.st_ino
self.nlink = st.st_nlink
- self.ctime = st.st_ctime
- self.mtime = st.st_mtime
- self.atime = st.st_atime
+ self.ctime = st.st_ctime_ns
+ self.mtime = st.st_mtime_ns
+ self.atime = st.st_atime_ns
self.size = st.st_size
self.mode = st.st_mode
self.flags |= IX_EXISTS
@@ -608,7 +608,7 @@ class Writer:
assert(isdir == endswith)
e = NewEntry(basename, name, self.tmax,
st.st_dev, st.st_ino, st.st_nlink,
- st.st_ctime, st.st_mtime, st.st_atime,
+ st.st_ctime_ns, st.st_mtime_ns, st.st_atime_ns,
st.st_size, st.st_mode, gitmode, sha, flags,
meta_ofs, 0, 0)
else:
diff --git a/lib/bup/metadata.py b/lib/bup/metadata.py
index fb9e17ca..af64b2f3 100644
--- a/lib/bup/metadata.py
+++ b/lib/bup/metadata.py
@@ -244,9 +244,9 @@ class Metadata:
self.size = st.st_size
self.uid = st.st_uid
self.gid = st.st_gid
- self.atime = st.st_atime
- self.mtime = st.st_mtime
- self.ctime = st.st_ctime
+ self.atime = st.st_atime_ns
+ self.mtime = st.st_mtime_ns
+ self.ctime = st.st_ctime_ns
self.user = self.group = b''
entry = pwd_from_uid(st.st_uid)
if entry:
diff --git a/lib/bup/xstat.py b/lib/bup/xstat.py
index 5fbc4a10..6f98d67c 100644
--- a/lib/bup/xstat.py
+++ b/lib/bup/xstat.py
@@ -49,49 +49,30 @@ def lutime(path, times):
"""Times must be provided as (atime_ns, mtime_ns)."""
os.utime(path, ns=times, follow_symlinks=False)

-_cygwin_sys = sys.platform.startswith('cygwin')
-
-def _fix_cygwin_id(id):
- if id < 0:
- id += 0x100000000
- assert(id >= 0)
- return id
-
-
-class stat_result:
- __slots__ = ('st_mode', 'st_ino', 'st_dev', 'st_nlink', 'st_uid', 'st_gid',
- 'st_rdev', 'st_size', 'st_atime', 'st_mtime', 'st_ctime')
- @staticmethod
- def from_py_stat(st):
- result = stat_result()
- result.st_mode = st.st_mode
- result.st_ino = st.st_ino
- result.st_dev = st.st_dev
- result.st_nlink = st.st_nlink
- result.st_uid = st.st_uid
- result.st_gid = st.st_gid
- result.st_rdev = st.st_rdev
- result.st_size = st.st_size
- # Inlined timespec_to_nsecs after profiling
- result.st_atime = st.st_atime_ns
- result.st_mtime = st.st_mtime_ns
- result.st_ctime = st.st_ctime_ns
- if _cygwin_sys:
- result.st_uid = _fix_cygwin_id(result.st_uid)
- result.st_gid = _fix_cygwin_id(result.st_gid)
- return result
-
-
-def stat(path):
- return stat_result.from_py_stat(os.stat(path))
-
-
-def fstat(path):
- return stat_result.from_py_stat(os.fstat(path))
-
-
-def lstat(path):
- return stat_result.from_py_stat(os.lstat(path))
+
+if not sys.platform.startswith('cygwin'):
+ stat = os.stat
+ fstat = os.fstat
+ lstat = os.lstat
+else:
+ # These are potentially redundant until/unless we remove the
+ # metadata _add_common guards (which we could, given that posix
+ # allows negative values).
+ def stat(path):
+ st = os.stat(path)
+ assert st.st_uid >= 0, st
+ assert st.st_gid >= 0, st
+ return st
+ def fstat(path):
+ st = os.fstat(path)
+ assert st.st_uid >= 0, st
+ assert st.st_gid >= 0, st
+ return st
+ def lstat(path):
+ st = os.lstat(path)
+ assert st.st_uid >= 0, st
+ assert st.st_gid >= 0, st
+ return st


def mode_str(mode):
diff --git a/note/main.md b/note/main.md
index 37ec65f6..383b794f 100644
--- a/note/main.md
+++ b/note/main.md
@@ -188,6 +188,13 @@ General
quoted instead of Python "backslashreplace"d, but note that the
format is not settled, i.e. may continue to change.

+* We assume that Cygwin has long fixed the issue where it might return
+ a negative uid or gid, and the workaround has been removed, but bup
+ should still halt if one is encountered. This has allowed us to
+ improve performance when handling a lot of stat results (e.g. index
+ traversals), but please let us know if the assumption is incorrect,
+ and we can reintroduce a more narrowly tailored fix.
+
Bugs
----

--
2.47.3

Reply all
Reply to author
Forward
0 new messages