[PATCH 1/1] Use an optional context manager on the packidx class.

2 views
Skip to first unread message

Luca Carlon

unread,
May 21, 2020, 4:28:47 PM5/21/20
to bup-...@googlegroups.com, Luca Carlon
Signed-off-by: Luca Carlon <carlo...@gmail.com>
---
lib/bup/gc.py | 59 +++++++++++++++++++++++++-------------------------
lib/bup/git.py | 22 +++++++++++++++++++
2 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/lib/bup/gc.py b/lib/bup/gc.py
index b23029d..e663cc0 100644
--- a/lib/bup/gc.py
+++ b/lib/bup/gc.py
@@ -163,40 +163,39 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
if verbosity:
qprogress('preserving live data (%d%% complete)\r'
% ((float(collect_count) / existing_count) * 100))
- idx = git.open_idx(idx_name)
-
- idx_live_count = 0
- for sha in idx:
- if live_objects.exists(sha):
- idx_live_count += 1
+ with git.open_idx(idx_name) as idx:
+ idx_live_count = 0
+ for sha in idx:
+ if live_objects.exists(sha):
+ idx_live_count += 1
+
+ collect_count += idx_live_count
+ if idx_live_count == 0:
+ if verbosity:
+ log('deleting %s\n'
+ % path_msg(git.repo_rel(basename(idx_name))))
+ ns.stale_files.append(idx_name)
+ ns.stale_files.append(idx_name[:-3] + b'pack')
+ continue
+
+ live_frac = idx_live_count / float(len(idx))
+ if live_frac > ((100 - threshold) / 100.0):
+ if verbosity:
+ log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
+ live_frac * 100))
+ continue

- collect_count += idx_live_count
- if idx_live_count == 0:
if verbosity:
- log('deleting %s\n'
- % path_msg(git.repo_rel(basename(idx_name))))
+ log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
+ live_frac * 100))
+ for sha in idx:
+ if live_objects.exists(sha):
+ item_it = cat_pipe.get(hexlify(sha))
+ _, typ, _ = next(item_it)
+ writer.just_write(sha, typ, b''.join(item_it))
+
ns.stale_files.append(idx_name)
ns.stale_files.append(idx_name[:-3] + b'pack')
- continue
-
- live_frac = idx_live_count / float(len(idx))
- if live_frac > ((100 - threshold) / 100.0):
- if verbosity:
- log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
- live_frac * 100))
- continue
-
- if verbosity:
- log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
- live_frac * 100))
- for sha in idx:
- if live_objects.exists(sha):
- item_it = cat_pipe.get(hexlify(sha))
- _, typ, _ = next(item_it)
- writer.just_write(sha, typ, b''.join(item_it))
-
- ns.stale_files.append(idx_name)
- ns.stale_files.append(idx_name[:-3] + b'pack')

if verbosity:
progress('preserving live data (%d%% complete)\n'
diff --git a/lib/bup/git.py b/lib/bup/git.py
index dd78aa4..ead9140 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -414,6 +414,12 @@ class PackIdxV1(PackIdx):
# Avoid slicing shatable for individual hashes (very high overhead)
self.shatable = buffer(self.map, self.sha_ofs, self.nsha * 24)

+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.close()
+
def __len__(self):
return int(self.nsha) # int() from long for python 2

@@ -433,6 +439,11 @@ class PackIdxV1(PackIdx):
start = self.sha_ofs + 4
for ofs in range(start, start + 24 * self.nsha, 24):
yield self.map[ofs : ofs + 20]
+
+ def close(self):
+ if self.map is not None:
+ self.map.close()
+ self.map = None


class PackIdxV2(PackIdx):
@@ -451,6 +462,12 @@ class PackIdxV2(PackIdx):
self.ofs64table_ofs = self.ofstable_ofs + self.nsha * 4
# Avoid slicing this for individual hashes (very high overhead)
self.shatable = buffer(self.map, self.sha_ofs, self.nsha*20)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, *args):
+ self.close()

def __len__(self):
return int(self.nsha) # int() from long for python 2
@@ -476,6 +493,11 @@ class PackIdxV2(PackIdx):
start = self.sha_ofs
for ofs in range(start, start + 20 * self.nsha, 20):
yield self.map[ofs : ofs + 20]
+
+ def close(self):
+ if self.map is not None:
+ self.map.close()
+ self.map = None


_mpi_count = 0
--
2.25.1

Rob Browning

unread,
May 23, 2020, 8:41:27 PM5/23/20
to Luca Carlon, bup-...@googlegroups.com
Luca Carlon <carlo...@gmail.com> writes:

> Signed-off-by: Luca Carlon <carlo...@gmail.com>

Pushed to master with some minor adjustments by Johannes, and
cherry-picked to 0.30.x by me.

Thanks to you both.

commit e059fb80731c631d364f4c56e7202ed16e98ad84 (rlb-u/tmp/pending, tmp/pending)
Author: Luca Carlon <carlo...@gmail.com>
Date: Thu May 21 22:28:41 2020 +0200

git/midx: provide context managers for idx classes

Opening files and then mmap()ing them keeps the files open at the
filesystem level, and then they cannot be fully removed until the
fd is closed when giving up the mapping.

On most filesystems, the file still exists but is no longer visible
ion this case. However, at least on CIFS this results in the file
still being visible in the folder, but it can no longer be opened
again, or such. This leads to a crash in 'bup gc' because it wants
to re-evaluate the idx it just tried to delete.

Teach the PackIdx classes the context manager protocol so we can
easily unmap once they're no longer needed, and use that in bup gc
(for now only there).

For consistency, already add the context manager protocol also to
the midx, even if it's not strictly needed yet since bup gc won't
actually do this to an midx.

Signed-off-by: Luca Carlon <carlo...@gmail.com>
[add commit message based on error description, add midx part,
remove shatable to avoid live pointers into unmapped region]
Signed-off-by: Johannes Berg <joha...@sipsolutions.net>
Reviewed-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4
Reply all
Reply to author
Forward
0 new messages