[PATCH 1/1] Unmap mmapped mem explicitly to free the fd asap.

6 views
Skip to first unread message

Luca Carlon

unread,
May 21, 2020, 4:34:57 PM 5/21/20
to bup-...@googlegroups.com, Luca Carlon
Signed-off-by: Luca Carlon <carlo...@gmail.com>
---
cmd/bloom-cmd.py | 35 ++++++++--------
cmd/list-idx-cmd.py | 24 +++++------
cmd/margin-cmd.py | 90 ++++++++++++++++++++---------------------
cmd/memtest-cmd.py | 59 +++++++++++++--------------
cmd/midx-cmd.py | 66 +++++++++++++++---------------
cmd/server-cmd.py | 8 ++--
cmd/tag-cmd.py | 8 ++--
lib/bup/gc.py | 63 ++++++++++++++---------------
lib/bup/git.py | 52 +++++++++++++++++++-----
lib/bup/t/tclient.py | 40 +++++++++---------
lib/bup/t/tgit.py | 96 ++++++++++++++++++++++----------------------
11 files changed, 287 insertions(+), 254 deletions(-)

diff --git a/cmd/bloom-cmd.py b/cmd/bloom-cmd.py
index d7537ca..7805792 100755
--- a/cmd/bloom-cmd.py
+++ b/cmd/bloom-cmd.py
@@ -55,9 +55,10 @@ def check_bloom(path, bloomfilename, idx):
idx = os.path.join(path, idx)
log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
log('bloom: checking %s\n' % path_msg(ridx))
- for objsha in git.open_idx(idx):
- if not b.exists(objsha):
- add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
+ with git.open_idx(idx) as objshas:
+ for objsha in objshas:
+ if not b.exists(objsha):
+ add_error('bloom: ERROR: object %s missing' % hexstr(objsha))


_first = None
@@ -77,14 +78,14 @@ def do_bloom(path, outfilename, k):
rest_count = 0
for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
progress('bloom: counting: %d\r' % i)
- ix = git.open_idx(name)
- ixbase = os.path.basename(name)
- if b and (ixbase in b.idxnames):
- rest.append(name)
- rest_count += len(ix)
- else:
- add.append(name)
- add_count += len(ix)
+ with git.open_idx(name) as ix:
+ ixbase = os.path.basename(name)
+ if b and (ixbase in b.idxnames):
+ rest.append(name)
+ rest_count += len(ix)
+ else:
+ add.append(name)
+ add_count += len(ix)

if not add:
debug1("bloom: nothing to do.\n")
@@ -128,12 +129,12 @@ def do_bloom(path, outfilename, k):
count = 0
icount = 0
for name in add:
- ix = git.open_idx(name)
- qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
- % (icount*100.0/add_count, icount, add_count))
- b.add_idx(ix)
- count += 1
- icount += len(ix)
+ with git.open_idx(name) as ix:
+ qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
+ % (icount*100.0/add_count, icount, add_count))
+ b.add_idx(ix)
+ count += 1
+ icount += len(ix)

# Currently, there's an open file object for tfname inside b.
# Make sure it's closed before rename.
diff --git a/cmd/list-idx-cmd.py b/cmd/list-idx-cmd.py
index 78bb0a0..67492d3 100755
--- a/cmd/list-idx-cmd.py
+++ b/cmd/list-idx-cmd.py
@@ -48,21 +48,21 @@ count = 0
idxfiles = [argv_bytes(x) for x in extra]
for name in idxfiles:
try:
- ix = git.open_idx(name)
+ with git.open_idx(name) as ix:
+ if len(opt.find) == 40:
+ if ix.exists(bin):
+ out.write(b'%s %s\n' % (name, find))
+ else:
+ # slow, exhaustive search
+ for _i in ix:
+ i = hexlify(_i)
+ if i.startswith(find):
+ out.write(b'%s %s\n' % (name, i))
+ qprogress('Searching: %d\r' % count)
+ count += 1
except git.GitError as e:
add_error('%r: %s' % (name, e))
continue
- if len(opt.find) == 40:
- if ix.exists(bin):
- out.write(b'%s %s\n' % (name, find))
- else:
- # slow, exhaustive search
- for _i in ix:
- i = hexlify(_i)
- if i.startswith(find):
- out.write(b'%s %s\n' % (name, i))
- qprogress('Searching: %d\r' % count)
- count += 1

if saved_errors:
log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
diff --git a/cmd/margin-cmd.py b/cmd/margin-cmd.py
index 14e7cd7..e58e424 100755
--- a/cmd/margin-cmd.py
+++ b/cmd/margin-cmd.py
@@ -28,52 +28,52 @@ if extra:

git.check_repo_or_die()

-mi = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
+with git.open_idx_list(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx) as mi:

-def do_predict(ix, out):
- total = len(ix)
- maxdiff = 0
- for count,i in enumerate(ix):
- prefix = struct.unpack('!Q', i[:8])[0]
- expected = prefix * total // (1 << 64)
- diff = count - expected
- maxdiff = max(maxdiff, abs(diff))
- out.write(b'%d of %d (%.3f%%) '
- % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
- out.flush()
- assert(count+1 == len(ix))
+ def do_predict(ix, out):
+ total = len(ix)
+ maxdiff = 0
+ for count,i in enumerate(ix):
+ prefix = struct.unpack('!Q', i[:8])[0]
+ expected = prefix * total // (1 << 64)
+ diff = count - expected
+ maxdiff = max(maxdiff, abs(diff))
+ out.write(b'%d of %d (%.3f%%) '
+ % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
+ out.flush()
+ assert(count+1 == len(ix))

-sys.stdout.flush()
-out = byte_stream(sys.stdout)
+ sys.stdout.flush()
+ out = byte_stream(sys.stdout)

-if opt.predict:
- if opt.ignore_midx:
- for pack in mi.packs:
- do_predict(pack, out)
+ if opt.predict:
+ if opt.ignore_midx:
+ for pack in mi.packs:
+ do_predict(pack, out)
+ else:
+ do_predict(mi, out)
else:
- do_predict(mi, out)
-else:
- # default mode: find longest matching prefix
- last = b'\0'*20
- longmatch = 0
- for i in mi:
- if i == last:
- continue
- #assert(str(i) >= last)
- pm = _helpers.bitmatch(last, i)
- longmatch = max(longmatch, pm)
- last = i
- out.write(b'%d\n' % longmatch)
- log('%d matching prefix bits\n' % longmatch)
- doublings = math.log(len(mi), 2)
- bpd = longmatch / doublings
- log('%.2f bits per doubling\n' % bpd)
- remain = 160 - longmatch
- rdoublings = remain / bpd
- log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
- larger = 2**rdoublings
- log('%g times larger is possible\n' % larger)
- perperson = larger/POPULATION_OF_EARTH
- log('\nEveryone on earth could have %d data sets like yours, all in one\n'
- 'repository, and we would expect 1 object collision.\n'
- % int(perperson))
+ # default mode: find longest matching prefix
+ last = b'\0'*20
+ longmatch = 0
+ for i in mi:
+ if i == last:
+ continue
+ #assert(str(i) >= last)
+ pm = _helpers.bitmatch(last, i)
+ longmatch = max(longmatch, pm)
+ last = i
+ out.write(b'%d\n' % longmatch)
+ log('%d matching prefix bits\n' % longmatch)
+ doublings = math.log(len(mi), 2)
+ bpd = longmatch / doublings
+ log('%.2f bits per doubling\n' % bpd)
+ remain = 160 - longmatch
+ rdoublings = remain / bpd
+ log('%d bits (%.2f doublings) remaining\n' % (remain, rdoublings))
+ larger = 2**rdoublings
+ log('%g times larger is possible\n' % larger)
+ perperson = larger/POPULATION_OF_EARTH
+ log('\nEveryone on earth could have %d data sets like yours, all in one\n'
+ 'repository, and we would expect 1 object collision.\n'
+ % int(perperson))
diff --git a/cmd/memtest-cmd.py b/cmd/memtest-cmd.py
index bf5f0d5..2025902 100755
--- a/cmd/memtest-cmd.py
+++ b/cmd/memtest-cmd.py
@@ -86,36 +86,35 @@ if extra:
o.fatal('no arguments expected')

git.check_repo_or_die()
-m = git.PackIdxList(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx)
-
-sys.stdout.flush()
-out = byte_stream(sys.stdout)
-
-report(-1, out)
-_helpers.random_sha()
-report(0, out)
-
-if opt.existing:
- def foreverit(mi):
- while 1:
- for e in mi:
- yield e
- objit = iter(foreverit(m))
-
-for c in range(opt.cycles):
- for n in range(opt.number):
- if opt.existing:
- bin = next(objit)
- assert(m.exists(bin))
- else:
- bin = _helpers.random_sha()
-
- # technically, a randomly generated object id might exist.
- # but the likelihood of that is the likelihood of finding
- # a collision in sha-1 by accident, which is so unlikely that
- # we don't care.
- assert(not m.exists(bin))
- report((c+1)*opt.number, out)
+with git.open_idx_list(git.repo(b'objects/pack'), ignore_midx=opt.ignore_midx) as m:
+ sys.stdout.flush()
+ out = byte_stream(sys.stdout)
+
+ report(-1, out)
+ _helpers.random_sha()
+ report(0, out)
+
+ if opt.existing:
+ def foreverit(mi):
+ while 1:
+ for e in mi:
+ yield e
+ objit = iter(foreverit(m))
+
+ for c in range(opt.cycles):
+ for n in range(opt.number):
+ if opt.existing:
+ bin = next(objit)
+ assert(m.exists(bin))
+ else:
+ bin = _helpers.random_sha()
+
+ # technically, a randomly generated object id might exist.
+ # but the likelihood of that is the likelihood of finding
+ # a collision in sha-1 by accident, which is so unlikely that
+ # we don't care.
+ assert(not m.exists(bin))
+ report((c+1)*opt.number, out)

if bloom._total_searches:
out.write(b'bloom: %d objects searched in %d steps: avg %.3f steps/object\n'
diff --git a/cmd/midx-cmd.py b/cmd/midx-cmd.py
index cadf7c3..097fa19 100755
--- a/cmd/midx-cmd.py
+++ b/cmd/midx-cmd.py
@@ -53,36 +53,36 @@ def check_midx(name):
nicename = git.repo_rel(name)
log('Checking %s.\n' % path_msg(nicename))
try:
- ix = git.open_idx(name)
+ with git.open_idx(name) as ix:
+ for count,subname in enumerate(ix.idxnames):
+ with git.open_idx(os.path.join(os.path.dirname(name), subname)) as sub:
+ for ecount,e in enumerate(sub):
+ if not (ecount % 1234):
+ qprogress(' %d/%d: %s %d/%d\r'
+ % (count, len(ix.idxnames),
+ git.shorten_hash(subname).decode('ascii'),
+ ecount, len(sub)))
+ if not sub.exists(e):
+ add_error("%s: %s: %s missing from idx"
+ % (path_msg(nicename),
+ git.shorten_hash(subname).decode('ascii'),
+ hexstr(e)))
+ if not ix.exists(e):
+ add_error("%s: %s: %s missing from midx"
+ % (path_msg(nicename),
+ git.shorten_hash(subname).decode('ascii'),
+ hexstr(e)))
+ prev = None
+ for ecount,e in enumerate(ix):
+ if not (ecount % 1234):
+ qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
+ if e and prev and not e >= prev:
+ add_error('%s: ordering error: %s < %s'
+ % (nicename, hexstr(e), hexstr(prev)))
+ prev = e
except git.GitError as e:
add_error('%s: %s' % (pathmsg(name), e))
return
- for count,subname in enumerate(ix.idxnames):
- sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
- for ecount,e in enumerate(sub):
- if not (ecount % 1234):
- qprogress(' %d/%d: %s %d/%d\r'
- % (count, len(ix.idxnames),
- git.shorten_hash(subname).decode('ascii'),
- ecount, len(sub)))
- if not sub.exists(e):
- add_error("%s: %s: %s missing from idx"
- % (path_msg(nicename),
- git.shorten_hash(subname).decode('ascii'),
- hexstr(e)))
- if not ix.exists(e):
- add_error("%s: %s: %s missing from midx"
- % (path_msg(nicename),
- git.shorten_hash(subname).decode('ascii'),
- hexstr(e)))
- prev = None
- for ecount,e in enumerate(ix):
- if not (ecount % 1234):
- qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
- if e and prev and not e >= prev:
- add_error('%s: ordering error: %s < %s'
- % (nicename, hexstr(e), hexstr(prev)))
- prev = e


_first = None
@@ -99,7 +99,7 @@ def _do_midx(outdir, outfilename, infilenames, prefixstr):
midxs = []
try:
for name in infilenames:
- ix = git.open_idx(name)
+ ix = git.open_idx_noctx(name)
midxs.append(ix)
inp.append((
ix.map,
@@ -180,9 +180,9 @@ def do_midx_dir(path, outfilename, prout):
midxs = glob.glob(b'%s/*.midx' % path)
contents = {}
for mname in midxs:
- m = git.open_idx(mname)
- contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
- sizes[mname] = len(m)
+ with git.open_idx(mname) as m:
+ contents[mname] = [(b'%s/%s' % (path,i)) for i in m.idxnames]
+ sizes[mname] = len(m)

# sort the biggest+newest midxes first, so that we can eliminate
# smaller (or older) redundant ones that come later in the list
@@ -203,8 +203,8 @@ def do_midx_dir(path, outfilename, prout):
idxs = [k for k in glob.glob(b'%s/*.idx' % path) if not already.get(k)]

for iname in idxs:
- i = git.open_idx(iname)
- sizes[iname] = len(i)
+ with git.open_idx(iname) as i:
+ sizes[iname] = len(i)

all = [(sizes[n],n) for n in (midxs + idxs)]

diff --git a/cmd/server-cmd.py b/cmd/server-cmd.py
index 2c8cc05..dea61f2 100755
--- a/cmd/server-cmd.py
+++ b/cmd/server-cmd.py
@@ -78,10 +78,10 @@ def send_index(conn, name):
_init_session()
assert name.find(b'/') < 0
assert name.endswith(b'.idx')
- idx = git.open_idx(git.repo(b'objects/pack/%s' % name))
- conn.write(struct.pack('!I', len(idx.map)))
- conn.write(idx.map)
- conn.ok()
+ with git.open_idx(git.repo(b'objects/pack/%s' % name)) as idx:
+ conn.write(struct.pack('!I', len(idx.map)))
+ conn.write(idx.map)
+ conn.ok()


def receive_objects_v2(conn, junk):
diff --git a/cmd/tag-cmd.py b/cmd/tag-cmd.py
index 44c5b33..b9b040d 100755
--- a/cmd/tag-cmd.py
+++ b/cmd/tag-cmd.py
@@ -80,10 +80,10 @@ if not hash:
log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
sys.exit(2)

-pL = git.PackIdxList(git.repo(b'objects/pack'))
-if not pL.exists(hash):
- log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
- sys.exit(2)
+with git.open_idx_list(git.repo(b'objects/pack')) as pL:
+ if not pL.exists(hash):
+ log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
+ sys.exit(2)

tag_file = git.repo(b'refs/tags/' + tag_name)
try:
diff --git a/lib/bup/gc.py b/lib/bup/gc.py
index b23029d..840f0b4 100644
--- a/lib/bup/gc.py
+++ b/lib/bup/gc.py
@@ -57,8 +57,8 @@ def count_objects(dir, verbosity):
log('found %d objects (%d/%d %s)\r'
% (object_count, i + 1, len(indexes),
path_msg(basename(idx_name))))
- idx = git.open_idx(idx_name)
- object_count += len(idx)
+ with git.open_idx(idx_name) as idx:
+ object_count += len(idx)
return object_count


@@ -163,40 +163,39 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
if verbosity:
qprogress('preserving live data (%d%% complete)\r'
% ((float(collect_count) / existing_count) * 100))
- idx = git.open_idx(idx_name)
+ with git.open_idx(idx_name) as idx:
+ idx_live_count = 0
+ for sha in idx:
+ if live_objects.exists(sha):
+ idx_live_count += 1
+
+ collect_count += idx_live_count
+ if idx_live_count == 0:
+ if verbosity:
+ log('deleting %s\n'
+ % path_msg(git.repo_rel(basename(idx_name))))
+ ns.stale_files.append(idx_name)
+ ns.stale_files.append(idx_name[:-3] + b'pack')
+ continue
+
+ live_frac = idx_live_count / float(len(idx))
+ if live_frac > ((100 - threshold) / 100.0):
+ if verbosity:
+ log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
+ live_frac * 100))
+ continue

- idx_live_count = 0
- for sha in idx:
- if live_objects.exists(sha):
- idx_live_count += 1
-
- collect_count += idx_live_count
- if idx_live_count == 0:
if verbosity:
- log('deleting %s\n'
- % path_msg(git.repo_rel(basename(idx_name))))
+ log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
+ live_frac * 100))
+ for sha in idx:
+ if live_objects.exists(sha):
+ item_it = cat_pipe.get(hexlify(sha))
+ _, typ, _ = next(item_it)
+ writer.just_write(sha, typ, b''.join(item_it))
+
ns.stale_files.append(idx_name)
ns.stale_files.append(idx_name[:-3] + b'pack')
- continue
-
- live_frac = idx_live_count / float(len(idx))
- if live_frac > ((100 - threshold) / 100.0):
- if verbosity:
- log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
- live_frac * 100))
- continue
-
- if verbosity:
- log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
- live_frac * 100))
- for sha in idx:
- if live_objects.exists(sha):
- item_it = cat_pipe.get(hexlify(sha))
- _, typ, _ = next(item_it)
- writer.just_write(sha, typ, b''.join(item_it))
-
- ns.stale_files.append(idx_name)
- ns.stale_files.append(idx_name[:-3] + b'pack')

if verbosity:
progress('preserving live data (%d%% complete)\n'
diff --git a/lib/bup/git.py b/lib/bup/git.py
index dd78aa4..af5c1ea 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -10,6 +10,7 @@ from binascii import hexlify, unhexlify
from collections import namedtuple
from itertools import islice
from numbers import Integral
+from contextlib import contextmanager

from bup import _helpers, compat, hashsplit, path, midx, bloom, xstat
from bup.compat import (buffer,
@@ -434,6 +435,11 @@ class PackIdxV1(PackIdx):
for ofs in range(start, start + 24 * self.nsha, 24):
yield self.map[ofs : ofs + 20]

+ def close(self):
+ if self.map is not None:
+ self.map.close()
+ self.map = None
+

class PackIdxV2(PackIdx):
"""Object representation of a Git pack index (version 2) file."""
@@ -477,6 +483,11 @@ class PackIdxV2(PackIdx):
for ofs in range(start, start + 20 * self.nsha, 20):
yield self.map[ofs : ofs + 20]

+ def close(self):
+ if self.map is not None:
+ self.map.close()
+ self.map = None
+

_mpi_count = 0
class PackIdxList:
@@ -602,7 +613,7 @@ class PackIdxList:
for full in glob.glob(os.path.join(self.dir, b'*.idx')):
if not d.get(full):
try:
- ix = open_idx(full)
+ ix = open_idx_noctx(full)
except GitError as e:
add_error(e)
continue
@@ -622,9 +633,23 @@ class PackIdxList:
def add(self, hash):
"""Insert an additional object in the list."""
self.also.add(hash)
+
+ def close(self):
+ if self.packs is not None:
+ for pack in self.packs:
+ pack.close()


+@contextmanager
def open_idx(filename):
+ idx = open_idx_noctx(filename)
+ try:
+ yield idx
+ finally:
+ idx.close()
+
+
+def open_idx_noctx(filename):
if filename.endswith(b'.idx'):
f = open(filename, 'rb')
header = f.read(8)
@@ -646,6 +671,15 @@ def open_idx(filename):
raise GitError('idx filenames must end with .idx or .midx')


+@contextmanager
+def open_idx_list(dir, ignore_midx=False):
+ pack = PackIdxList(dir, ignore_midx)
+ try:
+ yield pack
+ finally:
+ pack.close()
+
+
def idxmerge(idxlist, final_progress=True):
"""Generate a list of all the objects reachable in a PackIdxList."""
def pfunc(count, total):
@@ -1065,16 +1099,16 @@ def rev_parse(committish, repo_dir=None):
debug2("resolved from ref: commit = %s\n" % hexlify(head))
return head

- pL = PackIdxList(repo(b'objects/pack', repo_dir=repo_dir))
+ with open_idx_list(repo(b'objects/pack', repo_dir=repo_dir)) as pL:

- if len(committish) == 40:
- try:
- hash = unhexlify(committish)
- except TypeError:
- return None
+ if len(committish) == 40:
+ try:
+ hash = unhexlify(committish)
+ except TypeError:
+ return None

- if pL.exists(hash):
- return hash
+ if pL.exists(hash):
+ return hash

return None

diff --git a/lib/bup/t/tclient.py b/lib/bup/t/tclient.py
index afbb09f..d5453af 100644
--- a/lib/bup/t/tclient.py
+++ b/lib/bup/t/tclient.py
@@ -117,26 +117,26 @@ def test_midx_refreshing():
p2name = os.path.join(c.cachedir, p2base)
del rw

- pi = git.PackIdxList(bupdir + b'/objects/pack')
- WVPASSEQ(len(pi.packs), 2)
- pi.refresh()
- WVPASSEQ(len(pi.packs), 2)
- WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
- sorted([p1base, p2base]))
-
- p1 = git.open_idx(p1name)
- WVPASS(p1.exists(s1sha))
- p2 = git.open_idx(p2name)
- WVFAIL(p2.exists(s1sha))
- WVPASS(p2.exists(s2sha))
-
- subprocess.call([path.exe(), b'midx', b'-f'])
- pi.refresh()
- WVPASSEQ(len(pi.packs), 1)
- pi.refresh(skip_midx=True)
- WVPASSEQ(len(pi.packs), 2)
- pi.refresh(skip_midx=False)
- WVPASSEQ(len(pi.packs), 1)
+ with git.open_idx_list(bupdir + b'/objects/pack') as pi:
+ WVPASSEQ(len(pi.packs), 2)
+ pi.refresh()
+ WVPASSEQ(len(pi.packs), 2)
+ WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
+ sorted([p1base, p2base]))
+
+ with git.open_idx(p1name) as p1:
+ WVPASS(p1.exists(s1sha))
+ with git.open_idx(p2name) as p2:
+ WVFAIL(p2.exists(s1sha))
+ WVPASS(p2.exists(s2sha))
+
+ subprocess.call([path.exe(), b'midx', b'-f'])
+ pi.refresh()
+ WVPASSEQ(len(pi.packs), 1)
+ pi.refresh(skip_midx=True)
+ WVPASSEQ(len(pi.packs), 2)
+ pi.refresh(skip_midx=False)
+ WVPASSEQ(len(pi.packs), 1)


@wvtest
diff --git a/lib/bup/t/tgit.py b/lib/bup/t/tgit.py
index 09faa2e..724c7fd 100644
--- a/lib/bup/t/tgit.py
+++ b/lib/bup/t/tgit.py
@@ -147,24 +147,24 @@ def testpacks():
WVPASS(os.path.exists(nameprefix + b'.pack'))
WVPASS(os.path.exists(nameprefix + b'.idx'))

- r = git.open_idx(nameprefix + b'.idx')
- print(repr(r.fanout))
+ with git.open_idx(nameprefix + b'.idx') as r:
+ print(repr(r.fanout))

- for i in range(nobj):
- WVPASS(r.find_offset(hashes[i]) > 0)
- WVPASS(r.exists(hashes[99]))
- WVFAIL(r.exists(b'\0'*20))
+ for i in range(nobj):
+ WVPASS(r.find_offset(hashes[i]) > 0)
+ WVPASS(r.exists(hashes[99]))
+ WVFAIL(r.exists(b'\0'*20))

- pi = iter(r)
- for h in sorted(hashes):
- WVPASSEQ(hexlify(next(pi)), hexlify(h))
+ pi = iter(r)
+ for h in sorted(hashes):
+ WVPASSEQ(hexlify(next(pi)), hexlify(h))

- WVFAIL(r.find_offset(b'\0'*20))
+ WVFAIL(r.find_offset(b'\0'*20))

- r = git.PackIdxList(bupdir + b'/objects/pack')
- WVPASS(r.exists(hashes[5]))
- WVPASS(r.exists(hashes[6]))
- WVFAIL(r.exists(b'\0'*20))
+ with git.open_idx_list(bupdir + b'/objects/pack') as r:
+ WVPASS(r.exists(hashes[5]))
+ WVPASS(r.exists(hashes[6]))
+ WVFAIL(r.exists(b'\0'*20))


@wvtest
@@ -186,11 +186,11 @@ def test_pack_name_lookup():
log('\n')
idxnames.append(os.path.basename(w.close() + b'.idx'))

- r = git.PackIdxList(packdir)
- WVPASSEQ(len(r.packs), 2)
- for e,idxname in enumerate(idxnames):
- for i in range(e*2, (e+1)*2):
- WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))
+ with git.open_idx_list(packdir) as r:
+ WVPASSEQ(len(r.packs), 2)
+ for e,idxname in enumerate(idxnames):
+ for i in range(e*2, (e+1)*2):
+ WVPASSEQ(idxname, r.exists(hashes[i], want_source=True))


@wvtest
@@ -516,32 +516,32 @@ def test_midx_close():
for i in range(10):
_create_idx(tmpdir, i)
git.auto_midx(tmpdir)
- l = git.PackIdxList(tmpdir)
- # this doesn't exist (yet)
- WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
- for i in range(10, 15):
- _create_idx(tmpdir, i)
- # delete the midx ...
- # TODO: why do we need to? git.auto_midx() below doesn't?!
- for fn in os.listdir(tmpdir):
- if fn.endswith(b'.midx'):
- os.unlink(os.path.join(tmpdir, fn))
- # and make a new one
- git.auto_midx(tmpdir)
- # check it still doesn't exist - we haven't refreshed
- WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
- # check that we still have the midx open, this really
- # just checks more for the kernel API ('deleted' string)
- for fn in openfiles():
- if not b'midx-' in fn:
- continue
- WVPASSEQ(True, b'deleted' in fn)
- # refresh the PackIdxList
- l.refresh()
- # and check that an object in pack 10 exists now
- WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))
- for fn in openfiles():
- if not b'midx-' in fn:
- continue
- # check that we don't have it open anymore
- WVPASSEQ(False, b'deleted' in fn)
+ with git.open_idx_list(tmpdir) as l:
+ # this doesn't exist (yet)
+ WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
+ for i in range(10, 15):
+ _create_idx(tmpdir, i)
+ # delete the midx ...
+ # TODO: why do we need to? git.auto_midx() below doesn't?!
+ for fn in os.listdir(tmpdir):
+ if fn.endswith(b'.midx'):
+ os.unlink(os.path.join(tmpdir, fn))
+ # and make a new one
+ git.auto_midx(tmpdir)
+ # check it still doesn't exist - we haven't refreshed
+ WVPASSEQ(None, l.exists(struct.pack('18xBB', 10, 0)))
+ # check that we still have the midx open, this really
+ # just checks more for the kernel API ('deleted' string)
+ for fn in openfiles():
+ if not b'midx-' in fn:
+ continue
+ WVPASSEQ(True, b'deleted' in fn)
+ # refresh the PackIdxList
+ l.refresh()
+ # and check that an object in pack 10 exists now
+ WVPASSEQ(True, l.exists(struct.pack('18xBB', 10, 0)))
+ for fn in openfiles():
+ if not b'midx-' in fn:
+ continue
+ # check that we don't have it open anymore
+ WVPASSEQ(False, b'deleted' in fn)
--
2.25.1

Rob Browning

unread,
May 23, 2020, 8:39:56 PM 5/23/20
to Luca Carlon, bup-...@googlegroups.com
Luca Carlon <carlo...@gmail.com> writes:

> Signed-off-by: Luca Carlon <carlo...@gmail.com>

Pushed to master with some minor adjustments by Johannes, and
cherry-picked to 0.30.x by me.

Thanks to you both.

commit e059fb80731c631d364f4c56e7202ed16e98ad84 (rlb-u/tmp/pending, tmp/pending)
Author: Luca Carlon <carlo...@gmail.com>
Date: Thu May 21 22:28:41 2020 +0200

git/midx: provide context managers for idx classes

Opening files and then mmap()ing them keeps the files open at the
filesystem level, and then they cannot be fully removed until the
fd is closed when giving up the mapping.

On most filesystems, the file still exists but is no longer visible
ion this case. However, at least on CIFS this results in the file
still being visible in the folder, but it can no longer be opened
again, or such. This leads to a crash in 'bup gc' because it wants
to re-evaluate the idx it just tried to delete.

Teach the PackIdx classes the context manager protocol so we can
easily unmap once they're no longer needed, and use that in bup gc
(for now only there).

For consistency, already add the context manager protocol also to
the midx, even if it's not strictly needed yet since bup gc won't
actually do this to an midx.

Signed-off-by: Luca Carlon <carlo...@gmail.com>
[add commit message based on error description, add midx part,
remove shatable to avoid live pointers into unmapped region]
Signed-off-by: Johannes Berg <joha...@sipsolutions.net>
Reviewed-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
May 23, 2020, 8:42:18 PM 5/23/20
to Luca Carlon, bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

> Luca Carlon <carlo...@gmail.com> writes:
>
>> Signed-off-by: Luca Carlon <carlo...@gmail.com>
>
> Pushed to master with some minor adjustments by Johannes, and
> cherry-picked to 0.30.x by me.

Oops, replied to wrong thread - this one hasn't been pushed yet.
Reply all
Reply to author
Forward
0 new messages