Use an ExitStack to protect/handle cleanup both in init and close (and
__del__). Previously exceptions thrown, particularly in init, could
leave the mmap/file open. Switch to a new _open member as the sole
indicator that the instance is viable so we can avoid needing to
reset multiple variables when cleaning up.
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/bloom.py | 147 +++++++++++++++++++++++------------------------
1 file changed, 73 insertions(+), 74 deletions(-)
diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py
index e8b90420..ce857745 100644
--- a/lib/bup/bloom.py
+++ b/lib/bup/bloom.py
@@ -105,79 +105,72 @@ class _ShaBloom:
"""Wrapper which contains data from multiple index files. """
def __init__(self, file, *, readwrite=False, expected=-1):
# Takes ownership of file (eventually closing it)
- assert file.name.endswith(b'.bloom')
- self.readwrite = readwrite
- self.file = file
- self.map = None
- if not readwrite:
- self.map = mmap_read(self.file)
- else:
- assert set(file.mode) == set('r+b'), file.mode
- assert expected > 0
- file.seek(0)
- # Decide if we want to mmap() the pages as writable ('immediate'
- # write) or else map them privately for later writing back to
- # the file ('delayed' write). A bloom table's write access
- # pattern is such that we dirty almost all the pages after adding
- # very few entries. But the table is so big that dirtying
- # *all* the pages often exceeds Linux's default
- # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
- # thus causing it to start flushing the table before we're
- # finished... even though there's more than enough space to
- # store the bloom table in RAM.
- #
- # To work around that behaviour, if we calculate that we'll
- # probably end up touching the whole table anyway (at least
- # one bit flipped per memory page), let's use a "private" mmap,
- # which defeats Linux's ability to flush it to disk. Then we'll
- # flush it as one big lump during close().
- pages = os.fstat(file.fileno()).st_size // 4096 * 5 # assume k=5
- self.delaywrite = expected > pages
- debug1('bloom: delaywrite=%r\n' % self.delaywrite)
- if self.delaywrite:
- self.map = mmap_readwrite_private(self.file, close=False)
+ self._open = False
+ self._ctx = ExitStack()
+ with self._ctx:
+ self._ctx.enter_context(file)
+ assert file.name.endswith(b'.bloom')
+ self.readwrite = readwrite
+
+ self.file = file
+ self.map = None
+ if not readwrite:
+ self.map = self._ctx.enter_context(mmap_read(self.file))
else:
- self.map = mmap_readwrite(self.file, close=False)
- got = self.map[0:4]
- if got != b'BLOM':
- log(f'Warning: invalid BLOM header ({pm(got)}) in {pm(file.name)}\n')
- self._init_failed()
- return
- ver = struct.unpack('!I', self.map[4:8])[0]
- if ver < BLOOM_VERSION:
- log(f'Warning: ignoring old-style (v{ver}) bloom {pm(file.name)}\n')
- self._init_failed()
- return
- if ver > BLOOM_VERSION:
- log(f'Warning: ignoring too-new (v{ver}) bloom {pm(file.name)}\n')
- self._init_failed()
- return
-
- self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
- idxnamestr = self.map[16 + 2**self.bits:]
- if idxnamestr:
- self.idxnames = idxnamestr.split(b'\0')
- else:
- self.idxnames = []
-
- def _init_failed(self):
- self.idxnames = []
- self.bits = self.entries = 0
- self.map, tmp_map = None, self.map
- self.file, tmp_file = None, self.file
- try:
- if tmp_map:
- tmp_map.close()
- finally:
- if self.file:
- tmp_file.close()
-
- def valid(self):
- return self.map and self.bits
+ assert set(file.mode) == set('r+b'), file.mode
+ assert expected > 0
+ file.seek(0)
+ # Decide if we want to mmap() the pages as writable ('immediate'
+ # write) or else map them privately for later writing back to
+ # the file ('delayed' write). A bloom table's write access
+ # pattern is such that we dirty almost all the pages after adding
+ # very few entries. But the table is so big that dirtying
+ # *all* the pages often exceeds Linux's default
+ # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
+ # thus causing it to start flushing the table before we're
+ # finished... even though there's more than enough space to
+ # store the bloom table in RAM.
+ #
+ # To work around that behaviour, if we calculate that we'll
+ # probably end up touching the whole table anyway (at least
+ # one bit flipped per memory page), let's use a "private" mmap,
+ # which defeats Linux's ability to flush it to disk. Then we'll
+ # flush it as one big lump during close().
+ pages = os.fstat(file.fileno()).st_size // 4096 * 5 # assume k=5
+ self.delaywrite = expected > pages
+ debug1('bloom: delaywrite=%r\n' % self.delaywrite)
+ if self.delaywrite:
+ self.map = mmap_readwrite_private(self.file, close=False)
+ else:
+ self.map = mmap_readwrite(self.file, close=False)
+ self._ctx.enter_context(self.map)
+
+ got = self.map[0:4]
+ if got != b'BLOM':
+ log(f'Warning: invalid BLOM header ({pm(got)}) in {pm(file.name)}\n')
+ return
+ ver = struct.unpack('!I', self.map[4:8])[0]
+ if ver < BLOOM_VERSION:
+ log(f'Warning: ignoring old-style (v{ver}) bloom {pm(file.name)}\n')
+ return
+ if ver > BLOOM_VERSION:
+ log(f'Warning: ignoring too-new (v{ver}) bloom {pm(file.name)}\n')
+ return
+
+ self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
+ idxnamestr = self.map[16 + 2**self.bits:]
+ if idxnamestr:
+ self.idxnames = idxnamestr.split(b'\0')
+ else:
+ self.idxnames = []
+ self._open = True
+ self._ctx = self._ctx.pop_all()
def close(self):
- self.closed = True
- try:
+ if not self._open:
+ return
+ with self._ctx:
+ self._open = False
if self.map and self.readwrite:
debug2("bloom: closing with %d entries\n" % self.entries)
self.map[12:16] = struct.pack('!I', self.entries)
@@ -189,14 +182,17 @@ class _ShaBloom:
self.file.seek(16 + 2**self.bits)
if self.idxnames:
self.file.write(b'\0'.join(self.idxnames))
- finally: # This won't handle pending exceptions correctly in py2
- self._init_failed()
- def __del__(self): assert self.closed
+ def __del__(self): assert not self._open
def __enter__(self): return self
def __exit__(self, type, value, traceback): self.close()
+ def valid(self):
+ assert self._open
+ return self.map and self.bits
+
def pfalse_positive(self, additional=0):
+ assert self._open
n = self.entries + additional
m = 8*2**self.bits
k = self.k
@@ -204,12 +200,13 @@ class _ShaBloom:
def add(self, ids):
"""Add the hashes in ids (packed binary 20-bytes) to the filter."""
- if not self.map:
+ if not self._open:
raise Exception("Cannot add to closed bloom")
self.entries += bloom_add(self.map, ids, self.bits, self.k)
def add_idx(self, ix):
"""Add the object to the filter."""
+ assert self._open
self.add(ix.shatable)
self.idxnames.append(os.path.basename(ix.name))
@@ -220,6 +217,7 @@ class _ShaBloom:
If it returns true, there is a small probability that it exists
anyway, so you'll have to check it some other way.
"""
+ assert self._open
global _total_searches, _total_steps
_total_searches += 1
if not self.map:
@@ -229,6 +227,7 @@ class _ShaBloom:
return found
def __len__(self):
+ assert self._open
return int(self.entries)
--
2.47.3