Split opening a bloom filter into two categories, opening
read-only (BloomReader), and opening for writes (BloomWriter) since
the read-only case is much simpler, and also allows BloomReader to
omit the update methods.
Ensure all filter instances are context managed, and can't leak
resources for errors during initialization, context management, or
close.
Given the fact that when writing (for "not delaywrite") we update the
filter in place, and we need to do some bookkeeping (e.g. entries) on
close(), try to ensure we can never use an "incomplete" bloom filter
by moving the file to a temporary path when writing, and only moving
it back to its original path after a successful close. Delete it when
there's an error.
Handle expected errors while opening an existing filter by raising
specific BloomInvalid or BloomNotFound errors from init instead of
producing an unusable instance and expecting the caller to check
valid().
Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
Pushed to main.
lib/bup/bloom.py | 371 ++++++++++++++++++++++++++---------------
lib/bup/cmd/bloom.py | 80 +++++----
lib/bup/gc.py | 4 +-
lib/bup/git.py | 18 +-
lib/bup/helpers.py | 15 +-
test/int/test_bloom.py | 15 +-
6 files changed, 304 insertions(+), 199 deletions(-)
diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py
index cb305d91..f0e32408 100644
--- a/lib/bup/bloom.py
+++ b/lib/bup/bloom.py
@@ -80,11 +80,22 @@ None of this tells us what max_pfalse_positive to choose.
Brandon Low <lost...@lostlogicx.com> 2011-02-04
"""
-import os, math, struct
+from contextlib import ExitStack
+from tempfile import mkstemp
+import builtins, os, math, struct
from bup import _helpers
-from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite,
- mmap_readwrite_private, unlink)
+from bup.helpers import \
+ (debug1,
+ debug2,
+ finalized,
+ log,
+ mmap_read,
+ mmap_readwrite,
+ mmap_readwrite_private,
+ notimplemented,
+ unlink)
+from bup.io import path_msg as pm
BLOOM_VERSION = 2
@@ -98,129 +109,34 @@ _total_steps = 0
bloom_contains = _helpers.bloom_contains
bloom_add = _helpers.bloom_add
-# FIXME: check bloom create() and ShaBloom handling/ownership of "f".
-# The ownership semantics should be clarified since the caller needs
-# to know who is responsible for closing it.
-
-class ShaBloom:
- """Wrapper which contains data from multiple index files. """
- def __init__(self, filename, f=None, readwrite=False, expected=-1):
- self.closed = False
-
- self.name = filename
- self.readwrite = readwrite
- self.file = None
- self.map = None
- assert(filename.endswith(b'.bloom'))
- if readwrite:
- assert(expected > 0)
- # pylint: disable-next=consider-using-with
- self.file = f = f or open(filename, 'r+b')
- f.seek(0)
-
- # Decide if we want to mmap() the pages as writable ('immediate'
- # write) or else map them privately for later writing back to
- # the file ('delayed' write). A bloom table's write access
- # pattern is such that we dirty almost all the pages after adding
- # very few entries. But the table is so big that dirtying
- # *all* the pages often exceeds Linux's default
- # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
- # thus causing it to start flushing the table before we're
- # finished... even though there's more than enough space to
- # store the bloom table in RAM.
- #
- # To work around that behaviour, if we calculate that we'll
- # probably end up touching the whole table anyway (at least
- # one bit flipped per memory page), let's use a "private" mmap,
- # which defeats Linux's ability to flush it to disk. Then we'll
- # flush it as one big lump during close().
- pages = os.fstat(f.fileno()).st_size // 4096 * 5 # assume k=5
- self.delaywrite = expected > pages
- debug1('bloom: delaywrite=%r\n' % self.delaywrite)
- if self.delaywrite:
- self.map = mmap_readwrite_private(self.file, close=False)
- else:
- self.map = mmap_readwrite(self.file, close=False)
- else:
- # pylint: disable-next=consider-using-with
- self.file = f or open(filename, 'rb')
- self.map = mmap_read(self.file)
- got = self.map[0:4]
- if got != b'BLOM':
- log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename))
- self._init_failed()
- return
- ver = struct.unpack('!I', self.map[4:8])[0]
- if ver < BLOOM_VERSION:
- log('Warning: ignoring old-style (v%d) bloom %r\n'
- % (ver, filename))
- self._init_failed()
- return
- if ver > BLOOM_VERSION:
- log('Warning: ignoring too-new (v%d) bloom %r\n'
- % (ver, filename))
- self._init_failed()
- return
- self.bits, self.k, self.entries = struct.unpack('!HHI', self.map[8:16])
- idxnamestr = self.map[16 + 2**self.bits:]
- if idxnamestr:
- self.idxnames = idxnamestr.split(b'\0')
- else:
- self.idxnames = []
-
- def _init_failed(self):
- self.idxnames = []
- self.bits = self.entries = 0
- self.map, tmp_map = None, self.map
- self.file, tmp_file = None, self.file
- try:
- if tmp_map:
- tmp_map.close()
- finally:
- if self.file:
- tmp_file.close()
+class _BloomBase:
+ """Bloom filter elements shared across both readers and writers."""
+ __slots__ = 'bits', 'entries', 'idxnames', 'k', 'map', 'path', 'version'
+ # mmap not None indicates "open"
- def valid(self):
- return self.map and self.bits
-
- def close(self):
- self.closed = True
- try:
- if self.map and self.readwrite:
- debug2("bloom: closing with %d entries\n" % self.entries)
- self.map[12:16] = struct.pack('!I', self.entries)
- if self.delaywrite:
- self.file.seek(0)
- self.file.write(self.map)
- else:
- self.map.flush()
- self.file.seek(16 + 2**self.bits)
- if self.idxnames:
- self.file.write(b'\0'.join(self.idxnames))
- finally: # This won't handle pending exceptions correctly in py2
- self._init_failed()
+ # Should be completely replaced by subclasses so that all of the
+ # context management / logic will be visible in one place.
+ @notimplemented
+ def __init__(self):
+ # All __slots__ are required (these assignments just satisfy pylint)
+ self.bits, self.entries, self.k, self.map = [None] * 4
- def __del__(self): assert self.closed
+ # Must be a context manager
+ @notimplemented
+ def __del__(self): pass
+ @notimplemented
def __enter__(self): return self
- def __exit__(self, type, value, traceback): self.close()
+ @notimplemented
+ def __exit__(self, type, value, traceback): pass
def pfalse_positive(self, additional=0):
+ assert self.map
n = self.entries + additional
m = 8*2**self.bits
k = self.k
return 100*(1-math.exp(-k*float(n)/m))**k
- def add(self, ids):
- """Add the hashes in ids (packed binary 20-bytes) to the filter."""
- if not self.map:
- raise Exception("Cannot add to closed bloom")
- self.entries += bloom_add(self.map, ids, self.bits, self.k)
-
- def add_idx(self, ix):
- """Add the object to the filter."""
- self.add(ix.shatable)
- self.idxnames.append(os.path.basename(ix.name))
-
def exists(self, sha):
"""Return nonempty if the object probably exists in the bloom filter.
@@ -228,6 +144,7 @@ class ShaBloom:
If it returns true, there is a small probability that it exists
anyway, so you'll have to check it some other way.
"""
+ assert self.map
global _total_searches, _total_steps
_total_searches += 1
if not self.map:
@@ -237,30 +154,206 @@ class ShaBloom:
return found
def __len__(self):
- return int(self.entries)
-
-
-def create(name, expected, delaywrite=None, f=None, k=None):
- """Create and return a bloom filter for `expected` entries."""
- bits = int(math.floor(math.log(expected * MAX_BITS_EACH // 8, 2)))
- k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
- if bits > MAX_BLOOM_BITS[k]:
- log('bloom: warning, max bits exceeded, non-optimal\n')
- bits = MAX_BLOOM_BITS[k]
- debug1('bloom: using 2^%d bytes and %d hash functions\n' % (bits, k))
- # pylint: disable-next=consider-using-with
- f = f or open(name, 'w+b')
- f.write(b'BLOM')
- f.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
- assert(f.tell() == 16)
- # NOTE: On some systems this will not extend+zerofill, but it does on
- # darwin, linux, bsd and solaris.
- f.truncate(16+2**bits)
- f.seek(0)
- if delaywrite is not None and not delaywrite:
- # tell it to expect very few objects, forcing a direct mmap
- expected = 1
- return ShaBloom(name, f=f, readwrite=True, expected=expected)
+ assert self.map
+ return self.entries
+
+
+class BloomInvalid(Exception): pass
+class BloomNotFound(FileNotFoundError): pass
+
+
+def _validate_and_get_info(path, data):
+ got = data[0:4]
+ if got != b'BLOM':
+ raise BloomInvalid(f'invalid BLOM header ({pm(got)}) in {pm(path)}')
+ ver = struct.unpack('!I', data[4:8])[0]
+ if ver < BLOOM_VERSION:
+ raise BloomInvalid(f'old-style (v{ver}) bloom {pm(path)}')
+ if ver > BLOOM_VERSION:
+ raise BloomInvalid(f'too-new (v{ver}) bloom {pm(path)}')
+ bits, k, entries = struct.unpack('!HHI', data[8:16])
+ idxnames = data[16 + 2**bits:]
+ idxnames = idxnames.split(b'\0') if idxnames else []
+ return ver, bits, k, entries, idxnames
+
+
+class BloomReader(_BloomBase):
+ # pylint: disable-next=super-init-not-called
+ def __init__(self, path): # mmap not None indicates "open"
+ """Open an existing bloom filter, read-only."""
+ assert path.endswith(b'.bloom'), path
+ self.map = None
+ self.path = path
+ with ExitStack() as ctx:
+ try:
+ file = ctx.enter_context(builtins.open(path, 'rb'))
+ except FileNotFoundError as ex:
+ raise BloomNotFound(ex.errno, ex.strerror, ex.filename) from ex
+ self.map = ctx.enter_context(mmap_read(file, close=True))
+ self.version, self.bits, self.k, self.entries, self.idxnames = \
+ _validate_and_get_info(path, self.map)
+ ctx.pop_all()
+ def close(self):
+ try:
+ if self.map: self.map.close()
+ finally:
+ self.map = None
+ def __del__(self): assert not self.map, self.path
+ def __enter__(self): return self
+ def __exit__(self, type, value, traceback): self.close()
+
+
+def _create(path, expected, k):
+ with ExitStack() as ctx:
+ bits = int(math.floor(math.log(expected * MAX_BITS_EACH // 8, 2)))
+ k = k or ((bits <= MAX_BLOOM_BITS[5]) and 5 or 4)
+ if bits > MAX_BLOOM_BITS[k]:
+ log('bloom: warning, max bits exceeded, non-optimal\n')
+ bits = MAX_BLOOM_BITS[k]
+ debug1(f'bloom: using 2^{bits:d} bytes and {k:d} hash functions\n')
+ dir, name = os.path.split(path)
+ fd, tmp = mkstemp(dir=dir or os.getcwdb(), prefix=(name + b'-'))
+ with ExitStack() as ctx:
+ ctx.enter_context(finalized(tmp, unlink))
+ os.close(fd)
+ tmp_file = ctx.enter_context(builtins.open(tmp, 'w+b'))
+ tmp_file.write(b'BLOM')
+ tmp_file.write(struct.pack('!IHHI', BLOOM_VERSION, bits, k, 0))
+ assert tmp_file.tell() == 16
+ # Assume POSIX truncate(), which requires zero-fill
+ tmp_file.truncate(16+2**bits)
+ tmp_file.seek(0)
+ ctx.pop_all()
+ return tmp_file, bits, k
+
+
+def _open_write_map(file, expected):
+ # Decide if we want to mmap() the pages as writable ('immediate'
+ # write) or else map them privately for later writing back to
+ # the file ('delayed' write). A bloom table's write access
+ # pattern is such that we dirty almost all the pages after adding
+ # very few entries. But the table is so big that dirtying
+ # *all* the pages often exceeds Linux's default
+ # /proc/sys/vm/dirty_ratio or /proc/sys/vm/dirty_background_ratio,
+ # thus causing it to start flushing the table before we're
+ # finished... even though there's more than enough space to
+ # store the bloom table in RAM.
+ #
+ # To work around that behaviour, if we calculate that we'll
+ # probably end up touching the whole table anyway (at least
+ # one bit flipped per memory page), let's use a "private" mmap,
+ # which defeats Linux's ability to flush it to disk. Then we'll
+ # flush it as one big lump during close().
+ pages = os.fstat(file.fileno()).st_size // 4096 * 5 # assume k=5
+ delaywrite = expected > pages
+ debug1(f'bloom: delaywrite={delaywrite!r}\n')
+ if delaywrite:
+ data = mmap_readwrite_private(file, close=False)
+ else:
+ data = mmap_readwrite(file, close=False)
+ return data, delaywrite
+
+
+class BloomWriter(_BloomBase):
+
+ __slots__ = ('_delaywrite', '_file', '_tmp_file_path')
+
+ # pylint: disable-next=super-init-not-called
+ def __init__(self, path, mode, expected, *, delaywrite=None, k=None):
+ """Open (mode='r+b') an existing, or create (mode='w+b') a new
+ bloom filter for updates. The filter will not exist at path
+ until the instance is successfully closed.
+
+ """
+ assert path.endswith(b'.bloom'), path
+ assert expected > 0, expected
+ # mmap not None indicates "open"
+ self.map = None
+ self._file = None
+ self._tmp_file_path = None
+ self.path = path
+
+ # delaywrite arg is currently only used by tests
+ def open_map(f):
+ if delaywrite is not None and not delaywrite:
+ # tell it to expect very few objects, forcing a direct mmap
+ return _open_write_map(f, 1)
+ return _open_write_map(f, expected)
+
+ if mode == 'wb':
+ with ExitStack() as ctx:
+ try:
+ self._file = ctx.enter_context(builtins.open(path, 'r+b'))
+ except FileNotFoundError as ex:
+ raise BloomNotFound(ex.errno, ex.strerror, ex.filename) from ex
+ self.map, self._delaywrite = open_map(self._file)
+ ctx.enter_context(self.map)
+ self.version, self.bits, self.k, self.entries, self.idxnames = \
+ _validate_and_get_info(path, self.map)
+ dir, name = os.path.split(path)
+ fd, tmp = mkstemp(dir=dir or os.getcwdb(), prefix=name)
+ self._tmp_file_path = tmp
+ os.close(fd)
+ os.rename(path, tmp)
+ ctx.pop_all()
+ return
+
+ assert mode == 'w+b', mode # new filter
+ assert expected > 0, expected
+ self.entries = 0
+ self.idxnames = []
+ self.version = BLOOM_VERSION
+ self._file, self.bits, self.k = _create(path, expected, k)
+ self._tmp_file_path = self._file.name
+ with ExitStack() as ctx:
+ ctx.enter_context(finalized(self._tmp_file_path, unlink))
+ ctx.enter_context(self._file)
+ self.map, self._delaywrite = open_map(self._file)
+ ctx.pop_all()
+
+ def close(self, error=None):
+ try:
+ with ExitStack() as ctx:
+ if self._tmp_file_path:
+ ctx.enter_context(finalized(self._tmp_file_path, unlink))
+ if self._file:
+ ctx.enter_context(self._file)
+ if self.map:
+ ctx.enter_context(self.map)
+ if error:
+ log(f'dropping unfinished bloom {pm(self.path)}, interrupted by {str(error)}')
+ return
+ if not self._file and self.map:
+ return
+ debug2(f'bloom: closing with {self.entries} entries\n')
+ self.map[12:16] = struct.pack('!I', self.entries)
+ if self._delaywrite:
+ self._file.seek(0)
+ self._file.write(self.map)
+ else:
+ self.map.flush()
+ self._file.seek(16 + 2**self.bits)
+ if self.idxnames:
+ self._file.write(b'\0'.join(self.idxnames))
+ os.rename(self._tmp_file_path, self.path)
+ finally:
+ self._file, self.map, self._tmp_file_path = None, None, None
+
+ def __del__(self): assert not self.map, self.path
+ def __enter__(self): return self
+ def __exit__(self, type, value, traceback): self.close(value)
+
+ def add(self, ids):
+ """Add the hashes in ids (packed binary 20-bytes) to the filter."""
+ if not self.map:
+ raise Exception("Cannot add to closed bloom")
+ self.entries += bloom_add(self.map, ids, self.bits, self.k)
+
+ def add_idx(self, ix):
+ """Add the object to the filter."""
+ assert self.map
+ self.add(ix.shatable)
+ self.idxnames.append(os.path.basename(ix.name))
def clear_bloom(dir):
diff --git a/lib/bup/cmd/bloom.py b/lib/bup/cmd/bloom.py
index c0e44bc2..c4186403 100644
--- a/lib/bup/cmd/bloom.py
+++ b/lib/bup/cmd/bloom.py
@@ -1,10 +1,13 @@
import os, glob
-from bup import options, git, bloom
+from bup import bloom, options, git
+from bup.bloom import BloomInvalid, BloomNotFound, BloomReader, BloomWriter
from bup.compat import argv_bytes
from bup.helpers \
- import (add_error,
+ import (EXIT_FAILURE,
+ EXIT_FALSE,
+ EXIT_SUCCESS,
debug1,
log,
note_error,
@@ -27,28 +30,34 @@ c,check= check given *.idx or *.midx file against the bloom filter
def ruin_bloom(bloomfilename):
- if not os.path.exists(bloomfilename):
- log(path_msg(bloomfilename) + '\n')
- add_error('bloom: %s not found to ruin\n' % path_msg(bloomfilename))
- return
- with bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) as b:
- b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
+ try:
+ with BloomWriter(bloomfilename, 'wb', expected=1) as b:
+ b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits
+ except BloomInvalid as ex:
+ log(f'error: {str(ex)}\n')
+ return EXIT_FAILURE
+ except BloomNotFound:
+ log(f'error: bloom filter {path_msg(bloomfilename)} not found\n')
+ return EXIT_FAILURE
+ return EXIT_SUCCESS
def check_bloom(path, bloomfilename, idx):
rbloomfilename = git.repo_rel(bloomfilename)
ridx = git.repo_rel(idx)
- if not os.path.exists(bloomfilename):
- log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
- return
- with bloom.ShaBloom(bloomfilename) as b:
- if not b.valid():
- add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
- return
+ try:
+ b = BloomReader(bloomfilename)
+ except BloomInvalid as ex:
+ log(f'error: {str(ex)}\n')
+ return EXIT_FALSE
+ except BloomNotFound:
+ log(f'error: bloom filter {path_msg(bloomfilename)} not found\n')
+ return EXIT_FAILURE
+ with b:
base = os.path.basename(idx)
if base not in b.idxnames:
- log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
- return
+ log(f'bloom: {path_msg(rbloomfilename)} does not contain the idx.\n')
+ return EXIT_FALSE
if base == idx:
idx = os.path.join(path, idx)
log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
@@ -56,11 +65,14 @@ def check_bloom(path, bloomfilename, idx):
oids = git.open_object_idx(idx)
if not oids:
note_error(f'bloom: ERROR: invalid index {path_msg(idx)}\n')
- return
+ return EXIT_FAILURE
+ rc = EXIT_SUCCESS
with oids:
for oid in oids:
if not b.exists(oid):
- add_error('bloom: ERROR: object %s missing' % oid.hex())
+ log('bloom: ERROR: object %s missing\n' % oid.hex())
+ rc = EXIT_FALSE
+ return rc
_first = None
@@ -69,13 +81,14 @@ def do_bloom(path, outfilename, k, force):
assert k in (None, 4, 5)
b = None
try:
- if os.path.exists(outfilename) and not force:
- b = bloom.ShaBloom(outfilename)
- if not b.valid():
+ if not force:
+ try:
+ b = BloomReader(outfilename)
+ except BloomNotFound:
+ pass
+ except BloomInvalid as ex:
+ log(f'warning: {str(ex)}\n')
debug1("bloom: Existing invalid bloom found, regenerating.\n")
- b.close()
- b = None
-
add = []
rest = []
add_count = 0
@@ -116,8 +129,7 @@ def do_bloom(path, outfilename, k, force):
else:
b, b_tmp = None, b
b_tmp.close()
- b = bloom.ShaBloom(outfilename, readwrite=True,
- expected=add_count)
+ b = BloomWriter(outfilename, mode='w+b', expected=add_count)
if b is None: # Need all idxs to build from scratch
add += rest
add_count += rest_count
@@ -135,7 +147,7 @@ def do_bloom(path, outfilename, k, force):
tfname = None
if b is None:
tfname = os.path.join(path, b'bup.tmp.bloom')
- b = bloom.create(tfname, expected=add_count, k=k)
+ b = BloomWriter(tfname, 'w+b', expected=add_count, k=k)
count = 0
icount = 0
for name in add:
@@ -177,10 +189,10 @@ def main(argv):
debug1('bloom: scanning %s\n' % path_msg(path))
outfilename = output or os.path.join(path, b'bup.bloom')
if opt.check:
- check_bloom(path, outfilename, opt.check)
- if not saved_errors:
+ rc = check_bloom(path, outfilename, opt.check)
+ if not rc and not saved_errors:
log('All tests passed.\n')
- elif opt.ruin:
- ruin_bloom(outfilename)
- else:
- do_bloom(path, outfilename, opt.k, opt.force)
+ return rc
+ if opt.ruin:
+ return ruin_bloom(outfilename)
+ return do_bloom(path, outfilename, opt.k, opt.force)
diff --git a/lib/bup/gc.py b/lib/bup/gc.py
index 0c088dcc..5ff5de3b 100644
--- a/lib/bup/gc.py
+++ b/lib/bup/gc.py
@@ -1,11 +1,11 @@
from binascii import hexlify, unhexlify
from contextlib import ExitStack
-#from itertools import chain
from os.path import basename
import glob, os, re, subprocess, sys, tempfile
from bup import bloom, git, midx
+from bup.bloom import BloomWriter
from bup.git import MissingObject, walk_object
from bup.helpers import \
EXIT_FAILURE, log, note_error, progress, qprogress, reprogress
@@ -90,7 +90,7 @@ def find_live_objects(existing_count, cat_pipe, refs=None, *,
os.close(ffd)
# FIXME: allow selection of k?
# FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
- live_blobs = bloom.create(bloom_filename, expected=existing_count, k=None)
+ live_blobs = BloomWriter(bloom_filename, 'w+b', expected=existing_count, k=None)
with ExitStack() as maybe_close_bloom:
maybe_close_bloom.enter_context(live_blobs)
# live_blobs will hold on to the fd until close or exit
diff --git a/lib/bup/git.py b/lib/bup/git.py
index d6dd8df1..833cf278 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -15,7 +15,8 @@ from subprocess import DEVNULL, PIPE, Popen, run
from sys import stderr
from typing import Literal, Optional, Union
-from bup import _helpers, hashsplit, midx, bloom, xstat
+from bup import _helpers, hashsplit, midx, xstat
+from bup.bloom import BloomInvalid, BloomNotFound, BloomReader
from bup.commit import create_commit_blob, parse_commit
from bup.compat import dataclass_frozen_for_testing, environ
from bup.helpers import (EXIT_FAILURE,
@@ -707,19 +708,24 @@ class PackIdxList:
new_packs = list(new_packs)
new_packs.sort(reverse=True, key=len)
self.packs = new_packs
- if self.bloom is None and os.path.exists(bfull):
- self.bloom = bloom.ShaBloom(bfull)
+ if self.bloom is None:
+ try:
+ self.bloom = BloomReader(bfull)
+ except BloomNotFound:
+ pass
+ except BloomInvalid as ex:
+ log(f'warning: {str(ex)}\n')
try:
- if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self):
+ if self.bloom and len(self.bloom) >= len(self):
self.do_bloom = True
else:
if self.bloom:
self.bloom, bloom_tmp = None, self.bloom
bloom_tmp.close()
- except BaseException as ex:
+ except BaseException:
if self.bloom:
self.bloom.close()
- raise ex
+ raise
debug1('PackIdxList: using %d index%s.\n'
% (len(self.packs), len(self.packs)!=1 and 'es' or ''))
diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index e2077c69..e7c56e15 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -289,17 +289,12 @@ def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
pfinal(count, total)
-def unlink(f):
- """Delete a file at path 'f' if it currently exists.
-
- Unlike os.unlink(), does not throw an exception if the file didn't already
- exist.
- """
+def unlink(path):
+ """Unlink path, ignoring it if missing (i.e. rm -f path)."""
try:
- os.unlink(f)
- except OSError as e:
- if e.errno != errno.ENOENT:
- raise
+ os.unlink(path)
+ except FileNotFoundError:
+ pass
_bq_simple_id_rx = re.compile(br'^[-_./a-zA-Z0-9]+$')
diff --git a/test/int/test_bloom.py b/test/int/test_bloom.py
index ea26eaf8..2c59269b 100644
--- a/test/int/test_bloom.py
+++ b/test/int/test_bloom.py
@@ -1,9 +1,9 @@
-import errno, os, sys, tempfile
+import errno, os, sys
import pytest
-from bup import bloom
+from bup.bloom import BloomReader, BloomWriter
from bup.compat import dataclass
@@ -15,10 +15,10 @@ def test_bloom(tmpdir):
shatable: bytes
ix = Idx(name=b'dummy.idx', shatable=b''.join(hashes))
for k in (4, 5):
- with bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k) as b:
+ with BloomWriter(tmpdir + b'/pybuptest.bloom', 'w+b', expected=100, k=k) as b:
b.add_idx(ix)
assert b.pfalse_positive() < .1
- with bloom.ShaBloom(tmpdir + b'/pybuptest.bloom') as b:
+ with BloomReader(tmpdir + b'/pybuptest.bloom') as b:
all_present = True
for h in hashes:
all_present &= (b.exists(h) or False)
@@ -30,9 +30,8 @@ def test_bloom(tmpdir):
assert false_positives < 10
os.unlink(tmpdir + b'/pybuptest.bloom')
- tf = tempfile.TemporaryFile(dir=tmpdir)
- with bloom.create(b'bup.bloom', f=tf, expected=100) as b:
- assert b.file == tf
+ with BloomWriter(b'bup.bloom', 'w+b', expected=100) as b:
+ assert b.path == b'bup.bloom'
assert b.k == 5
@@ -42,7 +41,7 @@ def test_large_bloom(tmpdir):
# architecture), and anywhere else where the address space is
# sufficiently limited.
try:
- with bloom.create(tmpdir + b'/bup.bloom', expected=2**28,
+ with BloomWriter(tmpdir + b'/bup.bloom', 'w+b', expected=2**28,
delaywrite=False) as b:
assert b.k == 4
except EnvironmentError as ex:
--
2.47.3