[PATCH 1/1] git/packwriter: open(..) prohibited in __del__

34 views
Skip to first unread message

Rob Browning

unread,
Sep 12, 2020, 6:30:47 PM9/12/20
to bup-...@googlegroups.com, Bas Stottelaar
From: Bas Stottelaar <bassto...@gmail.com>

When an exception occurs, __del__ is invoked by the interpreter, to
perform cleanup. It seems that since Python 3.4, the behaviour has
changed, and also prohibits invocations of open(..) (source:
https://stackoverflow.com/a/29737870). Instead, contextmanager API
should be used (source: https://stackoverflow.com/a/26544629), which
seems to be in place already.

This should fix exception messages such as 'NameError: name 'open'
is not defined'.

Signed-off-by: Bas Stottelaar <bassto...@gmail.com>
---

Thanks for the help. Redirected to bup-list as-per HACKING from
pull request https://github.com/bup/bup/pull/56

lib/bup/git.py | 3 ---
1 file changed, 3 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index c0ac91f4..47180e7e 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -718,9 +718,6 @@ class PackWriter:
self.max_pack_objects = max_pack_objects if max_pack_objects \
else max(1, self.max_pack_size // 5000)

- def __del__(self):
- self.close()
-
def __enter__(self):
return self

--
2.26.1

Rob Browning

unread,
Sep 12, 2020, 6:39:00 PM9/12/20
to bup-...@googlegroups.com, Bas Stottelaar
Rob Browning <r...@defaultvalue.org> writes:

> From: Bas Stottelaar <bassto...@gmail.com>
>
> When an exception occurs, __del__ is invoked by the interpreter, to
> perform cleanup. It seems that since Python 3.4, the behaviour has
> changed, and also prohibits invocations of open(..) (source:
> https://stackoverflow.com/a/29737870). Instead, contextmanager API
> should be used (source: https://stackoverflow.com/a/26544629), which
> seems to be in place already.
>
> This should fix exception messages such as 'NameError: name 'open'
> is not defined'.

Thanks, and looks good.

Though before we consider merging it, I'm doing a survey of all the
packwriter uses to try to make sure we won't be able to leak references
(in any notable way) afterward, and so far, it looks like I may end up
with some patches we'll want to consider merging first.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Oct 11, 2020, 6:23:15 PM10/11/20
to bup-...@googlegroups.com, Bas Stottelaar
Make sure to always close or abort the packwriter. Abort on errors
under the assumption that any packfile in progress should only contain
blobs duplicated from the packfiles that were being garbage collected
to produce it, and those packfiles should never be removed until we've
finished writing and closing the one they're being swept into.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/gc.py | 95 +++++++++++++++++++++++++++------------------------
1 file changed, 50 insertions(+), 45 deletions(-)

diff --git a/lib/bup/gc.py b/lib/bup/gc.py
index e663cc02..4009e815 100644
--- a/lib/bup/gc.py
+++ b/lib/bup/gc.py
@@ -5,7 +5,7 @@ from os.path import basename
import glob, os, subprocess, sys, tempfile

from bup import bloom, git, midx
-from bup.compat import hexstr, range
+from bup.compat import hexstr, pending_raise, range
from bup.git import MissingObject, walk_object
from bup.helpers import Nonlocal, log, progress, qprogress
from bup.io import path_msg
@@ -156,59 +156,64 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
compression_level=compression,
run_midx=False,
on_pack_finish=remove_stale_files)
+ try:
+ # FIXME: sanity check .idx names vs .pack names?
+ collect_count = 0
+ for idx_name in glob.glob(os.path.join(git.repo(b'objects/pack'), b'*.idx')):
+ if verbosity:
+ qprogress('preserving live data (%d%% complete)\r'
+ % ((float(collect_count) / existing_count) * 100))
+ with git.open_idx(idx_name) as idx:
+ idx_live_count = 0
+ for sha in idx:
+ if live_objects.exists(sha):
+ idx_live_count += 1

- # FIXME: sanity check .idx names vs .pack names?
- collect_count = 0
- for idx_name in glob.glob(os.path.join(git.repo(b'objects/pack'), b'*.idx')):
- if verbosity:
- qprogress('preserving live data (%d%% complete)\r'
- % ((float(collect_count) / existing_count) * 100))
- with git.open_idx(idx_name) as idx:
- idx_live_count = 0
- for sha in idx:
- if live_objects.exists(sha):
- idx_live_count += 1
-
- collect_count += idx_live_count
- if idx_live_count == 0:
- if verbosity:
- log('deleting %s\n'
- % path_msg(git.repo_rel(basename(idx_name))))
- ns.stale_files.append(idx_name)
- ns.stale_files.append(idx_name[:-3] + b'pack')
- continue
+ collect_count += idx_live_count
+ if idx_live_count == 0:
+ if verbosity:
+ log('deleting %s\n'
+ % path_msg(git.repo_rel(basename(idx_name))))
+ ns.stale_files.append(idx_name)
+ ns.stale_files.append(idx_name[:-3] + b'pack')
+ continue
+
+ live_frac = idx_live_count / float(len(idx))
+ if live_frac > ((100 - threshold) / 100.0):
+ if verbosity:
+ log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
+ live_frac * 100))
+ continue

- live_frac = idx_live_count / float(len(idx))
- if live_frac > ((100 - threshold) / 100.0):
if verbosity:
- log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
- live_frac * 100))
- continue
+ log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
+ live_frac * 100))
+ for sha in idx:
+ if live_objects.exists(sha):
+ item_it = cat_pipe.get(hexlify(sha))
+ _, typ, _ = next(item_it)
+ writer.just_write(sha, typ, b''.join(item_it))

- if verbosity:
- log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
- live_frac * 100))
- for sha in idx:
- if live_objects.exists(sha):
- item_it = cat_pipe.get(hexlify(sha))
- _, typ, _ = next(item_it)
- writer.just_write(sha, typ, b''.join(item_it))
+ ns.stale_files.append(idx_name)
+ ns.stale_files.append(idx_name[:-3] + b'pack')

- ns.stale_files.append(idx_name)
- ns.stale_files.append(idx_name[:-3] + b'pack')
+ if verbosity:
+ progress('preserving live data (%d%% complete)\n'
+ % ((float(collect_count) / existing_count) * 100))

- if verbosity:
- progress('preserving live data (%d%% complete)\n'
- % ((float(collect_count) / existing_count) * 100))
+ # Nothing should have recreated midx/bloom yet.
+ pack_dir = git.repo(b'objects/pack')
+ assert(not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
+ assert(not glob.glob(os.path.join(pack_dir, b'*.midx')))

- # Nothing should have recreated midx/bloom yet.
- pack_dir = git.repo(b'objects/pack')
- assert(not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
- assert(not glob.glob(os.path.join(pack_dir, b'*.midx')))
+ except BaseException as ex:
+ with pending_raise(ex):
+ writer.abort()

- # try/catch should call writer.abort()?
# This will finally run midx.
- writer.close() # Can only change refs (if needed) after this.
+ # Can only change refs (if needed) after this.
+ writer.close()
+
remove_stale_files(None) # In case we didn't write to the writer.

if verbosity:
--
2.26.1

Rob Browning

unread,
Oct 11, 2020, 6:23:15 PM10/11/20
to bup-...@googlegroups.com, Bas Stottelaar
Proposed for master branch.

Rob Browning (4):
CODINGSTYLE: add raise missing from exception handling code
compat: add "with pending_raise(ex)" to simplify nested exceptions
gc: always clean up the packwriter; abort on errors
rm: handle writer.abort exceptions when exception pending

CODINGSTYLE | 24 ++++++++---
Makefile | 1 +
lib/bup/compat.py | 32 +++++++++++++++
lib/bup/gc.py | 95 +++++++++++++++++++++++---------------------
lib/bup/rm.py | 15 ++++---
lib/bup/t/tcompat.py | 32 +++++++++++++++
6 files changed, 141 insertions(+), 58 deletions(-)
create mode 100644 lib/bup/t/tcompat.py

--
2.26.1

Rob Browning

unread,
Oct 11, 2020, 6:23:16 PM10/11/20
to bup-...@googlegroups.com, Bas Stottelaar
Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
CODINGSTYLE | 1 +
1 file changed, 1 insertion(+)

diff --git a/CODINGSTYLE b/CODINGSTYLE
index 03441575..69c2cfb9 100644
--- a/CODINGSTYLE
+++ b/CODINGSTYLE
@@ -67,5 +67,6 @@ of the new exception This can be accomplished via
except ... as ex2:
add_ex_tb(ex2)
raise add_ex_ctx(ex2, ex)
+ raise

See the end of ``lib/bup/compat.py`` for a functional example.
--
2.26.1

Rob Browning

unread,
Oct 11, 2020, 6:23:16 PM10/11/20
to bup-...@googlegroups.com, Bas Stottelaar
Thanks to Johannes Berg for suggesting the approach.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
CODINGSTYLE | 23 ++++++++++++++++++-----
Makefile | 1 +
lib/bup/compat.py | 32 ++++++++++++++++++++++++++++++++
lib/bup/t/tcompat.py | 32 ++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+), 5 deletions(-)
create mode 100644 lib/bup/t/tcompat.py

diff --git a/CODINGSTYLE b/CODINGSTYLE
index 69c2cfb9..348a2f51 100644
--- a/CODINGSTYLE
+++ b/CODINGSTYLE
@@ -43,7 +43,6 @@ explicitly add stack traces to any exceptions that are going to be
re-raised by anything other than a no-argument raise (otherwise the
stack trace will be lost)::

-
try:
...
except ... as ex:
@@ -52,11 +51,24 @@ stack trace will be lost)::
...
raise pending_ex

-If an exception is thrown from an exception handler, the pending
+When an exception is thrown from an exception handler, the pending
exception should be the `"context"
<https://docs.python.org/3/reference/simple_stmts.html#the-raise-statement>`_
-of the new exception This can be accomplished via
-``add_ex_ctx()``::
+of the new exception, which can be accomplished (portably) via
+``pending_raise()``::
+
+ try:
+ ...
+ except ... as ex:
+ with pending_raise(ex):
+ clean_up()
+
+This should do roughly the same thing in Python 2 and Python 3,
+throwing any exception from ``clean_up()`` after adding ex as the
+``__context__`` if clean_up() throws, and throwing ``ex`` otherwise.
+
+If for some reason, you need more control, you can use
+``add_ex_ctx()`` directly::

try:
...
@@ -69,4 +81,5 @@ of the new exception This can be accomplished via
raise add_ex_ctx(ex2, ex)
raise

-See the end of ``lib/bup/compat.py`` for a functional example.
+See the end of ``lib/bup/compat.py`` for a functional example, and all
+of this can be removed once we drop support for Python 2.
diff --git a/Makefile b/Makefile
index 4d0784bc..5e31562a 100644
--- a/Makefile
+++ b/Makefile
@@ -163,6 +163,7 @@ runtests: runtests-python runtests-cmdline
python_tests := \
lib/bup/t/tbloom.py \
lib/bup/t/tclient.py \
+ lib/bup/t/tcompat.py \
lib/bup/t/tgit.py \
lib/bup/t/thashsplit.py \
lib/bup/t/thelpers.py \
diff --git a/lib/bup/compat.py b/lib/bup/compat.py
index 39dd8106..f12d70ce 100644
--- a/lib/bup/compat.py
+++ b/lib/bup/compat.py
@@ -37,6 +37,19 @@ if py3:
"""Do nothing (already handled by Python 3 infrastructure)."""
return ex

+ class pending_raise:
+ """Rethrows either the provided ex, or any exception raised by the
+ with statement body. Supports Python 2 compatibility.
+ """
+ def __init__(self, ex):
+ self.ex = ex
+ def __enter__(self):
+ return None
+ def __exit__(self, exc_type, exc_value, traceback):
+ if not exc_type:
+ raise self.ex
+ return None
+
def items(x):
return x.items()

@@ -103,6 +116,25 @@ else: # Python 2
ex.__context__ = context_ex
return ex

+ class pending_raise:
+ """Rethrows either the provided ex, or any exception raised by the
+ with statement body, after making ex the __context__ of the newer
+ exception (assuming there's no existing __context__). Ensures the
+ exceptions have __tracebacks__. Supports Python 2 compatibility.
+
+ """
+ def __init__(self, ex):
+ self.ex = ex
+ def __enter__(self):
+ add_ex_tb(self.ex)
+ return None
+ def __exit__(self, exc_type, exc_value, traceback):
+ if not exc_type:
+ raise self.ex
+ add_ex_tb(exc_value)
+ add_ex_ctx(exc_value, self.ex)
+ return None
+
def dump_traceback(ex):
stack = [ex]
next_ex = getattr(ex, '__context__', None)
diff --git a/lib/bup/t/tcompat.py b/lib/bup/t/tcompat.py
new file mode 100644
index 00000000..039eb12d
--- /dev/null
+++ b/lib/bup/t/tcompat.py
@@ -0,0 +1,32 @@
+
+from __future__ import absolute_import, print_function
+
+from wvtest import *
+
+from bup.compat import pending_raise
+
+@wvtest
+def test_pending_raise():
+ outer = Exception('outer')
+ inner = Exception('inner')
+
+ try:
+ try:
+ raise outer
+ except Exception as ex:
+ with pending_raise(ex):
+ pass
+ except Exception as ex:
+ WVPASSEQ(outer, ex)
+ WVPASSEQ(None, getattr(outer, '__context__', None))
+
+ try:
+ try:
+ raise outer
+ except Exception as ex:
+ with pending_raise(ex):
+ raise inner
+ except Exception as ex:
+ WVPASSEQ(inner, ex)
+ WVPASSEQ(None, getattr(outer, '__context__', None))
+ WVPASSEQ(outer, getattr(inner, '__context__', None))
--
2.26.1

Rob Browning

unread,
Oct 11, 2020, 6:23:16 PM10/11/20
to bup-...@googlegroups.com, Bas Stottelaar
And drop the else clause since it's unnecessary here.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/rm.py | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/bup/rm.py b/lib/bup/rm.py
index 3bc2d833..b1f78898 100644
--- a/lib/bup/rm.py
+++ b/lib/bup/rm.py
@@ -5,7 +5,7 @@ import sys

from bup import compat, git, vfs
from bup.client import ClientError
-from bup.compat import hexstr
+from bup.compat import add_ex_ctx, add_ex_tb, hexstr, pending_raise
from bup.git import get_commit_items
from bup.helpers import add_error, die_if_errors, log, saved_errors
from bup.io import path_msg
@@ -116,14 +116,13 @@ def bup_rm(repo, paths, compression=6, verbosity=None):
for branch, saves in compat.items(dead_saves):
assert(saves)
updated_refs[b'refs/heads/' + branch] = rm_saves(saves, writer)
- except:
+ except BaseException as ex:
if writer:
- writer.abort()
- raise
- else:
- if writer:
- # Must close before we can update the ref(s) below.
- writer.close()
+ with pending_raise(ex):
+ writer.abort()
+ if writer:
+ # Must close before we can update the ref(s) below.
+ writer.close()

# Only update the refs here, at the very end, so that if something
# goes wrong above, the old refs will be undisturbed. Make an attempt
--
2.26.1

Johannes Berg

unread,
Oct 12, 2020, 3:38:36 PM10/12/20
to Rob Browning, bup-...@googlegroups.com, Bas Stottelaar
On Sun, 2020-10-11 at 17:23 -0500, Rob Browning wrote:
> Thanks to Johannes Berg for suggesting the approach.
>
> Signed-off-by: Rob Browning <r...@defaultvalue.org>
> ---
> CODINGSTYLE | 23 ++++++++++++++++++-----
> Makefile | 1 +
> lib/bup/compat.py | 32 ++++++++++++++++++++++++++++++++
> lib/bup/t/tcompat.py | 32 ++++++++++++++++++++++++++++++++
> 4 files changed, 83 insertions(+), 5 deletions(-)
> create mode 100644 lib/bup/t/tcompat.py
>
> diff --git a/CODINGSTYLE b/CODINGSTYLE
> index 69c2cfb9..348a2f51 100644
> --- a/CODINGSTYLE
> +++ b/CODINGSTYLE
> @@ -43,7 +43,6 @@ explicitly add stack traces to any exceptions that are going to be
> re-raised by anything other than a no-argument raise (otherwise the
> stack trace will be lost)::
>
> -
> try:

Have you checked the output with this? Sometimes markdown is weird that
way ...

> +If for some reason, you need more control, you can use
> +``add_ex_ctx()`` directly::

IMHO just remove that portion (and then I guess no need for the first
patch, though might include it anyway for the bug); if anyone really has
a need to do it then look at the code of how the context manager is
implemented?

But dunno. I'd just go for the "common case" (which is _already_ quite
uncommon in the codebase) and not muddy the waters further.

> +++ b/lib/bup/compat.py
> @@ -37,6 +37,19 @@ if py3:
> """Do nothing (already handled by Python 3 infrastructure)."""
> return ex

Or maybe even consider removing these (add_ex_tb, add_ex_ctx) here in
the python3 case?

> + class pending_raise:
> + """Rethrows either the provided ex, or any exception raised by the
> + with statement body. Supports Python 2 compatibility.
> + """
> + def __init__(self, ex):
> + self.ex = ex
> + def __enter__(self):
> + return None
> + def __exit__(self, exc_type, exc_value, traceback):
> + if not exc_type:
> + raise self.ex

Now I'm having second thoughts on this - don't we have to preserve the
original traceback somehow? But I forget what exactly we wanted as
output ...

johannes

Rob Browning

unread,
Nov 22, 2020, 4:48:21 PM11/22/20
to bup-...@googlegroups.com, Bas Stottelaar
Rob Browning <r...@defaultvalue.org> writes:

> Signed-off-by: Rob Browning <r...@defaultvalue.org>

Pushed to master.

Rob Browning

unread,
Nov 22, 2020, 4:49:45 PM11/22/20
to bup-...@googlegroups.com, Bas Stottelaar
Rob Browning <r...@defaultvalue.org> writes:

> Thanks to Johannes Berg for suggesting the approach.
>
> Signed-off-by: Rob Browning <r...@defaultvalue.org>

Pushed to master.

Rob Browning

unread,
Nov 22, 2020, 4:53:20 PM11/22/20
to Johannes Berg, bup-...@googlegroups.com, Bas Stottelaar
Johannes Berg <joha...@sipsolutions.net> writes:

> Have you checked the output with this? Sometimes markdown is weird that
> way ...

CODINGSTYLE is actually rst, though don't know if gh notices...

I think I probably made it rst (from plain text) when I was thinking we
might shift to sphinx (which I suppose could still happen...).

> IMHO just remove that portion (and then I guess no need for the first
> patch, though might include it anyway for the bug); if anyone really has
> a need to do it then look at the code of how the context manager is
> implemented?
>
> But dunno. I'd just go for the "common case" (which is _already_ quite
> uncommon in the codebase) and not muddy the waters further.

For now I just left it there, but not opposed to further revisions.
Alternately, we'll just drop python 2 support and delete it all...

> Now I'm having second thoughts on this - don't we have to preserve the
> original traceback somehow? But I forget what exactly we wanted as
> output ...

If I follow, I think that may be handled in __enter__, and I did
double-check that the results are what I *think* we wanted.

Thanks

Rob Browning

unread,
Nov 22, 2020, 4:53:43 PM11/22/20
to bup-...@googlegroups.com, Bas Stottelaar
Rob Browning <r...@defaultvalue.org> writes:

> Make sure to always close or abort the packwriter. Abort on errors
> under the assumption that any packfile in progress should only contain
> blobs duplicated from the packfiles that were being garbage collected
> to produce it, and those packfiles should never be removed until we've
> finished writing and closing the one they're being swept into.
>
> Signed-off-by: Rob Browning <r...@defaultvalue.org>

Pushed to master.

Rob Browning

unread,
Nov 22, 2020, 4:53:58 PM11/22/20
to bup-...@googlegroups.com, Bas Stottelaar
Rob Browning <r...@defaultvalue.org> writes:

> And drop the else clause since it's unnecessary here.
>
> Signed-off-by: Rob Browning <r...@defaultvalue.org>

Pushed to master.

Rob Browning

unread,
Sep 18, 2021, 2:42:41 PM9/18/21
to bup-...@googlegroups.com, Bas Stottelaar
Proposed.

Bas Stottelaar (1):
git/packwriter: open(..) prohibited in __del__

Rob Browning (1):
Ensure all packwriter instances are closed

lib/bup/cmd/save.py | 15 ++++-
lib/bup/cmd/server.py | 131 +++++++++++++++++++-----------------
lib/bup/cmd/split.py | 18 ++++-
lib/bup/gc.py | 8 +--
lib/bup/git.py | 3 -
lib/bup/rm.py | 7 +-
test/int/test_client.py | 145 +++++++++++++++++++---------------------
test/int/test_git.py | 73 ++++++++++----------
8 files changed, 213 insertions(+), 187 deletions(-)

--
2.30.2

Rob Browning

unread,
Sep 18, 2021, 2:42:41 PM9/18/21
to bup-...@googlegroups.com, Bas Stottelaar
Finally got back to this, and I think this commit may be the last one
we need before we can include Bas's commit to remove (the now
invalid) __del__ from packwriters. I tried to make the changes in a
way that's less likely to conflict with Johannes' pending work
(e.g. my use of the packwriter_guards), but if that's not actually
true, we can just hold off until I get further through that series,
since this work is fairly easy to adapt or even recreate.

Now that I look, I suspect the most disruptive bit might be the
cmd.server changes.

I did restore one use of a try/except/else, not because it changes
the semantics (I hope), but because it seems like a useful indicator
that the close "goes with" the try, and any additional intervening
code (before the close) would be a mistake.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/cmd/save.py | 15 ++++-
lib/bup/cmd/server.py | 131 +++++++++++++++++++-----------------
lib/bup/cmd/split.py | 18 ++++-
lib/bup/gc.py | 8 +--
lib/bup/rm.py | 7 +-
test/int/test_client.py | 145 +++++++++++++++++++---------------------
test/int/test_git.py | 73 ++++++++++----------
7 files changed, 213 insertions(+), 184 deletions(-)

diff --git a/lib/bup/cmd/save.py b/lib/bup/cmd/save.py
index 1325b4d9..970d4554 100755
--- a/lib/bup/cmd/save.py
+++ b/lib/bup/cmd/save.py
@@ -47,7 +47,9 @@ def before_saving_regular_file(name):
return


-def main(argv):
+def _guarded_main(argv, packwriter_guard):
+ # The packwriter_guard is a hack we'll only need until we're ready
+ # to restructure this code to use a context manager instead.

# Hack around lack of nonlocal vars in python 2
_nonlocal = {}
@@ -125,6 +127,7 @@ def main(argv):
cli = None
oldref = refname and git.read_ref(refname) or None
w = git.PackWriter(compression_level=opt.compress)
+ packwriter_guard.append(w)

handle_ctrl_c()

@@ -523,3 +526,13 @@ def main(argv):
if saved_errors:
log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
sys.exit(1)
+
+def main(argv):
+ # This hack avoids having to restructure/reindent main until we're
+ # ready.
+ packwriter_guard = []
+ try:
+ return _guarded_main(argv, packwriter_guard)
+ finally:
+ if packwriter_guard:
+ packwriter_guard[0].close()
diff --git a/lib/bup/cmd/server.py b/lib/bup/cmd/server.py
index 35744284..95d94251 100755
--- a/lib/bup/cmd/server.py
+++ b/lib/bup/cmd/server.py
@@ -90,50 +90,55 @@ def receive_objects_v2(conn, junk):
w = git.PackWriter(objcache_maker=None)
else:
w = git.PackWriter()
- while 1:
- ns = conn.read(4)
- if not ns:
- w.abort()
- raise Exception('object read: expected length header, got EOF\n')
- n = struct.unpack('!I', ns)[0]
- #debug2('expecting %d bytes\n' % n)
- if not n:
- debug1('bup server: received %d object%s.\n'
- % (w.count, w.count!=1 and "s" or ''))
- fullpath = w.close(run_midx=not dumb_server_mode)
- if fullpath:
- (dir, name) = os.path.split(fullpath)
- conn.write(b'%s.idx\n' % name)
- conn.ok()
- return
- elif n == 0xffffffff:
- debug2('bup server: receive-objects suspended.\n')
- suspended_w = w
- conn.ok()
- return
-
- shar = conn.read(20)
- crcr = struct.unpack('!I', conn.read(4))[0]
- n -= 20 + 4
- buf = conn.read(n) # object sizes in bup are reasonably small
- #debug2('read %d bytes\n' % n)
- _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')
- if not dumb_server_mode:
- oldpack = w.exists(shar, want_source=True)
- if oldpack:
- assert(not oldpack == True)
- assert(oldpack.endswith(b'.idx'))
- (dir,name) = os.path.split(oldpack)
- if not (name in suggested):
- debug1("bup server: suggesting index %s\n"
- % git.shorten_hash(name).decode('ascii'))
- debug1("bup server: because of object %s\n"
- % hexstr(shar))
- conn.write(b'index %s\n' % name)
- suggested.add(name)
- continue
- nw, crc = w._raw_write((buf,), sha=shar)
- _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
+ try:
+ while 1:
+ ns = conn.read(4)
+ if not ns:
+ w.abort()
+ raise Exception('object read: expected length header, got EOF\n')
+ n = struct.unpack('!I', ns)[0]
+ #debug2('expecting %d bytes\n' % n)
+ if not n:
+ debug1('bup server: received %d object%s.\n'
+ % (w.count, w.count!=1 and "s" or ''))
+ fullpath = w.close(run_midx=not dumb_server_mode)
+ if fullpath:
+ (dir, name) = os.path.split(fullpath)
+ conn.write(b'%s.idx\n' % name)
+ conn.ok()
+ return
+ elif n == 0xffffffff:
+ debug2('bup server: receive-objects suspended.\n')
+ suspended_w = w
+ w = None
+ conn.ok()
+ return
+
+ shar = conn.read(20)
+ crcr = struct.unpack('!I', conn.read(4))[0]
+ n -= 20 + 4
+ buf = conn.read(n) # object sizes in bup are reasonably small
+ #debug2('read %d bytes\n' % n)
+ _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')
+ if not dumb_server_mode:
+ oldpack = w.exists(shar, want_source=True)
+ if oldpack:
+ assert(not oldpack == True)
+ assert(oldpack.endswith(b'.idx'))
+ (dir,name) = os.path.split(oldpack)
+ if not (name in suggested):
+ debug1("bup server: suggesting index %s\n"
+ % git.shorten_hash(name).decode('ascii'))
+ debug1("bup server: because of object %s\n"
+ % hexstr(shar))
+ conn.write(b'index %s\n' % name)
+ suggested.add(name)
+ continue
+ nw, crc = w._raw_write((buf,), sha=shar)
+ _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
+ finally:
+ if w:
+ w.close()
# NOTREACHED


@@ -278,9 +283,10 @@ commands = {
}

def main(argv):
+ global suspended_w
+
o = options.Options(optspec)
opt, flags, extra = o.parse_bytes(argv[1:])
-
if extra:
o.fatal('no arguments expected')

@@ -291,21 +297,26 @@ def main(argv):
sys.stdout.flush()
conn = Conn(byte_stream(sys.stdin), byte_stream(sys.stdout))
lr = linereader(conn)
- for _line in lr:
- line = _line.strip()
- if not line:
- continue
- debug1('bup server: command: %r\n' % line)
- words = line.split(b' ', 1)
- cmd = words[0]
- rest = len(words)>1 and words[1] or b''
- if cmd == b'quit':
- break
- else:
- cmd = commands.get(cmd)
- if cmd:
- cmd(conn, rest)
+ try:
+ for _line in lr:
+ line = _line.strip()
+ if not line:
+ continue
+ debug1('bup server: command: %r\n' % line)
+ words = line.split(b' ', 1)
+ cmd = words[0]
+ rest = len(words)>1 and words[1] or b''
+ if cmd == b'quit':
+ break
else:
- raise Exception('unknown server command: %r\n' % line)
+ cmd = commands.get(cmd)
+ if cmd:
+ cmd(conn, rest)
+ else:
+ raise Exception('unknown server command: %r\n' % line)
+ finally:
+ if suspended_w:
+ suspended_w.close()
+ suspended_w = None

debug1('bup server: done\n')
diff --git a/lib/bup/cmd/split.py b/lib/bup/cmd/split.py
index 11392e20..f8a007d0 100755
--- a/lib/bup/cmd/split.py
+++ b/lib/bup/cmd/split.py
@@ -41,7 +41,10 @@ bwlimit= maximum bytes/sec to transmit to server
#,compress= set compression level to # (0-9, 9 is highest) [1]
"""

-def main(argv):
+def _guarded_main(argv, packwriter_guard):
+ # The packwriter_guard is a hack we'll only need until we're ready
+ # to restructure this code to use a context manager instead.
+
o = options.Options(optspec)
opt, flags, extra = o.parse_bytes(argv[1:])
if opt.name: opt.name = argv_bytes(opt.name)
@@ -118,6 +121,8 @@ def main(argv):
pack_writer = git.PackWriter(compression_level=opt.compress,
max_pack_size=max_pack_size,
max_pack_objects=max_pack_objects)
+ if pack_writer:
+ packwriter_guard.append(pack_writer)

input = byte_stream(sys.stdin)

@@ -234,3 +239,14 @@ def main(argv):
if saved_errors:
log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
sys.exit(1)
+
+def main(argv):
+ # This hack avoids having to restructure/reindent main until we're
+ # ready.
+ packwriter_guard = []
+ try:
+ return _guarded_main(argv, packwriter_guard)
+ finally:
+ if packwriter_guard:
+ print('foo:', packwriter_guard, file=sys.stderr)
+ packwriter_guard[0].close()
diff --git a/lib/bup/gc.py b/lib/bup/gc.py
index b757034c..e217372e 100644
--- a/lib/bup/gc.py
+++ b/lib/bup/gc.py
@@ -209,10 +209,10 @@ def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
except BaseException as ex:
with pending_raise(ex):
writer.abort()
-
- # This will finally run midx.
- # Can only change refs (if needed) after this.
- writer.close()
+ else:
+ # This will finally run midx.
+ # Can only change refs (if needed) after this.
+ writer.close()

remove_stale_files(None) # In case we didn't write to the writer.

diff --git a/lib/bup/rm.py b/lib/bup/rm.py
index 844bdf15..e7b34f78 100644
--- a/lib/bup/rm.py
+++ b/lib/bup/rm.py
@@ -116,10 +116,9 @@ def bup_rm(repo, paths, compression=6, verbosity=None):
assert(saves)
updated_refs[b'refs/heads/' + branch] = rm_saves(saves, writer)
except BaseException as ex:
- if writer:
- with pending_raise(ex):
- writer.abort()
- if writer:
+ with pending_raise(ex):
+ writer.abort()
+ else:
# Must close before we can update the ref(s) below.
writer.close()

diff --git a/test/int/test_client.py b/test/int/test_client.py
index b3cdba4b..66f3214a 100644
--- a/test/int/test_client.py
+++ b/test/int/test_client.py
@@ -23,76 +23,70 @@ IDX_PAT = b'/*.idx'
def test_server_split_with_indexes(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir
git.init_repo(bupdir)
- lw = git.PackWriter()
- c = client.Client(bupdir, create=True)
- rw = c.new_packwriter()
-
- lw.new_blob(s1)
- lw.close()
-
- rw.new_blob(s2)
- rw.breakpoint()
- rw.new_blob(s1)
- rw.close()
+ with git.PackWriter() as lw:
+ c = client.Client(bupdir, create=True)
+ with c.new_packwriter() as rw:
+ lw.new_blob(s1)
+ lw.close()

+ rw.new_blob(s2)
+ rw.breakpoint()
+ rw.new_blob(s1)

def test_multiple_suggestions(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir
git.init_repo(bupdir)

- lw = git.PackWriter()
- lw.new_blob(s1)
- lw.close()
- lw = git.PackWriter()
- lw.new_blob(s2)
- lw.close()
+ with git.PackWriter() as lw:
+ lw.new_blob(s1)
+ with git.PackWriter() as lw:
+ lw.new_blob(s2)
assert len(glob.glob(git.repo(b'objects/pack'+IDX_PAT))) == 2

c = client.Client(bupdir, create=True)
assert len(glob.glob(c.cachedir+IDX_PAT)) == 0
- rw = c.new_packwriter()
- s1sha = rw.new_blob(s1)
- assert rw.exists(s1sha)
- s2sha = rw.new_blob(s2)
-
- # This is a little hacky, but ensures that we test the
- # code under test. First, flush to ensure that we've
- # actually sent all the command ('receive-objects-v2')
- # and their data to the server. This may be needed if
- # the output buffer size is bigger than the data (both
- # command and objects) we're writing. To see the need
- # for this, change the object sizes at the beginning
- # of this file to be very small (e.g. 10 instead of 10k)
- c.conn.outp.flush()
-
- # Then, check if we've already received the idx files.
- # This may happen if we're preempted just after writing
- # the data, then the server runs and suggests, and only
- # then we continue in PackWriter_Remote::_raw_write()
- # and check the has_input(), in that case we'll receive
- # the idx still in the rw.new_blob() calls above.
- #
- # In most cases though, that doesn't happen, and we'll
- # get past the has_input() check before the server has
- # a chance to respond - it has to actually hash the new
- # object here, so it takes some time. So also break out
- # of the loop if the server has sent something on the
- # connection.
- #
- # Finally, abort this after a little while (about one
- # second) just in case something's actually broken.
- n = 0
- while (len(glob.glob(c.cachedir+IDX_PAT)) < 2 and
- not c.conn.has_input() and n < 10):
- time.sleep(0.1)
- n += 1
- assert len(glob.glob(c.cachedir+IDX_PAT)) == 2 or c.conn.has_input()
- rw.new_blob(s2)
- assert rw.objcache.exists(s1sha)
- assert rw.objcache.exists(s2sha)
- rw.new_blob(s3)
- assert len(glob.glob(c.cachedir+IDX_PAT)) == 2
- rw.close()
+ with c.new_packwriter() as rw:
+ s1sha = rw.new_blob(s1)
+ assert rw.exists(s1sha)
+ s2sha = rw.new_blob(s2)
+
+ # This is a little hacky, but ensures that we test the
+ # code under test. First, flush to ensure that we've
+ # actually sent all the command ('receive-objects-v2')
+ # and their data to the server. This may be needed if
+ # the output buffer size is bigger than the data (both
+ # command and objects) we're writing. To see the need
+ # for this, change the object sizes at the beginning
+ # of this file to be very small (e.g. 10 instead of 10k)
+ c.conn.outp.flush()
+
+ # Then, check if we've already received the idx files.
+ # This may happen if we're preempted just after writing
+ # the data, then the server runs and suggests, and only
+ # then we continue in PackWriter_Remote::_raw_write()
+ # and check the has_input(), in that case we'll receive
+ # the idx still in the rw.new_blob() calls above.
+ #
+ # In most cases though, that doesn't happen, and we'll
+ # get past the has_input() check before the server has
+ # a chance to respond - it has to actually hash the new
+ # object here, so it takes some time. So also break out
+ # of the loop if the server has sent something on the
+ # connection.
+ #
+ # Finally, abort this after a little while (about one
+ # second) just in case something's actually broken.
+ n = 0
+ while (len(glob.glob(c.cachedir+IDX_PAT)) < 2 and
+ not c.conn.has_input() and n < 10):
+ time.sleep(0.1)
+ n += 1
+ assert len(glob.glob(c.cachedir+IDX_PAT)) == 2 or c.conn.has_input()
+ rw.new_blob(s2)
+ assert rw.objcache.exists(s1sha)
+ assert rw.objcache.exists(s2sha)
+ rw.new_blob(s3)
+ assert len(glob.glob(c.cachedir+IDX_PAT)) == 2
assert len(glob.glob(c.cachedir+IDX_PAT)) == 3


@@ -101,17 +95,15 @@ def test_dumb_client_server(tmpdir):
git.init_repo(bupdir)
open(git.repo(b'bup-dumb-server'), 'w').close()

- lw = git.PackWriter()
- lw.new_blob(s1)
- lw.close()
+ with git.PackWriter() as lw:
+ lw.new_blob(s1)

c = client.Client(bupdir, create=True)
- rw = c.new_packwriter()
- assert len(glob.glob(c.cachedir+IDX_PAT)) == 1
- rw.new_blob(s1)
- assert len(glob.glob(c.cachedir+IDX_PAT)) == 1
- rw.new_blob(s2)
- rw.close()
+ with c.new_packwriter() as rw:
+ assert len(glob.glob(c.cachedir+IDX_PAT)) == 1
+ rw.new_blob(s1)
+ assert len(glob.glob(c.cachedir+IDX_PAT)) == 1
+ rw.new_blob(s2)
assert len(glob.glob(c.cachedir+IDX_PAT)) == 2


@@ -119,15 +111,14 @@ def test_midx_refreshing(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir
git.init_repo(bupdir)
c = client.Client(bupdir, create=True)
- rw = c.new_packwriter()
- rw.new_blob(s1)
- p1base = rw.breakpoint()
- p1name = os.path.join(c.cachedir, p1base)
- s1sha = rw.new_blob(s1) # should not be written; it's already in p1
- s2sha = rw.new_blob(s2)
- p2base = rw.close()
+ with c.new_packwriter() as rw:
+ rw.new_blob(s1)
+ p1base = rw.breakpoint()
+ p1name = os.path.join(c.cachedir, p1base)
+ s1sha = rw.new_blob(s1) # should not be written; it's already in p1
+ s2sha = rw.new_blob(s2)
+ p2base = rw.close()
p2name = os.path.join(c.cachedir, p2base)
- del rw

pi = git.PackIdxList(bupdir + b'/objects/pack')
assert len(pi.packs) == 2
diff --git a/test/int/test_git.py b/test/int/test_git.py
index 7965c60c..f5eab44c 100644
--- a/test/int/test_git.py
+++ b/test/int/test_git.py
@@ -117,18 +117,18 @@ def test_packs(tmpdir):
git.init_repo(bupdir)
git.verbose = 1

- w = git.PackWriter()
- w.new_blob(os.urandom(100))
- w.new_blob(os.urandom(100))
- w.abort()
-
- w = git.PackWriter()
- hashes = []
- nobj = 1000
- for i in range(nobj):
- hashes.append(w.new_blob(b'%d' % i))
- log('\n')
- nameprefix = w.close()
+ with git.PackWriter() as w:
+ w.new_blob(os.urandom(100))
+ w.new_blob(os.urandom(100))
+ w.abort()
+
+ with git.PackWriter() as w:
+ hashes = []
+ nobj = 1000
+ for i in range(nobj):
+ hashes.append(w.new_blob(b'%d' % i))
+ log('\n')
+ nameprefix = w.close()
print(repr(nameprefix))
WVPASS(os.path.exists(nameprefix + b'.pack'))
WVPASS(os.path.exists(nameprefix + b'.idx'))
@@ -163,11 +163,11 @@ def test_pack_name_lookup(tmpdir):
hashes = []

for start in range(0,28,2):
- w = git.PackWriter()
- for i in range(start, start+2):
- hashes.append(w.new_blob(b'%d' % i))
- log('\n')
- idxnames.append(os.path.basename(w.close() + b'.idx'))
+ with git.PackWriter() as w:
+ for i in range(start, start+2):
+ hashes.append(w.new_blob(b'%d' % i))
+ log('\n')
+ idxnames.append(os.path.basename(w.close() + b'.idx'))

r = git.PackIdxList(packdir)
WVPASSEQ(len(r.packs), 2)
@@ -310,31 +310,30 @@ def test_new_commit(tmpdir):
git.init_repo(bupdir)
git.verbose = 1

- w = git.PackWriter()
- tree = os.urandom(20)
- parent = os.urandom(20)
- author_name = b'Author'
- author_mail = b'author@somewhere'
- adate_sec = 1439657836
- cdate_sec = adate_sec + 1
- committer_name = b'Committer'
- committer_mail = b'committer@somewhere'
- adate_tz_sec = cdate_tz_sec = None
- commit = w.new_commit(tree, parent,
- b'%s <%s>' % (author_name, author_mail),
- adate_sec, adate_tz_sec,
- b'%s <%s>' % (committer_name, committer_mail),
- cdate_sec, cdate_tz_sec,
- b'There is a small mailbox here')
- adate_tz_sec = -60 * 60
- cdate_tz_sec = 120 * 60
- commit_off = w.new_commit(tree, parent,
+ with git.PackWriter() as w:
+ tree = os.urandom(20)
+ parent = os.urandom(20)
+ author_name = b'Author'
+ author_mail = b'author@somewhere'
+ adate_sec = 1439657836
+ cdate_sec = adate_sec + 1
+ committer_name = b'Committer'
+ committer_mail = b'committer@somewhere'
+ adate_tz_sec = cdate_tz_sec = None
+ commit = w.new_commit(tree, parent,
b'%s <%s>' % (author_name, author_mail),
adate_sec, adate_tz_sec,
b'%s <%s>' % (committer_name, committer_mail),
cdate_sec, cdate_tz_sec,
b'There is a small mailbox here')
- w.close()
+ adate_tz_sec = -60 * 60
+ cdate_tz_sec = 120 * 60
+ commit_off = w.new_commit(tree, parent,
+ b'%s <%s>' % (author_name, author_mail),
+ adate_sec, adate_tz_sec,
+ b'%s <%s>' % (committer_name, committer_mail),
+ cdate_sec, cdate_tz_sec,
+ b'There is a small mailbox here')

commit_items = git.get_commit_items(hexlify(commit), git.cp())
local_author_offset = localtime(adate_sec).tm_gmtoff
--
2.30.2

Rob Browning

unread,
Sep 18, 2021, 2:43:10 PM9/18/21
to bup-...@googlegroups.com, Bas Stottelaar
From: Bas Stottelaar <bassto...@gmail.com>

When an exception occurs, __del__ is invoked by the interpreter, to
perform cleanup. It seems that since Python 3.4, the behaviour has
changed, and also prohibits invocations of open(..) (source:
https://stackoverflow.com/a/29737870). Instead, contextmanager API
should be used (source: https://stackoverflow.com/a/26544629), which
seems to be in place already.

This should fix exception messages such as 'NameError: name 'open'
is not defined'.

Signed-off-by: Bas Stottelaar <bassto...@gmail.com>
---
lib/bup/git.py | 3 ---
1 file changed, 3 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index ce8041a7..051d0bfd 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -727,9 +727,6 @@ class PackWriter:
self.max_pack_objects = max_pack_objects if max_pack_objects \
else max(1, self.max_pack_size // 5000)

- def __del__(self):
- self.close()
-
def __enter__(self):
return self

--
2.30.2

Reply all
Reply to author
Forward
0 new messages