[PATCH 1/4] vint: test more error cases; raise ValueError for invalid args

0 views
Skip to first unread message

Rob Browning

unread,
May 7, 2026, 6:29:15 PMMay 7
to bup-...@googlegroups.com
Test some additional error cases and switch from Exception to
ValueError for invalid pack/unpack format strings or mismatched format
strings and values. Move the pack overflow fallback out of the
exception handler. If nothing else, this avoids needing a "raise from
None" for the ValueError.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Pushed to main.

lib/bup/_helpers.c | 4 ++--
lib/bup/vint.py | 36 ++++++++++++++++++++----------------
test/int/test_vint.py | 22 ++++++++++++++++++++++
3 files changed, 44 insertions(+), 18 deletions(-)

diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c
index a6195ba4..fc4fe0b4 100644
--- a/lib/bup/_helpers.c
+++ b/lib/bup/_helpers.c
@@ -1113,7 +1113,7 @@ static unsigned int vuint_encode(long long val, char *buf)
unsigned int len = 0;

if (val < 0) {
- PyErr_Format(PyExc_ValueError, "cannot encode negative value %llu", val);
+ PyErr_Format(PyExc_ValueError, "cannot encode negative value %lld", val);
return 0;
}

@@ -1757,7 +1757,7 @@ static PyObject *bup_limited_vint_pack(PyObject *self, PyObject *args)
break;
}
default:
- PyErr_Format(PyExc_Exception, "unknown xpack format string item %c",
+ PyErr_Format(PyExc_ValueError, "unknown format string item '%c'",
fmt[i]);
goto error;
}
diff --git a/lib/bup/vint.py b/lib/bup/vint.py
index 00f5b3c1..1969d110 100644
--- a/lib/bup/vint.py
+++ b/lib/bup/vint.py
@@ -147,7 +147,7 @@ def skip_bvec(port):

def send(port, types, *args):
if len(types) != len(args):
- raise Exception('number of arguments does not match format string')
+ raise ValueError('number of arguments does not match format string')
for (type, value) in zip(types, args):
if type == 'V':
write_vuint(port, value)
@@ -156,7 +156,8 @@ def send(port, types, *args):
elif type == 's':
write_bvec(port, value)
else:
- raise Exception('unknown xpack format string item "' + type + '"')
+ raise ValueError(f'unknown format string item {type!r}')
+

def recv(port, types):
result = []
@@ -164,29 +165,32 @@ def recv(port, types):
if type == 'V': decode = read_vuint
elif type == 'v': decode = read_vint
elif type == 's': decode = read_bvec
- else: raise Exception(f'unknown xunpack format string item {type!r}')
+ else: raise ValueError(f'unknown format string item {type!r}')
x = decode(port)
if x is None:
- raise EOFError(f'EOF while reading xunpack type {type!r}')
+ raise EOFError(f'EOF while reading type {type!r}')
result.append(x)
return result

+
def pack(types, *args):
try:
return _helpers.limited_vint_pack(types, args)
except OverflowError:
- assert len(types) == len(args)
- ret = []
- for typ, value in zip(types, args):
- if typ == 'V':
- ret.append(encode_vuint(value))
- elif typ == 'v':
- ret.append(encode_vint(value))
- elif typ == 's':
- ret.append(encode_bvec(value))
- else:
- assert False
- return b''.join(ret)
+ pass
+ assert len(types) == len(args)
+ ret = []
+ for typ, value in zip(types, args):
+ if typ == 'V':
+ ret.append(encode_vuint(value))
+ elif typ == 'v':
+ ret.append(encode_vint(value))
+ elif typ == 's':
+ ret.append(encode_bvec(value))
+ else:
+ raise ValueError(f'unknown format string item {typ!r}')
+ return b''.join(ret)
+

def unpack(types, data):
port = BytesIO(data)
diff --git a/test/int/test_vint.py b/test/int/test_vint.py
index 332aadf2..87488499 100644
--- a/test/int/test_vint.py
+++ b/test/int/test_vint.py
@@ -2,6 +2,7 @@
from io import BytesIO
from itertools import combinations_with_replacement

+from pytest import raises
from wvpytest import *

from bup import vint
@@ -14,6 +15,8 @@ def encode_and_decode_vuint(x):


def test_vuint():
+ with raises(ValueError, match='cannot encode negative value -1'):
+ vint.encode_vuint(-1)
for x in (0, 1, 42, 128, 10**16, 10**100):
WVPASSEQ(encode_and_decode_vuint(x), x)
WVEXCEPT(Exception, vint.write_vuint, BytesIO(), -1)
@@ -66,6 +69,21 @@ def test_bvec():
WVEXCEPT(EOFError, vint.skip_bvec, BytesIO(b'\x01'))


+def test_send_recv():
+ b = BytesIO()
+ with raises(ValueError, match='number of arguments does not match format string'):
+ vint.send(b, 'v')
+ with raises(ValueError, match='unknown format string'):
+ vint.send(b, '?', 0)
+ b = BytesIO()
+ vint.send(b, 'v', 0)
+ b.seek(0)
+ with raises(ValueError, match='unknown format string'):
+ vint.recv(b, '?')
+ with raises(EOFError, match="EOF while reading type 'V'"):
+ vint.recv(b, 'vV')
+
+
def pack_and_unpack(types, *values):
data = vint.pack(types, *values)
return vint.unpack(types, data)
@@ -94,3 +112,7 @@ def test_pack_and_unpack():
WVEXCEPT(Exception, vint.pack, 'x', 1)
WVEXCEPT(Exception, vint.unpack, 's', '')
WVEXCEPT(Exception, vint.unpack, 'x', '')
+ with raises(ValueError, match=r"unknown format string item '\?'"):
+ vint.pack('?', 0)
+ with raises(ValueError, match=r"unknown format string item '\?'"):
+ vint.pack('v?', 10000000000000000000000000000000000000, 0)
--
2.47.3

Rob Browning

unread,
May 7, 2026, 6:29:16 PMMay 7
to bup-...@googlegroups.com
For now, instead of returning none for an empty --bool value, raise a
ConfigError. i.e. for

[bup]
some_value =

We can always relax this later if it seems warranted. Add and use
bup.io.cmd_msg() to format the git command in the error message.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Pushed to main.

lib/bup/git.py | 7 ++++---
lib/bup/io.py | 8 ++++++++
test/int/sample.conf | 1 +
test/int/test_git.py | 6 +++++-
test/int/test_io.py | 8 +++++++-
5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/lib/bup/git.py b/lib/bup/git.py
index 833cf278..eef14fd6 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -19,6 +19,7 @@ from bup import _helpers, hashsplit, midx, xstat
from bup.bloom import BloomInvalid, BloomNotFound, BloomReader
from bup.commit import create_commit_blob, parse_commit
from bup.compat import dataclass_frozen_for_testing, environ
+from bup.config import ConfigError
from bup.helpers import (EXIT_FAILURE,
OBJECT_EXISTS,
ObjectLocation,
@@ -35,7 +36,7 @@ from bup.helpers import (EXIT_FAILURE,
stopped,
temp_dir,
unlink)
-from bup.io import enc_shs, path_msg
+from bup.io import cmd_msg, enc_shs, path_msg
from bup.midx import open_midx
import bup.path

@@ -84,12 +85,12 @@ def git_config_get(path, option, *, opttype=None):
return int(r)
if opttype == 'bool': # any positive int is true for git --bool
if not r:
- return None
+ raise ConfigError(f'no output from {cmd_msg(cmd)}')
if r in (b'0', b'false'):
return False
if r in (b'1', b'true'):
return True
- raise GitError(f'{cmd!r} returned invalid boolean value {r}')
+ raise ConfigError(f'got invalid value {path_msg(r)} from {cmd_msg(cmd)}')
return r
if rc == 1:
return None
diff --git a/lib/bup/io.py b/lib/bup/io.py
index e6b2a9b4..e1e4361b 100644
--- a/lib/bup/io.py
+++ b/lib/bup/io.py
@@ -258,6 +258,14 @@ def path_msg(x):
"""Return a string representation of a path."""
return enc_shs(fsdecode(x))

+def cmd_msg(cmd):
+ """Return an sh quoted representation of the cmd list (bytes or
+ strings) as a string. The individual elements are quoted, not the
+ whole command. e.g. ("ls", "x y") becomes "ls 'x y'".
+
+ """
+ return ' '.join(enc_shs(x if isinstance(x, str) else fsdecode(x))
+ for x in cmd)

def walk_path_msg(ref_name, item_path):
# walk ref of
diff --git a/test/int/sample.conf b/test/int/sample.conf
index e735f8e2..ecdffc36 100644
--- a/test/int/sample.conf
+++ b/test/int/sample.conf
@@ -10,3 +10,4 @@ isfalse1 = false
isfalse2 = 0
isbad = ok
hex = 0x777
+isempty =
diff --git a/test/int/test_git.py b/test/int/test_git.py
index a5229596..35224be8 100644
--- a/test/int/test_git.py
+++ b/test/int/test_git.py
@@ -6,11 +6,13 @@ from time import localtime
import struct, os
import pytest

+from pytest import raises
from wvpytest import *
import buptest

from bup import git, path
from bup.compat import environ
+from bup.config import ConfigError
from bup.helpers import OBJECT_EXISTS, finalized, log, mkdirp


@@ -481,13 +483,15 @@ def test_config(tmpdir):

WVPASSEQ(git.git_config_get(no_such_file, b'bup.foo'), None)

- WVEXCEPT(git.GitError, git_config_get, b'bup.isbad', opttype='bool')
+ WVEXCEPT(ConfigError, git_config_get, b'bup.isbad', opttype='bool')
WVEXCEPT(git.GitError, git_config_get, b'bup.isbad', opttype='int')
WVPASSEQ(git_config_get(b'bup.isbad'), b'ok')
WVPASSEQ(True, git_config_get(b'bup.istrue1', opttype='bool'))
WVPASSEQ(True, git_config_get(b'bup.istrue2', opttype='bool'))
WVPASSEQ(False, git_config_get(b'bup.isfalse1', opttype='bool'))
WVPASSEQ(False, git_config_get(b'bup.isfalse2', opttype='bool'))
+ with raises(ConfigError, match='no output from'):
+ git_config_get(b'bup.isempty', opttype='bool')
WVPASSEQ(None, git_config_get(b'bup.nosuchkey', opttype='bool'))
WVPASSEQ(1, git_config_get(b'bup.istrue1', opttype='int'))
WVPASSEQ(0, git_config_get(b'bup.isfalse2', opttype='int'))
diff --git a/test/int/test_io.py b/test/int/test_io.py
index ee0ccf19..c2d6180c 100644
--- a/test/int/test_io.py
+++ b/test/int/test_io.py
@@ -1,7 +1,8 @@

from wvpytest import *

-from bup.io import enc_dsq, enc_dsqs, enc_sh, enc_shs, qsql_id, qsql_str
+from bup.io import \
+ cmd_msg, enc_dsq, enc_dsqs, enc_sh, enc_shs, qsql_id, qsql_str


def _dsq_enc_byte(b):
@@ -102,6 +103,11 @@ def test_enc_shs():
assert r"$'\xb5'" \
== enc_shs(b'\xb5'.decode('utf-8', errors='surrogateescape'))

+def test_cmd_msg():
+ assert 'ls' == cmd_msg(('ls',))
+ assert 'ls' == cmd_msg((b'ls',))
+ assert "ls 'x y'" == cmd_msg(('ls', 'x y'))
+
def test_qsql_id():
assert '""""' == qsql_id('"')
assert '"x"' == qsql_id('x')
--
2.47.3

Rob Browning

unread,
May 7, 2026, 6:29:17 PMMay 7
to bup-...@googlegroups.com
This allows the misuse checks in save and split via
hashsplit.configuration() to work properly.

Parse git config integers ourselves because --type int doesn't provide
any way to distinguish a bad value from some other, more serious
failure (it just exits with status 128). Wrap strtoimax to handle the
conversion in order to more easily match what git does, since it calls
strtoimax with a base of 0, and so handles octal and hex, even though
it doesn't mention that in git-config(1).

Thanks to Johannes Berg for tracking that down.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---

Pushed to main.

lib/bup/_helpers.c | 22 +++++++++++++++++++++
lib/bup/git.py | 47 ++++++++++++++++++++++++++++++++++----------
test/int/test_git.py | 34 +++++++++++++++++++++++++++++++-
3 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/lib/bup/_helpers.c b/lib/bup/_helpers.c
index fc4fe0b4..27941e76 100644
--- a/lib/bup/_helpers.c
+++ b/lib/bup/_helpers.c
@@ -1774,6 +1774,26 @@ static PyObject *bup_limited_vint_pack(PyObject *self, PyObject *args)
return NULL;
}

+static PyObject *
+bup_strtoimax(PyObject *self, PyObject *args)
+{
+ char *str;
+ if (!PyArg_ParseTuple(args, cstr_argf, &str))
+ return NULL;
+ errno = 0;
+ char *end;
+ intmax_t v = strtoimax(str, &end, 0);
+ if (end == str)
+ return Py_BuildValue("(Oi)", Py_None, 0);
+ if (v == INTMAX_MAX && errno == ERANGE)
+ return Py_BuildValue("(Oi)", PyFloat_FromDouble(Py_INFINITY), 0);
+ if (v == INTMAX_MIN && errno == ERANGE)
+ return Py_BuildValue("(Oi)", PyFloat_FromDouble(-Py_INFINITY), 0);
+ if (errno)
+ return PyErr_SetFromErrno(PyExc_IOError);
+ return Py_BuildValue("(On)", BUP_LONGISH_TO_PY(v), end - str);
+}
+
static PyMethodDef helper_methods[] = {
{ "write_sparsely", bup_write_sparsely, METH_VARARGS,
"Write buf excepting zeros at the end. Return trailing zero count." },
@@ -1861,6 +1881,8 @@ static PyMethodDef helper_methods[] = {
{ "vint_encode", bup_vint_encode, METH_VARARGS, "encode an int to vint" },
{ "limited_vint_pack", bup_limited_vint_pack, METH_VARARGS,
"Try to pack vint/vuint/str, throwing OverflowError when unable." },
+ { "strtoimax", bup_strtoimax, METH_VARARGS,
+ "Return value and index of first byte after value as per strtoimax(1)" },
{ NULL, NULL, 0, NULL }, // sentinel
};

diff --git a/lib/bup/git.py b/lib/bup/git.py
index eef14fd6..1200d166 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -69,32 +69,59 @@ def _raise_git_cmd_error(cmd, status):
def repo_config_file(path):
return os.path.join(path or repo(), b'config')

+
+def parse_git_int(v):
+ """Parse v exactly as git config --type int would."""
+ mag, suffix_i = _helpers.strtoimax(v)
+ if mag == float('+inf'):
+ raise ConfigError(f'--type int value {path_msg(v)} too large')
+ if mag == float('-inf'):
+ raise ConfigError(f'--type int value {path_msg(v)} too small')
+ if mag is None:
+ raise ConfigError(f'unrecognized --type int value {path_msg(v)}')
+ unit = v[suffix_i:]
+ if not unit: return mag
+ if unit in (b'k', b'K'): return mag * 1024
+ if unit in (b'm', b'M'): return mag * 1024 * 1024
+ if unit in (b'g', b'G'): return mag * 1024 * 1024 * 1024
+ raise ConfigError(f'unrecognized --type int value {path_msg(v)}')
+
+
def git_config_get(path, option, *, opttype=None):
+ """Return the git config value for path. Return None if the path
+ does not exist. Return an integer for int, boolean for bool, and
+ bytes otherwise. This is stricter than git in that any
+ opttype='bool' values must be 0, 1, true or false.
+
+ """
+ assert opttype in ('bool', 'int', None)
+ # We don't use --type int/bool because git doesn't currently
+ # provide a way to distinguish a bad value from other errors, and
+ # for bool, we're stricter than git.
cmd = [b'git', b'config', b'--file', path, b'--null']
- if opttype == 'int':
- cmd.extend([b'--int'])
- else:
- assert opttype in ('bool', None)
cmd.extend([b'--get', option])
- cp = subprocess.run(cmd, stdout=subprocess.PIPE)
+ cp = subprocess.run(cmd, stdout=PIPE)
# with --null, git writes out a trailing \0 after the value
r = cp.stdout[:-1]
rc = cp.returncode
if rc == 0:
+ if not r:
+ raise ConfigError(f'no output from {cmd_msg(cmd)}')
if opttype == 'int':
- return int(r)
+ val = parse_git_int(r)
+ if val is not None:
+ return val
+ raise ConfigError(f'got invalid int value {path_msg(r)} from {cmd_msg(cmd)}')
if opttype == 'bool': # any positive int is true for git --bool
- if not r:
- raise ConfigError(f'no output from {cmd_msg(cmd)}')
if r in (b'0', b'false'):
return False
if r in (b'1', b'true'):
return True
- raise ConfigError(f'got invalid value {path_msg(r)} from {cmd_msg(cmd)}')
+ raise ConfigError(f'got invalid bool value {path_msg(r)} from {cmd_msg(cmd)}')
return r
if rc == 1:
return None
- raise GitError('%r returned %d' % (cmd, rc))
+ raise GitError(f'unexpected exit {rc} for {cmd_msg(cmd)}')


def get_commit_items(ref, get_ref):
diff --git a/test/int/test_git.py b/test/int/test_git.py
index 35224be8..d15b0281 100644
--- a/test/int/test_git.py
+++ b/test/int/test_git.py
@@ -469,6 +469,38 @@ def test_midx_close(tmpdir):
# check that we don't have it open anymore
WVPASSEQ(False, b'deleted' in fn)

+
+def test_parse_git_int():
+ parse = git.parse_git_int
+ with raises(ConfigError, match="unrecognized --type int value ''"):
+ parse(b'')
+ with raises(ConfigError, match="unrecognized --type int value x"):
+ parse(b'x')
+ with raises(ConfigError, match="unrecognized --type int value 0.1"):
+ parse(b'0.1')
+ with raises(ConfigError, match="value .* too large"):
+ parse(b'10000000000000000000000000')
+ with raises(ConfigError, match="value .* too small"):
+ parse(b'-10000000000000000000000000')
+ assert 0 == parse(b'0')
+ assert 1 == parse(b'1')
+ assert 1 == parse(b' 1')
+ assert 1024 == parse(b'1k')
+ assert 1024**2 == parse(b'1m')
+ assert 1024**3 == parse(b'1g')
+ assert 1 == parse(b'+1')
+ assert 1024 == parse(b'+1k')
+ assert 1024**2 == parse(b'+1m')
+ assert 1024**3 == parse(b'+1g')
+ assert -1 == parse(b'-1')
+ assert -1024 == parse(b'-1k')
+ assert -1024**2 == parse(b'-1m')
+ assert -1024**3 == parse(b'-1g')
+ with raises(ConfigError, match="unrecognized --type int value 1q"):
+ parse(b'1q')
+ assert 43008 == parse(b'42k')
+
+
def test_config(tmpdir):
cfg_file = os.path.join(os.path.dirname(__file__), 'sample.conf')
no_such_file = os.path.join(os.path.dirname(__file__), 'nosuch.conf')
@@ -484,7 +516,7 @@ def test_config(tmpdir):
WVPASSEQ(git.git_config_get(no_such_file, b'bup.foo'), None)

WVEXCEPT(ConfigError, git_config_get, b'bup.isbad', opttype='bool')
- WVEXCEPT(git.GitError, git_config_get, b'bup.isbad', opttype='int')
+ WVEXCEPT(ConfigError, git_config_get, b'bup.isbad', opttype='int')
WVPASSEQ(git_config_get(b'bup.isbad'), b'ok')
WVPASSEQ(True, git_config_get(b'bup.istrue1', opttype='bool'))
WVPASSEQ(True, git_config_get(b'bup.istrue2', opttype='bool'))
--
2.47.3

Reply all
Reply to author
Forward
0 new messages