Add POSIX quoter and Bup-Version/Bup-Argv commit trailers

2 views
Skip to first unread message

Rob Browning

unread,
Jul 18, 2025, 2:36:25 PMJul 18
to bup-...@googlegroups.com
Now that POSIX sh makes it possible via the addition of $'' style
quoting, add enc_sh() to POSIX quote any bytes as a single line, and
use that to add a Bup-Argv commit trailer, replacing the previous
python repr encoded command, and also include a Bup-Version trailer.

See also git-interpret-trailers(1).

Proposed for main.

--
Rob Browning
rlb @defaultvalue.org and @debian.org
GPG as of 2011-07-10 E6A9 DA3C C9FD 1FF8 C676 D2C4 C0F0 39E9 ED1B 597A
GPG as of 2002-11-03 14DD 432F AE39 534D B592 F9A0 25C8 D377 8C7E 73A4

Rob Browning

unread,
Jul 18, 2025, 2:36:25 PMJul 18
to bup-...@googlegroups.com
Replace the existing python list repr() command in the commit messages
with a Bup-Argv trailer (git-interpret-trailers(1)), POSIX quoted
via enc_sh(), and include a Bup-Version trailer with the current bup
version.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/cmd/get.py | 3 ++-
lib/bup/cmd/save.py | 10 ++++------
lib/bup/cmd/split.py | 5 +++--
lib/bup/commit.py | 10 ++++++++++
note/main.md | 8 ++++++++
5 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/lib/bup/cmd/get.py b/lib/bup/cmd/get.py
index 5d6a7088..5cdfaa45 100644
--- a/lib/bup/cmd/get.py
+++ b/lib/bup/cmd/get.py
@@ -5,6 +5,7 @@ from stat import S_ISDIR
import os, sys, textwrap, time

from bup import compat, git, client, vfs
+from bup.commit import commit_message
from bup.compat import argv_bytes
from bup.config import derive_repo_addr
from bup.git import MissingObject, get_cat_data, parse_commit, walk_object
@@ -449,7 +450,7 @@ def handle_append(item, src_repo, dest_repo, opt):
if item.src.type == 'tree':
get_random_item(item.spec.src, src_oidx, src_repo, dest_repo, opt)
parent = item.dest.hash
- msg = b'bup save\n\nGenerated by command:\n%r\n' % compat.get_argvb()
+ msg = commit_message(b'bup get', compat.get_argvb())
userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
now = time.time()
commit = dest_repo.write_commit(item.src.hash, parent,
diff --git a/lib/bup/cmd/save.py b/lib/bup/cmd/save.py
index f673b898..d7b7fad9 100644
--- a/lib/bup/cmd/save.py
+++ b/lib/bup/cmd/save.py
@@ -5,7 +5,8 @@ import math, os, stat, sys, time

from bup import hashsplit, options, index, client, metadata
from bup import hlinkdb
-from bup.compat import argv_bytes
+from bup.commit import commit_message
+from bup.compat import argv_bytes, get_argvb
from bup.config import ConfigError, derive_repo_addr
from bup.hashsplit import \
(GIT_MODE_TREE,
@@ -423,13 +424,10 @@ def save_tree(opt, reader, hlink_db, msr, repo, split_trees, split_cfg):


def commit_tree(tree, parent, date, argv, repo):
- # Strip b prefix from python 3 bytes reprs to preserve previous format
- msgcmd = b'[%s]' % b', '.join([repr(argv_bytes(x))[1:].encode('ascii')
- for x in argv])
- msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
return repo.write_commit(tree, parent, userline, date, None,
- userline, date, None, msg)
+ userline, date, None,
+ commit_message(b'bup save', get_argvb()))


def main(argv):
diff --git a/lib/bup/cmd/split.py b/lib/bup/cmd/split.py
index 6209e1a4..5c9861b0 100644
--- a/lib/bup/cmd/split.py
+++ b/lib/bup/cmd/split.py
@@ -5,6 +5,7 @@ from os import environb as environ
import os, sys, time

from bup import compat, hashsplit, git, options, client
+from bup.commit import commit_message
from bup.compat import argv_bytes
from bup.config import ConfigError, derive_repo_addr
from bup.hashsplit import \
@@ -148,10 +149,10 @@ def split(opt, files, parent, out, split_cfg, *,

commit = None
if opt.commit or opt.name:
- msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
commit = new_commit(tree, parent, userline, opt.date,
- None, userline, opt.date, None, msg)
+ None, userline, opt.date, None,
+ commit_message(b'bup split', compat.get_argvb()))
if opt.commit:
out.write(hexlify(commit) + b'\n')

diff --git a/lib/bup/commit.py b/lib/bup/commit.py
index 554f6c17..3601d31a 100644
--- a/lib/bup/commit.py
+++ b/lib/bup/commit.py
@@ -5,6 +5,8 @@ import re

from bup.compat import dataclass
from bup.helpers import utc_offset_str
+from bup.io import enc_sh
+from bup.version import version


def parse_tz_offset(s):
@@ -123,3 +125,11 @@ def create_commit_blob(tree, parent,
l.append(b'')
l.append(msg)
return b'\n'.join(l)
+
+
+def commit_message(message, command):
+ message = message.rstrip()
+ return b'\n'.join((message,
+ b'',
+ b'Bup-Version: %s' % version,
+ b'Bup-Argv: %s' % b' '.join(map(enc_sh, command))))
diff --git a/note/main.md b/note/main.md
index d3a445da..abf02b5d 100644
--- a/note/main.md
+++ b/note/main.md
@@ -136,6 +136,14 @@ General
longer print paths while scanning the repository because earlier git
(and then our use of git) didn't allow otherwise.

+* The commit message format has changed to place the command in a
+ POSIX quoted `Bup-Argv` trailer (git-interpret-trailers(1)) and the
+ version in a `Bup-Version` trailer, but note that the format is not
+ settled, i.e. may continue to change. The command quoting leaves
+ arguments unquoted when possible, uses single quotes when there's
+ no single quote or control character (e.g. newline), and falls
+ back to `$'...'` quoting otherwise.
+
Bugs
----

--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:36:25 PMJul 18
to bup-...@googlegroups.com
Add enc_sh() to minimally POSIX encode values, adding no quotes when
possible, using single quotes when sufficient, and falling back to $''
when required, for example, when the value includes newlines.

For now, conservatively match git's behavior when core.quotePath is
false, e.g. by encoding all control characters, and all bytes >= 0x7f,
keeping the output compatible with ASCII superset encodings.

cf. https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/io.py | 77 +++++++++++++++++++++++++++++++++++++++++++++
test/int/test_io.py | 59 ++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+)
create mode 100644 test/int/test_io.py

diff --git a/lib/bup/io.py b/lib/bup/io.py
index cc92f1f8..3c20ea87 100644
--- a/lib/bup/io.py
+++ b/lib/bup/io.py
@@ -81,6 +81,83 @@ def byte_stream(file):
return file.buffer


+def _make_enc_sh_map():
+ m = [None] * 256
+ for i in range(7): m[i] = br'\x%02x' % i
+ m[7] = br'\a'
+ m[8] = br'\b'
+ m[9] = br'\t'
+ m[10] = br'\n'
+ m[11] = br'\v'
+ m[12] = br'\f'
+ m[13] = br'\r'
+ for i in range(14, 27): m[i] = br'\x%02x' % i
+ m[27] = br'\e' # ESC
+ for i in range(28, 32): m[i] = br'\x%02x' % i
+ m[39] = br"\'"
+ m[92] = br'\\'
+ for i in range(127, 256): m[i] = br'\x%02x' % i
+ return m
+
+_enc_sh_map = _make_enc_sh_map()
+
+def enc_dsq(val):
+ """Encode val in POSIX $'...' (dollar-single-quote) format."""
+ # https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04
+ result = [b"$'"]
+ part_start = 0
+ i = 0
+
+ def finish_part():
+ nonlocal result, i, part_start
+ if i != part_start:
+ result.append(val[part_start:i])
+ part_start = i = i + 1
+
+ encoding = _enc_sh_map
+ while i < len(val):
+ b = val[i]
+ enc = encoding[b]
+ if enc:
+ finish_part()
+ result.append(enc)
+ else:
+ i += 1
+ finish_part()
+ result.append(b"'")
+ return b''.join(result)
+
+def enc_sh(val):
+ """Minimally POSIX quote val as a single line. Use no quotes if
+ possible, single quotes if val doesn't contain single quotes or
+ newline, otherwise dollar-single-quote.
+ https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02
+
+ For now, like git with core.quotePath set to false, this
+ conservatively hex escapes all bytes with the high bit set,
+ keeping the output compatible with any encoding that's compatible
+ with ASCII, e.g. UTF-8, Latin-1, etc.
+
+ """
+ #pylint: disable=consider-using-in
+ assert isinstance(val, bytes), val
+ if val == b'':
+ return b"''"
+ need_sq = False
+ need_dsq = False
+ for c in val: # 32 is space
+ if c < 32 or c == b"'"[0]:
+ need_dsq = True
+ break
+ # This set is everything from POSIX except ' and \n (handled above).
+ if c in b'|&;<>()$`\\" \t*?[]^!#~=%{,}':
+ need_sq = True
+ if need_dsq:
+ return enc_dsq(val)
+ if need_sq:
+ return b"'%s'" % val
+ return val
+
def path_msg(x):
"""Return a string representation of a path."""
# FIXME: configurability (might git-config quotePath be involved?)
diff --git a/test/int/test_io.py b/test/int/test_io.py
new file mode 100644
index 00000000..a6e30ca5
--- /dev/null
+++ b/test/int/test_io.py
@@ -0,0 +1,59 @@
+
+from wvpytest import *
+
+from bup.io import enc_dsq, enc_sh
+
+
+def test_enc_dsq():
+ def enc_byte(b):
+ bb = bytes([b])
+ sym = {b'\a': br'\a',
+ b'\b': br'\b',
+ b'\t': br'\t',
+ b'\n': br'\n',
+ b'\v': br'\v',
+ b'\f': br'\f',
+ b'\r': br'\r',
+ b'\x1b': br'\e'}
+ sub = sym.get(bb)
+ if sub:
+ return sub
+ if bb == b"'":
+ return br"\'"
+ if bb == b'\\':
+ return br'\\'
+ if b >= 127 or b < 7 or (b > 13 and b < 27) or (b > 27 and b < 32):
+ return br'\x%02x' % b
+ return bb
+
+ def enc(bv):
+ result = [b"$'"]
+ for b in bv:
+ result.append(enc_byte(b))
+ result.append(b"'")
+ return b''.join(result)
+
+ for i in range(1, 256):
+ bi = bytes([i])
+ wvpasseq(enc(bi), enc_dsq(bi))
+ v = b'foo' + bi
+ wvpasseq(enc(v), enc_dsq(v))
+ v = bi + b'foo'
+ wvpasseq(enc(v), enc_dsq(v))
+ v = b'foo' + bi + b'bar'
+ wvpasseq(enc(v), enc_dsq(v))
+
+ assert br"$'x'" == enc_dsq(b'x')
+ assert br"$'\n'" == enc_dsq(b'\n')
+ assert br"$'\x03'" == enc_dsq(b'\x03')
+
+def test_enc_sh():
+ assert br"''" == enc_sh(b'')
+ assert br"'a|b'" == enc_sh(b'a|b')
+ assert br"$'\n'" == enc_sh(b'\n')
+ assert br"$'\''" == enc_sh(b"'")
+ assert br"$'\x00'" == enc_sh(b'\0')
+ for needs_dsq in range(32):
+ assert enc_dsq(b'%c' % needs_dsq) == enc_sh(needs_dsq.to_bytes(1, 'big'))
+ for needs_sq in br'|&;<>()$`\" *?[]^!#~=%{,}':
+ assert b"'%c'" % needs_sq == enc_sh(needs_sq.to_bytes(1, 'big'))
--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:36:25 PMJul 18
to bup-...@googlegroups.com
Signed-off-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/commit.py | 117 ++++++++++++++++++++++++++++++++++++
lib/bup/git.py | 116 +----------------------------------
test/int/test_commit.py | 130 ++++++++++++++++++++++++++++++++++++++++
test/int/test_git.py | 114 -----------------------------------
4 files changed, 250 insertions(+), 227 deletions(-)
create mode 100644 lib/bup/commit.py
create mode 100644 test/int/test_commit.py

diff --git a/lib/bup/commit.py b/lib/bup/commit.py
new file mode 100644
index 00000000..5d863fd4
--- /dev/null
+++ b/lib/bup/commit.py
@@ -0,0 +1,117 @@
+
+from binascii import hexlify
+from collections import namedtuple
+import re
+
+from bup.helpers import utc_offset_str
+
+
+def parse_tz_offset(s):
+ """UTC offset in seconds."""
+ tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
+ if s[0] == b'-'[0]:
+ return - tz_off
+ return tz_off
+
+
+def parse_commit_gpgsig(sig):
+ """Return the original signature bytes.
+
+ i.e. with the "gpgsig " header and the leading space character on
+ each continuation line removed.
+
+ """
+ if not sig:
+ return None
+ assert sig.startswith(b'gpgsig ')
+ sig = sig[7:]
+ return sig.replace(b'\n ', b'\n')
+
+# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
+# Make sure that's authoritative.
+
+# See also
+# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
+# The continuation lines have only one leading space.
+
+_start_end_char = br'[^ .,:;<>"\'\0\n]'
+_content_char = br'[^\0\n<>]'
+_safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
+ % (_start_end_char,
+ _start_end_char, _content_char, _start_end_char)
+_tz_rx = br'[-+]\d\d[0-5]\d'
+_parent_rx = br'(?:parent [abcdefABCDEF0123456789]{40}\n)'
+# Assumes every following line starting with a space is part of the
+# mergetag. Is there a formal commit blob spec?
+_mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
+_commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
+(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
+committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
+(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
+(?P<message>(?:.|\n)*)''' % (_parent_rx,
+ _safe_str_rx, _safe_str_rx, _tz_rx,
+ _safe_str_rx, _safe_str_rx, _tz_rx,
+ _mergetag_rx))
+_parent_hash_rx = re.compile(br'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
+
+# Note that the author_sec and committer_sec values are (UTC) epoch
+# seconds, and for now the mergetag is not included.
+CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
+ 'author_name', 'author_mail',
+ 'author_sec', 'author_offset',
+ 'committer_name', 'committer_mail',
+ 'committer_sec', 'committer_offset',
+ 'gpgsig',
+ 'message'])
+
+def parse_commit(content):
+ commit_match = re.match(_commit_rx, content)
+ if not commit_match:
+ raise Exception('cannot parse commit %r' % content)
+ matches = commit_match.groupdict()
+ return CommitInfo(tree=matches['tree'],
+ parents=re.findall(_parent_hash_rx, matches['parents']),
+ author_name=matches['author_name'],
+ author_mail=matches['author_mail'],
+ author_sec=int(matches['asec']),
+ author_offset=parse_tz_offset(matches['atz']),
+ committer_name=matches['committer_name'],
+ committer_mail=matches['committer_mail'],
+ committer_sec=int(matches['csec']),
+ committer_offset=parse_tz_offset(matches['ctz']),
+ gpgsig=parse_commit_gpgsig(matches['gpgsig']),
+ message=matches['message'])
+
+
+def _local_git_date_str(epoch_sec):
+ return b'%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
+
+def _git_date_str(epoch_sec, tz_offset_sec):
+ offs = tz_offset_sec // 60
+ return b'%d %s%02d%02d' \
+ % (epoch_sec,
+ b'+' if offs >= 0 else b'-',
+ abs(offs) // 60,
+ abs(offs) % 60)
+
+
+def create_commit_blob(tree, parent,
+ author, adate_sec, adate_tz,
+ committer, cdate_sec, cdate_tz,
+ msg):
+ if adate_tz is not None:
+ adate_str = _git_date_str(adate_sec, adate_tz)
+ else:
+ adate_str = _local_git_date_str(adate_sec)
+ if cdate_tz is not None:
+ cdate_str = _git_date_str(cdate_sec, cdate_tz)
+ else:
+ cdate_str = _local_git_date_str(cdate_sec)
+ l = []
+ if tree: l.append(b'tree %s' % hexlify(tree))
+ if parent: l.append(b'parent %s' % hexlify(parent))
+ if author: l.append(b'author %s %s' % (author, adate_str))
+ if committer: l.append(b'committer %s %s' % (committer, cdate_str))
+ l.append(b'')
+ l.append(msg)
+ return b'\n'.join(l)
diff --git a/lib/bup/git.py b/lib/bup/git.py
index b734b190..0fb11da4 100644
--- a/lib/bup/git.py
+++ b/lib/bup/git.py
@@ -6,7 +6,6 @@ interact with the Git data structures.
import os, sys, zlib, subprocess, struct, stat, re, glob
from array import array
from binascii import hexlify, unhexlify
-from collections import namedtuple
from contextlib import ExitStack
from dataclasses import replace
from itertools import islice
@@ -16,6 +15,7 @@ from sys import stderr
from typing import Optional, Union

from bup import _helpers, hashsplit, path, midx, bloom, xstat
+from bup.commit import create_commit_blob, parse_commit
from bup.compat import dataclass, environ
from bup.io import path_msg
from bup.helpers import (EXIT_FAILURE,
@@ -33,8 +33,7 @@ from bup.helpers import (EXIT_FAILURE,
progress, qprogress, stat_if_exists,
quote,
temp_dir,
- unlink,
- utc_offset_str)
+ unlink)
from bup.midx import open_midx


@@ -103,103 +102,16 @@ def git_config_get(path, option, *, opttype=None):
raise GitError('%r returned %d' % (cmd, rc))


-def parse_tz_offset(s):
- """UTC offset in seconds."""
- tz_off = (int(s[1:3]) * 60 * 60) + (int(s[3:5]) * 60)
- if s[0] == b'-'[0]:
- return - tz_off
- return tz_off
-
-def parse_commit_gpgsig(sig):
- """Return the original signature bytes.
-
- i.e. with the "gpgsig " header and the leading space character on
- each continuation line removed.
-
- """
- if not sig:
- return None
- assert sig.startswith(b'gpgsig ')
- sig = sig[7:]
- return sig.replace(b'\n ', b'\n')
-
-# FIXME: derived from http://git.rsbx.net/Documents/Git_Data_Formats.txt
-# Make sure that's authoritative.
-
-# See also
-# https://github.com/git/git/blob/master/Documentation/technical/signature-format.txt
-# The continuation lines have only one leading space.
-
-_start_end_char = br'[^ .,:;<>"\'\0\n]'
-_content_char = br'[^\0\n<>]'
-_safe_str_rx = br'(?:%s{1,2}|(?:%s%s*%s))' \
- % (_start_end_char,
- _start_end_char, _content_char, _start_end_char)
-_tz_rx = br'[-+]\d\d[0-5]\d'
-_parent_rx = br'(?:parent [abcdefABCDEF0123456789]{40}\n)'
-# Assumes every following line starting with a space is part of the
-# mergetag. Is there a formal commit blob spec?
-_mergetag_rx = br'(?:\nmergetag object [abcdefABCDEF0123456789]{40}(?:\n [^\0\n]*)*)'
-_commit_rx = re.compile(br'''tree (?P<tree>[abcdefABCDEF0123456789]{40})
-(?P<parents>%s*)author (?P<author_name>%s) <(?P<author_mail>%s)> (?P<asec>\d+) (?P<atz>%s)
-committer (?P<committer_name>%s) <(?P<committer_mail>%s)> (?P<csec>\d+) (?P<ctz>%s)(?P<mergetag>%s?)
-(?P<gpgsig>gpgsig .*\n(?: .*\n)*)?
-(?P<message>(?:.|\n)*)''' % (_parent_rx,
- _safe_str_rx, _safe_str_rx, _tz_rx,
- _safe_str_rx, _safe_str_rx, _tz_rx,
- _mergetag_rx))
-_parent_hash_rx = re.compile(br'\s*parent ([abcdefABCDEF0123456789]{40})\s*')
-
-# Note that the author_sec and committer_sec values are (UTC) epoch
-# seconds, and for now the mergetag is not included.
-CommitInfo = namedtuple('CommitInfo', ['tree', 'parents',
- 'author_name', 'author_mail',
- 'author_sec', 'author_offset',
- 'committer_name', 'committer_mail',
- 'committer_sec', 'committer_offset',
- 'gpgsig',
- 'message'])
-
-def parse_commit(content):
- commit_match = re.match(_commit_rx, content)
- if not commit_match:
- raise Exception('cannot parse commit %r' % content)
- matches = commit_match.groupdict()
- return CommitInfo(tree=matches['tree'],
- parents=re.findall(_parent_hash_rx, matches['parents']),
- author_name=matches['author_name'],
- author_mail=matches['author_mail'],
- author_sec=int(matches['asec']),
- author_offset=parse_tz_offset(matches['atz']),
- committer_name=matches['committer_name'],
- committer_mail=matches['committer_mail'],
- committer_sec=int(matches['csec']),
- committer_offset=parse_tz_offset(matches['ctz']),
- gpgsig=parse_commit_gpgsig(matches['gpgsig']),
- message=matches['message'])
-
-
def get_cat_data(cat_iterator, expected_type):
_, kind, _ = next(cat_iterator)
if kind != expected_type:
raise Exception('expected %r, saw %r' % (expected_type, kind))
return b''.join(cat_iterator)

+
def get_commit_items(id, cp):
return parse_commit(get_cat_data(cp.get(id), b'commit'))

-def _local_git_date_str(epoch_sec):
- return b'%d %s' % (epoch_sec, utc_offset_str(epoch_sec))
-
-
-def _git_date_str(epoch_sec, tz_offset_sec):
- offs = tz_offset_sec // 60
- return b'%d %s%02d%02d' \
- % (epoch_sec,
- b'+' if offs >= 0 else b'-',
- abs(offs) // 60,
- abs(offs) % 60)
-

def repo(sub = b'', repo_dir=None):
"""Get the path to the git repository or one of its subdirectories."""
@@ -857,28 +769,6 @@ def idxmerge(idxlist, final_progress=True):
return merge_iter(idxlist, 10024, pfunc, pfinal)


-def create_commit_blob(tree, parent,
- author, adate_sec, adate_tz,
- committer, cdate_sec, cdate_tz,
- msg):
- if adate_tz is not None:
- adate_str = _git_date_str(adate_sec, adate_tz)
- else:
- adate_str = _local_git_date_str(adate_sec)
- if cdate_tz is not None:
- cdate_str = _git_date_str(cdate_sec, cdate_tz)
- else:
- cdate_str = _local_git_date_str(cdate_sec)
- l = []
- if tree: l.append(b'tree %s' % hexlify(tree))
- if parent: l.append(b'parent %s' % hexlify(parent))
- if author: l.append(b'author %s %s' % (author, adate_str))
- if committer: l.append(b'committer %s %s' % (committer, cdate_str))
- l.append(b'')
- l.append(msg)
- return b'\n'.join(l)
-
-
# del/exit/close/etc. wrt parent/child?

class LocalPackStore():
diff --git a/test/int/test_commit.py b/test/int/test_commit.py
new file mode 100644
index 00000000..42081c9e
--- /dev/null
+++ b/test/int/test_commit.py
@@ -0,0 +1,130 @@
+
+from os import environb as environ
+from subprocess import check_call
+import sys
+
+from wvpytest import *
+
+from bup import git
+from bup.commit import _git_date_str, parse_commit
+from bup.helpers import readpipe
+
+
+def exc(*cmd):
+ print(repr(cmd), file=sys.stderr)
+ check_call(cmd)
+
+
+def test_commit_parsing(tmpdir):
+ def restore_env_var(name, val):
+ if val is None:
+ del environ[name]
+ else:
+ environ[name] = val
+
+ def showval(commit, val):
+ return readpipe([b'git', b'show', b'-s',
+ b'--pretty=format:%s' % val, commit]).strip()
+
+ orig_cwd = os.getcwd()
+ workdir = tmpdir + b'/work'
+ repodir = workdir + b'/.git'
+ orig_author_name = environ.get(b'GIT_AUTHOR_NAME')
+ orig_author_email = environ.get(b'GIT_AUTHOR_EMAIL')
+ orig_committer_name = environ.get(b'GIT_COMMITTER_NAME')
+ orig_committer_email = environ.get(b'GIT_COMMITTER_EMAIL')
+ environ[b'GIT_AUTHOR_NAME'] = b'bup test'
+ environ[b'GIT_COMMITTER_NAME'] = environ[b'GIT_AUTHOR_NAME']
+ environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
+ environ[b'GIT_COMMITTER_EMAIL'] = environ[b'GIT_AUTHOR_EMAIL']
+ try:
+ environ[b'GIT_DIR'] = environ[b'BUP_DIR'] = repodir
+ readpipe([b'git', b'init', workdir])
+ exc(b'git', b'symbolic-ref', b'HEAD', b'refs/heads/main')
+ git.check_repo_or_die(repodir)
+ os.chdir(workdir)
+ with open('foo', 'w') as f:
+ print('bar', file=f)
+ readpipe([b'git', b'add', b'.'])
+ readpipe([b'git', b'commit', b'-am', b'Do something',
+ b'--author', b'Someone <someone@somewhere>',
+ b'--date', b'Sat Oct 3 19:48:49 2009 -0400'])
+ commit = readpipe([b'git', b'show-ref', b'-s', b'main']).strip()
+ parents = showval(commit, b'%P')
+ tree = showval(commit, b'%T')
+ cname = showval(commit, b'%cn')
+ cmail = showval(commit, b'%ce')
+ cdate = showval(commit, b'%ct')
+ coffs = showval(commit, b'%ci')
+ coffs = coffs[-5:]
+ coff = (int(coffs[-4:-2]) * 60 * 60) + (int(coffs[-2:]) * 60)
+ if coffs[-5] == b'-'[0]:
+ coff = - coff
+ commit_items = git.get_commit_items(commit, git.cp())
+ WVPASSEQ(commit_items.parents, [])
+ WVPASSEQ(commit_items.tree, tree)
+ WVPASSEQ(commit_items.author_name, b'Someone')
+ WVPASSEQ(commit_items.author_mail, b'someone@somewhere')
+ WVPASSEQ(commit_items.author_sec, 1254613729)
+ WVPASSEQ(commit_items.author_offset, -(4 * 60 * 60))
+ WVPASSEQ(commit_items.committer_name, cname)
+ WVPASSEQ(commit_items.committer_mail, cmail)
+ WVPASSEQ(commit_items.committer_sec, int(cdate))
+ WVPASSEQ(commit_items.committer_offset, coff)
+ WVPASSEQ(commit_items.message, b'Do something\n')
+ with open(b'bar', 'wb') as f:
+ f.write(b'baz\n')
+ readpipe([b'git', b'add', '.'])
+ readpipe([b'git', b'commit', b'-am', b'Do something else'])
+ child = readpipe([b'git', b'show-ref', b'-s', b'main']).strip()
+ parents = showval(child, b'%P')
+ commit_items = git.get_commit_items(child, git.cp())
+ WVPASSEQ(commit_items.parents, [commit])
+ finally:
+ os.chdir(orig_cwd)
+ restore_env_var(b'GIT_AUTHOR_NAME', orig_author_name)
+ restore_env_var(b'GIT_AUTHOR_EMAIL', orig_author_email)
+ restore_env_var(b'GIT_COMMITTER_NAME', orig_committer_name)
+ restore_env_var(b'GIT_COMMITTER_EMAIL', orig_committer_email)
+
+
+gpgsig_example_1 = b'''tree 3fab08ade2fbbda60bef180bb8e0cc5724d6bd4d
+parent 36db87b46a95ca5079f43dfe9b72220acab7c731
+author Rob Browning <r...@defaultvalue.org> 1633397238 -0500
+committer Rob Browning <r...@defaultvalue.org> 1633397238 -0500
+gpgsig -----BEGIN PGP SIGNATURE-----
+
+ ...
+ -----END PGP SIGNATURE-----
+
+Sample signed commit.
+'''
+
+gpgsig_example_2 = b'''tree 3fab08ade2fbbda60bef180bb8e0cc5724d6bd4d
+parent 36db87b46a95ca5079f43dfe9b72220acab7c731
+author Rob Browning <r...@defaultvalue.org> 1633397238 -0500
+committer Rob Browning <r...@defaultvalue.org> 1633397238 -0500
+gpgsig -----BEGIN PGP SIGNATURE-----
+
+ ...
+ -----END PGP SIGNATURE-----
+
+
+Sample signed commit.
+'''
+
+def test_commit_gpgsig_parsing():
+ c = parse_commit(gpgsig_example_1)
+ assert c.gpgsig
+ assert c.gpgsig.startswith(b'-----BEGIN PGP SIGNATURE-----\n')
+ assert c.gpgsig.endswith(b'\n-----END PGP SIGNATURE-----\n')
+ c = git.parse_commit(gpgsig_example_2)
+ assert c.gpgsig
+ assert c.gpgsig.startswith(b'-----BEGIN PGP SIGNATURE-----')
+ assert c.gpgsig.endswith(b'\n-----END PGP SIGNATURE-----\n\n')
+
+
+def test_git_date_str():
+ WVPASSEQ(b'0 +0000', _git_date_str(0, 0))
+ WVPASSEQ(b'0 -0130', _git_date_str(0, -90 * 60))
+ WVPASSEQ(b'0 +0130', _git_date_str(0, 90 * 60))
diff --git a/test/int/test_git.py b/test/int/test_git.py
index a1385d13..83888621 100644
--- a/test/int/test_git.py
+++ b/test/int/test_git.py
@@ -278,114 +278,6 @@ def test_check_repo_or_die(tmpdir):
git.repodir = None
check_establish_default_repo_variant(tmpdir, git.check_repo_or_die, False)

-def test_commit_parsing(tmpdir):
- def restore_env_var(name, val):
- if val is None:
- del environ[name]
- else:
- environ[name] = val
-
- def showval(commit, val):
- return readpipe([b'git', b'show', b'-s',
- b'--pretty=format:%s' % val, commit]).strip()
-
- orig_cwd = os.getcwd()
- workdir = tmpdir + b'/work'
- repodir = workdir + b'/.git'
- orig_author_name = environ.get(b'GIT_AUTHOR_NAME')
- orig_author_email = environ.get(b'GIT_AUTHOR_EMAIL')
- orig_committer_name = environ.get(b'GIT_COMMITTER_NAME')
- orig_committer_email = environ.get(b'GIT_COMMITTER_EMAIL')
- environ[b'GIT_AUTHOR_NAME'] = b'bup test'
- environ[b'GIT_COMMITTER_NAME'] = environ[b'GIT_AUTHOR_NAME']
- environ[b'GIT_AUTHOR_EMAIL'] = b'bup@a425bc70a02811e49bdf73ee56450e6f'
- environ[b'GIT_COMMITTER_EMAIL'] = environ[b'GIT_AUTHOR_EMAIL']
- try:
- environ[b'GIT_DIR'] = environ[b'BUP_DIR'] = repodir
- readpipe([b'git', b'init', workdir])
- exc(b'git', b'symbolic-ref', b'HEAD', b'refs/heads/main')
- git.check_repo_or_die(repodir)
- os.chdir(workdir)
- with open('foo', 'w') as f:
- print('bar', file=f)
- readpipe([b'git', b'add', b'.'])
- readpipe([b'git', b'commit', b'-am', b'Do something',
- b'--author', b'Someone <someone@somewhere>',
- b'--date', b'Sat Oct 3 19:48:49 2009 -0400'])
- commit = readpipe([b'git', b'show-ref', b'-s', b'main']).strip()
- parents = showval(commit, b'%P')
- tree = showval(commit, b'%T')
- cname = showval(commit, b'%cn')
- cmail = showval(commit, b'%ce')
- cdate = showval(commit, b'%ct')
- coffs = showval(commit, b'%ci')
- coffs = coffs[-5:]
- coff = (int(coffs[-4:-2]) * 60 * 60) + (int(coffs[-2:]) * 60)
- if coffs[-5] == b'-'[0]:
- coff = - coff
- commit_items = git.get_commit_items(commit, git.cp())
- WVPASSEQ(commit_items.parents, [])
- WVPASSEQ(commit_items.tree, tree)
- WVPASSEQ(commit_items.author_name, b'Someone')
- WVPASSEQ(commit_items.author_mail, b'someone@somewhere')
- WVPASSEQ(commit_items.author_sec, 1254613729)
- WVPASSEQ(commit_items.author_offset, -(4 * 60 * 60))
- WVPASSEQ(commit_items.committer_name, cname)
- WVPASSEQ(commit_items.committer_mail, cmail)
- WVPASSEQ(commit_items.committer_sec, int(cdate))
- WVPASSEQ(commit_items.committer_offset, coff)
- WVPASSEQ(commit_items.message, b'Do something\n')
- with open(b'bar', 'wb') as f:
- f.write(b'baz\n')
- readpipe([b'git', b'add', '.'])
- readpipe([b'git', b'commit', b'-am', b'Do something else'])
- child = readpipe([b'git', b'show-ref', b'-s', b'main']).strip()
- parents = showval(child, b'%P')
- commit_items = git.get_commit_items(child, git.cp())
- WVPASSEQ(commit_items.parents, [commit])
- finally:
- os.chdir(orig_cwd)
- restore_env_var(b'GIT_AUTHOR_NAME', orig_author_name)
- restore_env_var(b'GIT_AUTHOR_EMAIL', orig_author_email)
- restore_env_var(b'GIT_COMMITTER_NAME', orig_committer_name)
- restore_env_var(b'GIT_COMMITTER_EMAIL', orig_committer_email)
-
-
-gpgsig_example_1 = b'''tree 3fab08ade2fbbda60bef180bb8e0cc5724d6bd4d
-parent 36db87b46a95ca5079f43dfe9b72220acab7c731
-author Rob Browning <r...@defaultvalue.org> 1633397238 -0500
-committer Rob Browning <r...@defaultvalue.org> 1633397238 -0500
-gpgsig -----BEGIN PGP SIGNATURE-----
-
- ...
- -----END PGP SIGNATURE-----
-
-Sample signed commit.
-'''
-
-gpgsig_example_2 = b'''tree 3fab08ade2fbbda60bef180bb8e0cc5724d6bd4d
-parent 36db87b46a95ca5079f43dfe9b72220acab7c731
-author Rob Browning <r...@defaultvalue.org> 1633397238 -0500
-committer Rob Browning <r...@defaultvalue.org> 1633397238 -0500
-gpgsig -----BEGIN PGP SIGNATURE-----
-
- ...
- -----END PGP SIGNATURE-----
-
-
-Sample signed commit.
-'''
-
-def test_commit_gpgsig_parsing():
- c = git.parse_commit(gpgsig_example_1)
- assert c.gpgsig
- assert c.gpgsig.startswith(b'-----BEGIN PGP SIGNATURE-----\n')
- assert c.gpgsig.endswith(b'\n-----END PGP SIGNATURE-----\n')
- c = git.parse_commit(gpgsig_example_2)
- assert c.gpgsig
- assert c.gpgsig.startswith(b'-----BEGIN PGP SIGNATURE-----')
- assert c.gpgsig.endswith(b'\n-----END PGP SIGNATURE-----\n\n')
-

def test_new_commit(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
@@ -495,12 +387,6 @@ def test_list_refs(tmpdir):
WVPASSEQ(frozenset(git.list_refs(limit_to_tags=True)), expected_tags)


-def test_git_date_str():
- WVPASSEQ(b'0 +0000', git._git_date_str(0, 0))
- WVPASSEQ(b'0 -0130', git._git_date_str(0, -90 * 60))
- WVPASSEQ(b'0 +0130', git._git_date_str(0, 90 * 60))
-
-
def test_cat_pipe(tmpdir):
environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
src = tmpdir + b'/src'
--
2.47.2

Rob Browning

unread,
Jul 18, 2025, 2:36:26 PMJul 18
to bup-...@googlegroups.com
Reduce the helpers kitchen-sink aspect a bit before we make
some further changes.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/helpers.py | 80 +++-------------------------------------------
lib/bup/io.py | 76 +++++++++++++++++++++++++++++++++++++++++++
lib/bup/main.py | 6 ++--
lib/bup/ssh.py | 4 +--
4 files changed, 85 insertions(+), 81 deletions(-)

diff --git a/lib/bup/helpers.py b/lib/bup/helpers.py
index d9ef85d2..852fb3c6 100644
--- a/lib/bup/helpers.py
+++ b/lib/bup/helpers.py
@@ -15,7 +15,10 @@ import hashlib, heapq, math, operator, time
from bup import _helpers
from bup import io
from bup.compat import argv_bytes
-from bup.io import byte_stream, path_msg
+from bup.io import byte_stream, debug1, debug2, log, path_msg
+# pylint: disable=unused-import
+from bup.io import istty1, istty2, progress, qprogress, reprogress
+# pylint: enable=unused-import
# This function should really be in helpers, not in bup.options. But we
# want options.py to be standalone so people can include it in other projects.
from bup.options import _tty_width as tty_width
@@ -32,9 +35,6 @@ EXIT_FALSE = 1
EXIT_FAILURE = 2


-buglvl = int(os.environ.get('BUP_DEBUG', 0))
-
-
nullctx = nullcontext() # only need one

def nullcontext_if_not(manager):
@@ -188,78 +188,6 @@ def stat_if_exists(path):
return None


-# Write (blockingly) to sockets that may or may not be in blocking mode.
-# We need this because our stderr is sometimes eaten by subprocesses
-# (probably ssh) that sometimes make it nonblocking, if only temporarily,
-# leading to race conditions. Ick. We'll do it the hard way.
-def _hard_write(fd, buf):
- while buf:
- (r,w,x) = select.select([], [fd], [], None)
- if not w:
- raise IOError('select(fd) returned without being writable')
- try:
- sz = os.write(fd, buf)
- except OSError as e:
- if e.errno != errno.EAGAIN:
- raise
- assert(sz >= 0)
- buf = buf[sz:]
-
-
-_last_prog = 0
-def log(s):
- """Print a log message to stderr."""
- global _last_prog
- sys.stdout.flush()
- _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
- _last_prog = 0
-
-
-def debug1(s):
- if buglvl >= 1:
- log(s)
-
-
-def debug2(s):
- if buglvl >= 2:
- log(s)
-
-
-istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
-istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
-_last_progress = ''
-def progress(s):
- """Calls log() if stderr is a TTY. Does nothing otherwise."""
- global _last_progress
- if istty2:
- if _last_progress.endswith('\r'):
- log('\x1b[0K')
- log(s)
- _last_progress = s
-
-
-def qprogress(s):
- """Calls progress() only if we haven't printed progress in a while.
-
- This avoids overloading the stderr buffer with excess junk.
- """
- global _last_prog
- now = time.time()
- if now - _last_prog > 0.1:
- progress(s)
- _last_prog = now
-
-
-def reprogress():
- """Calls progress() to redisplay the most recent progress message.
-
- Useful after you've printed some other message that wipes out the
- progress line.
- """
- if _last_progress and _last_progress.endswith('\r'):
- progress(_last_progress)
-
-
def mkdirp(d, mode=None):
"""Recursively create directories on path 'd'.

diff --git a/lib/bup/io.py b/lib/bup/io.py
index 01e44a25..cc92f1f8 100644
--- a/lib/bup/io.py
+++ b/lib/bup/io.py
@@ -1,10 +1,86 @@

+from errno import EAGAIN
import mmap as py_mmap
+import os, select, sys, time
+
+
+# Write (blockingly) to sockets that may or may not be in blocking mode.
+# We need this because our stderr is sometimes eaten by subprocesses
+# (probably ssh) that sometimes make it nonblocking, if only temporarily,
+# leading to race conditions. Ick. We'll do it the hard way.
+def _hard_write(fd, buf):
+ while buf:
+ (r,w,x) = select.select([], [fd], [], None)
+ if not w:
+ raise IOError('select(fd) returned without being writable')
+ try:
+ sz = os.write(fd, buf)
+ except OSError as e:
+ if e.errno != EAGAIN:
+ raise
+ assert(sz >= 0)
+ buf = buf[sz:]
+
+
+_last_prog = 0
+def log(s):
+ """Print a log message to stderr."""
+ global _last_prog
+ sys.stdout.flush()
+ _hard_write(sys.stderr.fileno(), s if isinstance(s, bytes) else s.encode())
+ _last_prog = 0
+
+
+buglvl = int(os.environ.get('BUP_DEBUG', 0))
+
+def debug1(s):
+ if buglvl >= 1:
+ log(s)
+
+def debug2(s):
+ if buglvl >= 2:
+ log(s)
+
+
+istty1 = os.isatty(1) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 1)
+istty2 = os.isatty(2) or (int(os.environ.get('BUP_FORCE_TTY', 0)) & 2)
+_last_progress = ''
+def progress(s):
+ """Calls log() if stderr is a TTY. Does nothing otherwise."""
+ global _last_progress
+ if istty2:
+ if _last_progress.endswith('\r'):
+ log('\x1b[0K')
+ log(s)
+ _last_progress = s
+
+
+def qprogress(s):
+ """Calls progress() only if we haven't printed progress in a while.
+
+ This avoids overloading the stderr buffer with excess junk.
+ """
+ global _last_prog
+ now = time.time()
+ if now - _last_prog > 0.1:
+ progress(s)
+ _last_prog = now
+
+
+def reprogress():
+ """Calls progress() to redisplay the most recent progress message.
+
+ Useful after you've printed some other message that wipes out the
+ progress line.
+ """
+ if _last_progress and _last_progress.endswith('\r'):
+ progress(_last_progress)


def byte_stream(file):
return file.buffer

+
def path_msg(x):
"""Return a string representation of a path."""
# FIXME: configurability (might git-config quotePath be involved?)
diff --git a/lib/bup/main.py b/lib/bup/main.py
index 7ec8913d..fa2262c2 100755
--- a/lib/bup/main.py
+++ b/lib/bup/main.py
@@ -9,7 +9,7 @@ from importlib import import_module
from os import fsdecode
from pkgutil import iter_modules

-from bup import compat, path, helpers
+from bup import compat, io, path
from bup.compat import environ, print_exception
from bup.git import close_catpipes
from bup.helpers import \
@@ -130,8 +130,8 @@ while args:
subcmd = [b'version']
args = args[1:]
elif arg in (b'-D', b'--debug'):
- helpers.buglvl += 1
- environ[b'BUP_DEBUG'] = b'%d' % helpers.buglvl
+ io.buglvl += 1
+ environ[b'BUP_DEBUG'] = b'%d' % io.buglvl
args = args[1:]
elif arg == b'--profile':
do_profile = True
diff --git a/lib/bup/ssh.py b/lib/bup/ssh.py
index ca5df56e..e2577c22 100644
--- a/lib/bup/ssh.py
+++ b/lib/bup/ssh.py
@@ -7,7 +7,8 @@ import re

from bup import path
from bup.compat import environ
-from bup.helpers import debug1
+from bup.io import buglvl, debug1
+

def connect(rhost, port, subcmd, stderr=None):
"""Connect to 'rhost' and execute the bup subcommand 'subcmd' on it."""
@@ -15,7 +16,6 @@ def connect(rhost, port, subcmd, stderr=None):
if rhost is None or rhost == b'-':
argv = [path.exe(), subcmd]
else:
- buglvl = int(environ.get(b'BUP_DEBUG', 0))
force_tty = int(environ.get(b'BUP_FORCE_TTY', 0))
argv = [b'ssh']
if port:
--
2.47.2

Rob Browning

unread,
Jul 20, 2025, 3:48:08 PMJul 20
to bup-...@googlegroups.com
Rob Browning <r...@defaultvalue.org> writes:

> Now that POSIX sh makes it possible via the addition of $'' style
> quoting, add enc_sh() to POSIX quote any bytes as a single line, and
> use that to add a Bup-Argv commit header, replacing the previous
> python repr encoded command, and also include the Bup-Version.
>
> See also git-interpret-trailers(1).

Pushed to main.
Reply all
Reply to author
Forward
0 new messages