[PATCH v3 6/6] fsck: report errors for all mismatched or empty pack-*.par2 files

0 views
Skip to first unread message

Rob Browning

unread,
Jun 22, 2024, 5:23:28 PM (10 days ago) Jun 22
to bup-...@googlegroups.com
When any recovery files exist for a packfile, insist they all do, and
disallow empty ones. Previously, we only checked whether the
top-level PACK.idx index file was empty, ignoring the vol files.

Check again at the end, to catch any problems with the "par2 create"
tempdir sandboxing.

Signed-off-by: Rob Browning <r...@defaultvalue.org>
Tested-by: Rob Browning <r...@defaultvalue.org>
---
lib/bup/cmd/fsck.py | 70 ++++++++++++++++++++++++++++++++++++++-------
note/0.33.x.md | 7 +++--
test/ext/test-fsck | 26 +++++++++++++++++
3 files changed, 91 insertions(+), 12 deletions(-)

diff --git a/lib/bup/cmd/fsck.py b/lib/bup/cmd/fsck.py
index 1309543d2..e95d89ffd 100644
--- a/lib/bup/cmd/fsck.py
+++ b/lib/bup/cmd/fsck.py
@@ -9,9 +9,9 @@ import glob, os, subprocess, sys
from bup import options, git
from bup.compat import argv_bytes
from bup.helpers \
- import (EXIT_FALSE, EXIT_TRUE,
+ import (EXIT_FAILURE, EXIT_FALSE, EXIT_TRUE, EXIT_SUCCESS,
Sha1, chunkyreader, istty2, log, progress, temp_dir)
-from bup.io import byte_stream
+from bup.io import byte_stream, path_msg


par2_ok = 0
@@ -112,6 +112,44 @@ def par2_generate(base):
assert expected == remaining
return rc

+def par2_recovery_file_status(stem):
+ """Return True if recovery files exist for the stem and we should
+ assume they're acceptable. Return None if none of them exist, and
+ return False (after logging appropriate errors) if something
+ appears to be wrong with them, for example, if any of the files
+ are empty, or if the set of files is incomplete.
+
+ """
+ # Look for empty *.par2 files because C-c during "par2 create" may
+ # leave them when interrupted, and previous versions of bup didn't
+ # run par2 create in a tempdir to compensate. For now, we decide
+ # the existing data is OK if the pack-HASH.par2 index file exists,
+ # and no pack-HASH*.par2 files are empty.
+ # cf. https://github.com/Parchive/par2cmdline/issues/84
+ paths = [stem + suffix for suffix in (b'.par2', b'.vol000+200.par2')]
+ empty = []
+ missing = set(paths)
+ for path in paths:
+ try:
+ st = os.stat(path)
+ if st.st_size == 0:
+ empty.append(path)
+ else:
+ missing.remove(path)
+ except FileNotFoundError:
+ pass
+ for path in empty:
+ log(f'error: empty par2 file - {path_msg(path)}\n')
+ for path in missing:
+ log(f'error: missing par2 file - {path_msg(path)}\n')
+ if empty:
+ return False
+ if len(missing) == 2:
+ return None
+ if not missing:
+ return True
+ return False
+
def par2_verify(base):
return par2(b'verify', [b'--', base], verb_floor=3)

@@ -158,6 +196,9 @@ def do_pack(base, last, par2_exists, out):
else:
action_result = b'repaired'
log('%s par2 repair: succeeded (0)\n' % last)
+ # FIXME: for this to be useful, we need to define
+ # the semantics, e.g. what's promised when we have
+ # this and a competing error from another pack?
code = 100
else:
action_result = b'failed'
@@ -237,22 +278,25 @@ def main(argv):

sys.stdout.flush()
out = byte_stream(sys.stdout)
- code = 0
+ code = EXIT_SUCCESS
count = 0
outstanding = {}
for stem in pack_stems:
base = os.path.basename(stem)
- par2_exists = os.path.exists(stem + b'.par2')
- if par2_exists and os.stat(stem + b'.par2').st_size == 0:
- par2_exists = 0
+ par2_status = par2_recovery_file_status(stem)
+ if par2_status == False:
+ if code == EXIT_SUCCESS:
+ code = EXIT_FAILURE
+ continue
sys.stdout.flush() # Not sure we still need this, but it'll flush out too
debug('fsck: checking %r (%s)\n'
- % (base, par2_ok and par2_exists and 'par2' or 'git'))
+ % (base, par2_ok and par2_status and 'par2' or 'git'))
if not opt.verbose:
progress('fsck (%d/%d)\r' % (count, len(extra)))

if not opt.jobs:
- nc = do_pack(stem, base, par2_exists, out)
+ nc = do_pack(stem, base, par2_status, out)
+ # FIXME: is first wins what we really want (cf. repair's 100)
code = code or nc
count += 1
else:
@@ -268,7 +312,7 @@ def main(argv):
outstanding[pid] = 1
else: # child
try:
- sys.exit(do_pack(stem, base, par2_exists, out))
+ sys.exit(do_pack(stem, base, par2_status, out))
except Exception as e:
log('exception: %r\n' % e)
sys.exit(99)
@@ -282,7 +326,13 @@ def main(argv):
count += 1
if not opt.verbose:
progress('fsck (%d/%d)\r' % (count, len(extra)))
-
if istty2:
debug('fsck done. \n')
+
+ # double-check (e.g. for (unlikely) problems with generate tmpdir renames)
+ for stem in pack_stems:
+ if par2_recovery_file_status(stem) == False:
+ if code == EXIT_SUCCESS:
+ code = EXIT_FAILURE
+
sys.exit(code)
diff --git a/note/0.33.x.md b/note/0.33.x.md
index 1fbd153fa..bd64c1676 100644
--- a/note/0.33.x.md
+++ b/note/0.33.x.md
@@ -7,7 +7,10 @@ May require attention
* The `par2` command (invoked by `bup fsck -g`) may generate empty
recovery files if interrupted (say via C-c). To mitigate this, bup
now runs `par2` in a temporary directory, and only moves the
- recovery files into place if the generation succeeds. See also
+ recovery files into place if the generation succeeds. It will also
+ look for any empty par2 files, or incomplete sets, associated with
+ packfiles that it has been asked to examine. If found, they will
+ provoke an error. See also
https://github.com/Parchive/par2cmdline/issues/84

* Previously, any `bup on REMOTE ...` commands that attempted to read
@@ -30,4 +33,4 @@ Bugs
Thanks to (at least)
====================

-...
+Greg Troxel, Johannes Berg, and ...
diff --git a/test/ext/test-fsck b/test/ext/test-fsck
index 73e734d9c..94428b274 100755
--- a/test/ext/test-fsck
+++ b/test/ext/test-fsck
@@ -49,6 +49,32 @@ WVFAIL bup fsck --quick -rvv -j9
if bup fsck --par2-ok; then
WVPASS bup fsck -r # ok because of repairs from last time

+ some_idx="$(WVPASS find "$BUP_DIR" -name "pack-*.par2" -not -name "*.vol*.par2" | head -1)" || exit $?
+ some_vol="$(WVPASS find "$BUP_DIR" -name "pack-*.vol*.par2" | head -1)" || exit $?
+
+ WVPASS cp -p "$some_idx" some-pack.par2
+ WVPASS cp -p "$some_vol" some-pack.vol.par2
+
+ WVSTART 'fsck rejects empty par2 index files'
+ WVPASS echo -n > "$some_idx"
+ WVFAIL bup fsck -v
+ WVPASS test -e "$some_idx" -a ! -s "$some_idx"
+ WVFAIL bup fsck -vr
+ WVPASS test -e "$some_idx" -a ! -s "$some_idx"
+ WVFAIL bup fsck -vg
+ WVPASS test -e "$some_idx" -a ! -s "$some_idx"
+ WVPASS cp -p some-pack.par2 "$some_idx"
+
+ WVSTART 'fsck rejects empty par2 vol files'
+ WVPASS echo -n > "$some_vol"
+ WVFAIL bup fsck -v
+ WVPASS test -e "$some_vol" -a ! -s "$some_vol"
+ WVFAIL bup fsck -vr
+ WVPASS test -e "$some_vol" -a ! -s "$some_vol"
+ WVFAIL bup fsck -vg
+ WVPASS test -e "$some_vol" -a ! -s "$some_vol"
+ WVPASS cp -p some-pack.vol.par2 "$some_vol"
+
# This must do "too much" damage. Currently par2 is invoked with
# -c200, which should allow up to 200 damaged "blocks", but since
# we don't specify the block size, it's dynamically computed.
--
2.43.0

Reply all
Reply to author
Forward
0 new messages