When reading bupm files, e.g. during repo validation, and perhaps
especially when reading old unsplit ones that are larger, bup
processes a lot of data byte-by-byte due to the variable-length
integer encodings, and thus spends a lot of time simply copying
the remaining blob data.

Track an offset for the consumed data within the original blob,
instead of repeatedly slicing off the remaining data, to avoid
copying it around over and over. Reportedly, this cut the time
spent validating a certain save to about a third overall.

Reported-by: Anton Khirnov <an...@khirnov.net>
Signed-off-by: Johannes Berg <joha...@sipsolutions.net>
---
lib/bup/vfs.py | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)
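
(Note for reviewers, not part of the commit message: the following is a
minimal, hypothetical Python sketch, unrelated to the actual vfs code,
to illustrate why tracking an offset helps. Re-slicing the remainder
copies all leftover bytes on every read, so many small reads over an
n-byte blob end up costing O(n^2) in total; keeping an offset makes each
read cost only as much as the bytes it returns.)

    # Hypothetical illustration only -- not bup code.

    class SliceReader:                 # old pattern: keep the remainder
        def __init__(self, data):
            self.blob = data
        def read(self, want):
            out = self.blob[:want]
            self.blob = self.blob[want:]   # copies everything left over
            return out

    class OffsetReader:                # new pattern: keep an offset
        def __init__(self, data):
            self.blob = data
            self.blobofs = 0
        def read(self, want):
            out = self.blob[self.blobofs:self.blobofs + want]
            self.blobofs += want           # no copy of the remainder
            return out
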
diff --git a/lib/bup/vfs.py b/lib/bup/vfs.py
index c83f33424040..7062b595f452 100644
--- a/lib/bup/vfs.py
+++ b/lib/bup/vfs.py
@@ -210,9 +210,11 @@ class _ChunkReader:
         if isdir:
             self.it = _tree_chunks(repo, data, startofs)
             self.blob = None
+            self.blobofs = None
         else:
             self.it = None
-            self.blob = data[startofs:]
+            self.blob = data
+            self.blobofs = startofs
         self.ofs = startofs
 
     def next(self, size):
@@ -221,12 +223,16 @@ class _ChunkReader:
             if self.it and not self.blob:
                 try:
                     self.blob = next(self.it)
+                    self.blobofs = 0
                 except StopIteration:
                     self.it = None
             if self.blob:
                 want = size - len(out)
-                out += self.blob[:want]
-                self.blob = self.blob[want:]
+                out += self.blob[self.blobofs:self.blobofs + want]
+                self.blobofs += want
+                if self.blobofs >= len(self.blob):
+                    self.blob = None
+                    self.blobofs = None
             if not self.it:
                 break
         debug2('next(%d) returned %d\n' % (size, len(out)))
--
2.52.0