I don't think that is a good idea.
GitCachedRepository.sync() checks whether each revision of the
repository is cached in the revision table by executing a SELECT query.
That would degrade performance if there are too many revisions.
However, I think we could improve sync() for an already synchronized
cached git repository by checking whether each ref is cached before
checking whether all revisions are cached. I agree with removing the
persistent_cache option if sync() is improved by the following patch.
This patch is for 1.0-stable:
diff --git a/tracopt/versioncontrol/git/PyGIT.py
b/tracopt/versioncontrol/git/PyGIT.py
index 47f397389..32aac9184 100644
--- a/tracopt/versioncontrol/git/PyGIT.py
+++ b/tracopt/versioncontrol/git/PyGIT.py
@@ -596,6 +596,11 @@ class Storage(object):
key=lambda (name, rev, head): (not head, name))
return [(name, rev) for name, rev, head in branches]
+ def get_refs(self):
+ for refname, rev in self.rev_cache.refs_dict.iteritems():
+ if refname != 'HEAD':
+ yield refname, rev
+
def get_commits(self):
return self.rev_cache.rev_dict
diff --git a/tracopt/versioncontrol/git/git_fs.py
b/tracopt/versioncontrol/git/git_fs.py
index 031f68c2b..c33eb7a5d 100644
--- a/tracopt/versioncontrol/git/git_fs.py
+++ b/tracopt/versioncontrol/git/git_fs.py
@@ -100,6 +100,21 @@ class GitCachedRepository(CachedRepository):
return count > 0
return False
+ def needs_sync():
+ max_holders = 999
+ revs = sorted(set(rev for refname, rev in repos.git.get_refs()))
+ for idx in xrange(0, len(revs), max_holders):
+ revs_ = revs[idx:idx + max_holders]
+ holders = ','.join(('%s',) * len(revs_))
+ args = [self.id]
+ args.extend(revs_)
+ query = 'SELECT COUNT(*) FROM revision ' \
+ 'WHERE repos=%s AND rev IN (' + holders + ')'
+ for count, in self.env.db_query(query, args):
+ if count < len(revs_):
+ return True
+ return False
+
def traverse(rev, seen):
revs = []
merge_revs = []
@@ -121,9 +136,7 @@ class GitCachedRepository(CachedRepository):
revs[idx:idx] = traverse(rev, seen)
return revs
- while True:
- repos.sync()
- repos_youngest = repos.youngest_rev or ''
+ def sync_revs():
updated = False
seen = set()
@@ -148,9 +161,13 @@ class GitCachedRepository(CachedRepository):
if feedback:
feedback(rev)
- if updated:
- continue # sync again
+ return updated
+ while True:
+ repos.sync()
+ repos_youngest = repos.youngest_rev or ''
+ if needs_sync() and sync_revs():
+ continue # sync again
if meta_youngest != repos_youngest:
with self.env.db_transaction as db:
db("""