[xappy] r609 committed - utils/verify_cache.py: Expand verification routine to check the...

0 views
Skip to first unread message

xa...@googlecode.com

unread,
Mar 4, 2010, 1:08:10 PM (3/4/10)
to xappy-...@googlegroups.com
Revision: 609
Author: boulton.rj
Date: Thu Mar 4 10:07:11 2010
Log: utils/verify_cache.py: Expand verification routine to check the
cache much more thoroughly, and report errors more clearly.
http://code.google.com/p/xappy/source/detail?r=609

Modified:
/trunk/ChangeLog
/trunk/utils/verify_cache.py

=======================================
--- /trunk/ChangeLog Thu Mar 4 08:40:18 2010
+++ /trunk/ChangeLog Thu Mar 4 10:07:11 2010
@@ -1,3 +1,8 @@
+Thu Mar 04 18:06:34 GMT 2010 Richard Boulton <ric...@tartarus.org>
+
+ * utils/verify_cache.py: Expand verification routine to check the
+ cache much more thoroughly, and report errors more clearly.
+
Thu Mar 04 16:39:51 GMT 2010 Richard Boulton <ric...@tartarus.org>

* utils/verify_cache.py: Add a utility to verify the integrity of a
=======================================
--- /trunk/utils/verify_cache.py Thu Mar 4 08:40:18 2010
+++ /trunk/utils/verify_cache.py Thu Mar 4 10:07:11 2010
@@ -24,15 +24,35 @@
import xapian
import xappy.cachemanager

-def default_fail_handler(msg):
- """Default handler for a failure.
-
- `msg` is the message describing the failure.
+class Handler(object):
+ """Default handler context.

"""
- print "Error:", msg
-
-def verify(dbpath, fail_cb):
+
+ def __init__(self):
+ self.failcount = 0
+ self.failmax = 100
+
+ def fail_handler(self, msg):
+ """Handler for a failure.
+
+ `msg` is the message describing the failure.
+
+ """
+ print "Error:", msg
+ self.failcount +=1
+ if self.failcount >= self.failmax:
+ raise RuntimeError("Too many failures - aborting verification")
+
+ def info_handler(self, msg):
+ """Handler for an informative message.
+
+ `msg` is the message.
+
+ """
+ print msg
+
+def verify(dbpath, fail_cb, info_cb):
"""Verify that a cache stored in a database has been applied correctly.

- `dbpath` is the path to the database.
@@ -42,30 +62,87 @@
times. It may raise an exception to escape from the verification routine
if it doesn't wish the verification to continue.

+ - `info_cb` is the callback to call for informative messages. It is
+ supplied with a single string of text giving information about the
+ verification process.
+
+ Returns True if the cache verified ok, False if any errors were found.
+
"""
db = xapian.Database(dbpath)
cm = xappy.cachemanager.XapianCacheManager(dbpath)
-
+ ok = True
+
+ info_cb("Starting verification for %r" % dbpath)
+ info_cb("Checking for querystr->queryid mapping")
queryids = {}
for querystr in cm.iter_query_strs():
queryid = cm.get_queryid(querystr)
if queryid in queryids:
fail_cb("queryid %d occurs multiple times: for both querystr "
"%r and %r" % (queryid, querystr, queryids[queryid]))
+ ok = False
queryids[queryid] = querystr

+ info_cb("Checking queryids")
for queryid in cm.iter_queryids():
+ if queryid not in queryids:
+ fail_cb("queryid %d not found in querystr->queryid mapping" %
+ queryid)
+ ok = False
+
+ info_cb("Checking values stored in cached documents")
+ for queryid in cm.iter_queryids():
hits = cm.get_hits(queryid)
- storedhits = []
- for item in db.valuestream(queryid + 10000):
- storedhits.append(item.value, item.docid)
- storedhits.sort()
- storedhits = [item[1] for item in storedhits]
- if hits != storedhits:
- fail_cb("Stored hits do not match hits in cache for queryid %d: "
- "cache has %r, stored hits are %r" %
- (queryid, hits, storedhits))
+ prevvalue = None
+ slot = queryid + 10000
+ missing_values = []
+ for docid in hits:
+ doc = db.get_document(docid)
+ value = doc.get_value(slot)
+ if value == '':
+ missing_values.append(docid)
+ continue
+ if prevvalue is not None:
+ if value >= prevvalue:
+ fail_cb("Values in wrong order for queryid %d: %r "
+ "followed by %r" % (queryid, prevvalue, value))
+ ok = False
+ continue
+ prevvalue = value
+ if len(missing_values) != 0:
+ if len(missing_values) > 10:
+ fail_cb("%d/%d missing values in slot %d for queryid %d: "
+ "starting with %r" %
+ (len(missing_values), len(hits), slot, queryid,
+ missing_values[:10]))
+ else:
+ fail_cb("%d/%d missing values in slot %d for queryid %d: %r" %
+ (len(missing_values), len(hits), slot, queryid,
+ missing_values))
+ ok = False
+
+ info_cb("Checking valuestreams match cached values")
+ if not hasattr(db, 'valuestream'):
+ info_cb("Skipping check - xapian version in use does not support "
+ "valuestream iterators")
+ else:
+ for queryid in cm.iter_queryids():
+ hits = cm.get_hits(queryid)
+ storedhits = []
+ for item in db.valuestream(queryid + 10000):
+ storedhits.append(item.value, item.docid)
+ storedhits.sort()
+ storedhits = [item[1] for item in storedhits]
+ if hits != storedhits:
+ fail_cb("Stored hits do not match hits in cache for queryid "
+ "%d: cache has %r, stored hits are %r" %
+ (queryid, hits, storedhits))
+ ok = False
+
+ return ok

if __name__ == '__main__':
import sys
- verify(sys.argv[1], default_fail_handler)
+ handler = Handler()
+ verify(sys.argv[1], handler.fail_handler, handler.info_handler)

Reply all
Reply to author
Forward
0 new messages