I am writing a web app on
Google App Engine in Python, and I use cursors in my pagination to go to the next page. The pagination should also let the user go backwards and, for example, jump 20 pages forward (though not necessarily to the last page). Previously, the App Engine API offered no good way to go backwards other than caching the previous page, and there was no good or easy way to jump to the middle of a result set or 20 pages forward in one go.
Is my best bet to do what Google does: fetch 10 or 20 pages at once, use caching to split them into pages, and give up on jumping to the middle of a large result set? I agree there is no clear reason why a user would want to go to the last page, or to the middle of a result set that has 100 pages of 50 items each.
I'm going to implement sorting by date as the default sorting and also sorting by price ascending and descending (the webapp is an online classifieds site similar to craigslist where you can search and page through classified ads).
My somewhat dirty code, which I am going to clean up, is shown below; it pages to the next page using the cursor method. I apologize if the code is very dirty, but it is the first draft, written without any refactoring.
def find_documents3(url, query_string, doc_limit, cursor, indexname=_INDEX_NAME,
                    sort_options=None):
    """Run one page of a full-text search against a Search API index.

    Args:
        url: Host name of the site being served. Unless it contains "127"
            (hosts such as 127.0.0.1:8080) it is appended to the query as a
            ``url:`` restriction.
        query_string: Search API query string.
        doc_limit: Maximum number of documents to return for this page.
        cursor: ``search.Cursor`` marking where the page starts.
        indexname: Name of the index to query.
        sort_options: Optional ``search.SortOptions``. When omitted, results
            are sorted in descending order of the ``date``, ``hour`` and
            ``minute`` fields.

    Returns:
        The value returned by ``search.Index.search``, or None when the
        Search API raised a ``search.Error`` (the error is logged).
    """
    if sort_options is None:
        # Default ordering: newest first, by date, then hour, then minute.
        descending = search.SortExpression.DESCENDING
        sort_options = search.SortOptions(
            expressions=[
                search.SortExpression(expression='date',
                                      direction=descending,
                                      default_value=datetime(1999, 1, 1)),
                search.SortExpression(expression='hour',
                                      direction=descending,
                                      default_value=1),
                search.SortExpression(expression='minute',
                                      direction=descending,
                                      default_value=1),
            ],
            limit=ACCURACY)

    options = search.QueryOptions(limit=doc_limit, cursor=cursor,
                                  sort_options=sort_options,
                                  number_found_accuracy=10000)

    # Restrict the hits to the requesting site, except for "127" hosts.
    if "127" not in url:
        query_string = query_string + ' url:' + url

    logging.info('indexname ' + indexname)
    try:
        query = search.Query(query_string=query_string, options=options)
        return search.Index(name=indexname).search(query)
    except search.PutError as e:
        logging.exception('caught PutError %s', e)
    except search.InternalError as e:
        logging.exception('caught InternalError %s', e)
    except search.DeleteError as e:
        logging.exception('caught DeleteError %s', e)
    except search.TransientError as e:
        logging.exception('caught TransientError %s', e)
    except search.InvalidRequest as e:
        logging.exception('caught InvalidRequest %s', e)
    except search.Error as e:
        logging.exception('caught unknown error %s', e)
    # Every handled error falls through to here.
    return None
class RegionSearch(SearchBaseHandler):
    """Handles regional search requests.

    Result pages are cached in memcache under keys derived from the region id
    and the query string: ``<key>F`` holds the first page, ``<key>`` the page
    most recently served through the first-page or "next" paths,
    ``<key>prev`` the page served before the last "next", and
    ``<key>ASCENDING`` the oldest-first page used for the last-page view.

    NOTE(review): these keys are not per user or per site, so concurrent
    visitors running the same query share the "previous page" state.
    """

    # Number of results shown per page.
    _PAGE_SIZE = 50
    # Lifetime, in seconds, of cached result pages.
    _CACHE_SECONDS = 36000

    # (host fragment, country, index name), checked in order; first match
    # wins. An index name of None means the index is selected through
    # setIndex() instead of the local index name.
    _SITES = (
        ('qlbusiness', 'USA', 'qlbusiness'),
        ('koolbusiness', 'India', None),
        ('new-ads', 'Philippines', 'new-ads'),
        ('kewlbusiness', 'Pakistan', 'kewlbusiness'),
        ('montao', 'Brasil', 'montao'),
        ('morocco-business', 'Morocco', 'morocco-business'),
        ('businessafrikaans', 'South Africa', 'businessafrikaans'),
        ('qewlbusiness', 'Malaysia', 'qewlbusiness'),
        ('klokaffar', 'Sverige', 'klokaffar'),
        ('127', 'Sverige', 'klokaffar'),
    )

    # Filter fragments stripped from the query before it is shown again.
    _DISPLAY_FILTERS = (
        ' and company_ad=0',
        ' and company_ad=1',
        ' and category:(6010 OR 6020 OR 6030 OR 6040 OR 6090)',
        ' and category:(1020 OR 1010 OR 1030 OR 1050 OR 1080 OR 1100 OR 1090)',
        ' and category:(2010 OR 2030 OR 2040 OR 2080 OR 2070)',
        ' and category:(3040 OR 3050 OR 3030 OR 3060)',
        ' and category:(4010 OR 4020 OR 4040 OR 4030 OR 4090 OR 4060 OR 4070)',
    )

    # Choices offered by the ``w`` field of the search form.
    _STATE_CHOICES = (
        ('4703187', u'Andaman & Nicobar Islands'),
        ('4694186', u'Andhra Pradesh'),
        ('4699188', u'Arunachal Pradesh'),
        ('4692186', u'Assam'),
        ('4702186', u'Bihar'),
        ('4698185', u'Chandigarh'),
        ('4676188', u'Chhattisgarh'),
        ('4691190', u'Dadra & Nagar Haveli'),
        ('4704183', u'Daman & Diu'),
        ('4699183', u'Delhi'),
        ('4702187', u'Goa'),
        ('4691189', u'Gujarat'),
        ('4700186', u'Haryana'),
        ('4703185', u'Himachal Pradesh'),
        ('4694187', u'Jammu & Kashmir'),
        ('4699189', u'Jharkhand'),
        ('4701185', u'Karnataka'),
        ('4695189', u'Kerala'),
        ('4700189', u'Lakshadweep'),
        ('4697186', u'Madhya Pradesh'),
        ('4694184', u'Maharashtra'),
        ('4700187', u'Manipur'),
        ('4703186', u'Meghalaya'),
        ('4698184', u'Mizoram'),
        ('4692187', u'Nagaland'),
        ('4696185', u'Orissa'),
        ('4676189', u'Pondicherry'),
        ('4693185', u'Punjab'),
        ('4701186', u'Rajasthan'),
        ('4701187', u'Sikkim'),
        ('4701188', u'Tamil Nadu'),
        ('4697187', u'Tripura'),
        ('4699190', u'Uttaranchal'),
        ('4692188', u'Uttar Pradesh'),
        ('4700188', u'West Bengal'),
    )

    @staticmethod
    def _id_in_query(pattern, query):
        """Return the digits captured by ``pattern`` at the start of ``query``, or None."""
        match = re.match(pattern, query)
        return match.group(1) if match else None

    @staticmethod
    def _to_id(value):
        """Convert a request or query value to a numeric id, or None if it is not numeric."""
        try:
            return long(value)
        except (TypeError, ValueError):
            return None

    def _search_oldest_first(self, query_string, indexname):
        """Return the first page of ``query_string`` sorted oldest first.

        Reversed by the caller, this page is the last page of the default
        (newest first) ordering. The search is issued directly because
        find_documents3 is called elsewhere in this handler with five
        positional arguments and no sort options. Returns None if the
        Search API raises an error.
        """
        ascending = search.SortExpression.ASCENDING
        sort_options = search.SortOptions(
            expressions=[
                search.SortExpression(expression='date',
                                      direction=ascending,
                                      default_value=datetime(1999, 1, 1)),
                search.SortExpression(expression='hour',
                                      direction=ascending,
                                      default_value=1),
                search.SortExpression(expression='minute',
                                      direction=ascending,
                                      default_value=1),
            ],
            limit=ACCURACY)
        options = search.QueryOptions(limit=self._PAGE_SIZE,
                                      cursor=search.Cursor(),
                                      sort_options=sort_options,
                                      number_found_accuracy=10000)
        host = self.request.host
        # Same per-site restriction that find_documents3 applies.
        if "127" not in host:
            query_string = query_string + ' url:' + host
        try:
            query = search.Query(query_string=query_string, options=options)
            return search.Index(name=indexname).search(query)
        except search.Error as e:
            logging.exception('caught search error %s', e)
            return None

    def get(self):
        """Handles a get request with a query.

        Reads the request parameters ``query``, ``cursor``, ``regionid``,
        ``cityid``, ``category``, ``o`` (page number), ``next``,
        ``firstpageview``, ``prevprev``, ``previouspageview`` and
        ``lastpageview``, runs or reuses the matching search, and renders
        the result page.
        """
        logging.info('i regionsearch')
        request = self.request
        host = request.host
        page_size = self._PAGE_SIZE
        ttl = self._CACHE_SECONDS

        # Pick the country and the search index from the host name.
        country = ''
        indexname = _INDEX_NAME
        for fragment, site_country, site_index in self._SITES:
            if fragment in host:
                country = site_country
                if site_index is None:
                    # NOTE(review): setIndex is defined outside this block;
                    # unlike the other sites this leaves indexname at its
                    # default. Confirm that is intended.
                    setIndex(fragment)
                else:
                    indexname = site_index
                break

        cursor_token = request.get('cursor')
        query = request.get('query')
        regionID = regionid = request.get('regionid', 0)
        logging.info('i regionsearch ' + str(regionID))
        cityID = cityid = request.get('cityid', 0)
        categoryID = categoryid = request.get('category', 0)
        page_param = request.get('o')
        first_view = request.get('firstpageview') == 'f1'
        previous_view = request.get('previouspageview') == 'previous'
        last_view = request.get('lastpageview') == 'last'

        category = None
        region = None
        regionname = None
        cityentity = None
        cityname = None
        results = None
        firstpageresults = None
        lastpageresultset = None
        next_cursor = None
        memcache_key = 'memcachedkey'
        logging.info('i regionsearch 2 ' + indexname)

        # Resolve the region: an explicit regionid parameter wins over a
        # "regionID=<n>" prefix in the query string.
        if regionid or 'regionID' in query:
            query = query.strip()
            found = regionid or self._id_in_query(r'regionID=(\d+)', query)
            region_key_id = self._to_id(found)
            if region_key_id is not None:
                regionID = found
                memcache_key = 'region%s' % str(regionID)
                region = Region.get_by_id(region_key_id)
                if region is not None:
                    regionname = region.name
                    logging.info('i regionsearch ' + regionname)

        # Resolve the city; a known city also determines the region.
        if cityid or 'cityID' in query:
            city_key_id = self._to_id(
                cityid or self._id_in_query(r'.*cityID=(\d+)', query))
            city = None
            if city_key_id is not None:
                city = montaomodel.City.get_by_id(city_key_id)
            if city is not None:
                cityID = city.key().id()
                cityentity = city
                cityname = city.name
                region = Region.get_by_id(long(city.region.key().id()))
                if region is not None:
                    regionID = region.key().id()

        # A category id triggers a category search; the paging branches
        # below may replace its result.
        if categoryid or 'category' in query:
            found = categoryid or self._id_in_query(r'.*category=(\d+)', query)
            if found:
                categoryID = found
                logging.info('category')
                results = find_documents3(
                    host, 'category=' + str(categoryID), page_size,
                    search.Cursor(web_safe_string=cursor_token), indexname)

        logging.info('i4 regionsearch next' + str(regionID) + request.get('next'))

        cache_key = memcache_key + str(query)
        first_key = cache_key + 'F'
        previous_key = cache_key + 'prev'

        # The first page is shown when explicitly requested, or when a
        # cursor is present without any other paging parameter.
        shows_first_page = first_view or (
            cursor_token and not page_param and not request.get('next')
            and not request.get('prevprev') and not previous_view)

        if shows_first_page:
            results = memcache.get(first_key)
            if results is None:
                results = find_documents3(host, query, page_size,
                                          search.Cursor(), indexname)
                if results is not None:
                    memcache.add(first_key, results, ttl)
            if results is not None:
                memcache.set(cache_key, results, ttl)
            firstpageresults = results
        elif request.get('next'):
            logging.info('next search')
            results = find_documents3(
                host, query, page_size,
                search.Cursor(web_safe_string=cursor_token), indexname)
            # Remember the page shown before this one. set() is required:
            # add() does not replace a key that already exists.
            shown_before = memcache.get(cache_key)
            if shown_before is not None:
                memcache.set(previous_key, shown_before, ttl)
            if results is not None:
                memcache.set(cache_key, results, ttl)
        elif (not categoryID and not request.get('previouspageview')
              and not page_param):
            logging.info('common case')
            results = find_documents3(host, query, page_size,
                                      search.Cursor(), indexname)
            firstpageresults = results
        elif page_param:
            # Jump straight to page "o" using an offset search.
            # NOTE(review): a non-numeric "o" raises ValueError here, as before.
            results = find_documents_offset(
                query, page_size, (int(page_param) - 1) * page_size)

        logging.info('i 6 regionsearch ' + str(regionID))
        if last_view:
            last_key = cache_key + 'ASCENDING'
            results = memcache.get(last_key)
            if results is None:
                results = self._search_oldest_first(query, indexname)
                if results is not None:
                    memcache.add(last_key, results, 5 * ttl)
            lastpageresultset = results

        if previous_view:
            cached_previous = memcache.get(previous_key)
            if cached_previous is not None:
                results = cached_previous

        if results is not None and results.cursor:
            next_cursor = results.cursor.web_safe_string

        # Build the display form of the query: drop the filter terms and
        # URL-encode "=".
        namedquery = query
        for fragment in self._DISPLAY_FILTERS:
            query = query.replace(fragment, '')
        query = re.sub(r"regionID=\d+", '', query)
        for fragment in ('category and', 'type=s', 'type=w', 'type=r',
                         'type=b', 'cityID and', 'and ', ' and', 'regionID'):
            query = query.replace(fragment, '')
        query = query.replace('=', '%3D')
        namedquery = namedquery.replace('=', '%3D')
        query = re.sub(r"cityID%3D\d+", '', query)
        query = re.sub(r"category%3D\d+", '', query)
        # NOTE(review): the original replaced ' ' with ' ' (a no-op);
        # presumably the intent was to collapse double spaces.
        query = query.replace('  ', ' ')

        # to do: make into a dictionary for O(1) access
        if (self._to_id(regionid) or 0) > 0:
            regionname = region_id_to_name[regionid]

        form = SearchForm()
        # Copied per request because the current region is removed below.
        form.w.choices = list(self._STATE_CHOICES)
        if region:
            # to do: use memcache for the list
            form.area.choices = []
            cities = City.all().filter('region =', region.key()).order(
                '-vieworder').order('name').fetch(99999)
            for cityitem in cities:
                form.area.choices.append([str(cityitem.key().id()),
                                          cityitem.name])
            if cityentity:
                form.area.data = str(cityentity.key().id())

        # Remove the current region from the region drop-down.
        if region:
            current_choice = (str(region.key().id()), region.name)
            if host.find('hipheap') > -1:
                region_choices = form.w_us.choices
            else:
                region_choices = form.w.choices
            if current_choice in region_choices:
                region_choices.remove(current_choice)
            regionname = region.name

        pagenumber = 1
        if page_param != '':
            pagenumber = int(page_param)

        if last_view:
            results = lastpageresultset
        elif first_view:
            results = firstpageresults

        number_returned = 0
        number_found = 0
        numberofpages = 0
        if results is not None:
            number_found = results.number_found
            if last_view:
                # The last page holds the remainder, or a full page when
                # the total is an exact multiple of the page size.
                keep = int(number_found) % page_size or page_size
                del results.results[keep:]
                results.results.reverse()
            number_returned = len(results.results)
            logging.info('i 7 regionsearch ' + str(regionID))
            numberofpages = math.ceil(number_found / float(page_size))

        jobs_count = get_jobs_count(region=regionID, city=cityID)
        estate_count = get_estate_count(region=regionID, city=cityID)
        electronics_count = get_electronics_count(region=regionID, city=cityID)
        home_count = get_home_count(region=regionID, city=cityID)
        leisure_count = get_leisure_count(region=regionID, city=cityID)
        vehicles_count = get_vehicles_count(region=regionID, city=cityID)

        template_values = {
            'results': results,
            'regionname': regionname,
            'pagenumber': pagenumber,
            'numberofpages': numberofpages,
            'cursor': next_cursor,
            'country': country,
            'user': self.current_user,
            'number_returned': number_returned,
            'loggedin': self.logged_in,
            'VERSION': VERSION,
            'region': region,
            'jobs_count': jobs_count,
            'estate_count': estate_count,
            'electronics_count': electronics_count,
            'home_count': home_count,
            'leisure_count': leisure_count,
            'vehicles_count': vehicles_count,
            'cityentity': cityentity,
            'request': request,
            'categoryID': categoryID,
            'form': form,
            'query': query,
            'number_found': number_found,
            'namedquery': namedquery,
            'cityname': cityname,
            'category': category,
        }
        if host.find('koolbusiness') > -1:
            self.render_template('view-ads.html', template_values)
        else:
            self.render_template('koolsearch.html', template_values)