Way to go backwards and to middle of result set?

1 view
Skip to first unread message

Dj Dac via StackOverflow

unread,
Apr 1, 2017, 1:18:05 AM4/1/17
to google-appengin...@googlegroups.com

I am writing a webapp in Python on Google App Engine, and I use cursors in my pagination to go to the next page. The pagination should also let the user go backwards and, for example, jump 20 pages forward (but maybe not to the last page). Previously the App Engine API offered no good way to go backwards other than caching the previous page, and there was no good or easy way to go to the middle of a result set or 20 pages forward in one go. [image: enter image description here] Is my best bet to do as Google does, that is, fetch 10 or 20 pages, use caching to split them into pages, and give up on jumping to the middle of a large result set? I agree there is no clear rationale why a user would want to go to the last page or to the middle of a result set that has 100 pages of 50 items each.

I'm going to implement sorting by date as the default sorting and also sorting by price ascending and descending (the webapp is an online classifieds site similar to craigslist where you can search and page through classified ads).

My somewhat dirty code, which I am going to clean up, is below; it pages to the next page by using the cursor method. I beg your pardon if the code is very dirty, but it is a first draft without any refactoring.

def find_documents3(url, query_string, doc_limit, cursor, indexname=_INDEX_NAME,
                    sort_options=None):
    """Run a full-text search against one index and return a page of results.

    Args:
        url: Host name of the current request. Unless it contains "127",
            ' url:<host>' is appended to the query string.
        query_string: Query in Search API query syntax.
        doc_limit: Maximum number of documents to return.
        cursor: search.Cursor marking where the page starts.
        indexname: Name of the search index to query.
        sort_options: Optional search.SortOptions. When omitted, results are
            sorted by date, hour and minute, all descending.

    Returns:
        Whatever index.search() returns, or None when a search error was
        caught (the error is logged).
    """
    try:
        if sort_options is None:
            descending = search.SortExpression.DESCENDING
            date_desc = search.SortExpression(expression='date',
                                              direction=descending,
                                              default_value=datetime(1999, 1, 1))
            hr_desc = search.SortExpression(expression='hour',
                                            direction=descending,
                                            default_value=1)
            min_desc = search.SortExpression(expression='minute',
                                             direction=descending,
                                             default_value=1)
            # Sort up to ACCURACY matching results by date, hour and minute,
            # in descending order.
            sort_options = search.SortOptions(
                expressions=[date_desc, hr_desc, min_desc], limit=ACCURACY)

        # Set query options
        options = search.QueryOptions(limit=doc_limit, cursor=cursor,
                                      sort_options=sort_options,
                                      number_found_accuracy=10000)
        # Requests whose host contains "127" search without the url filter.
        if "127" not in url:
            query_string = query_string + ' url:' + url
        query = search.Query(query_string=query_string, options=options)

        logging.info('indexname %s', indexname)
        index = search.Index(name=indexname)
        # Execute the query
        return index.search(query)

    except search.PutError as e:
        logging.exception('caught PutError %s', e)

    except search.InternalError as e:
        logging.exception('caught InternalError %s', e)

    except search.DeleteError as e:
        logging.exception('caught DeleteError %s', e)

    except search.TransientError as e:
        logging.exception('caught TransientError %s', e)

    except search.InvalidRequest as e:
        logging.exception('caught InvalidRequest %s', e)

    except search.Error as e:
        logging.exception('caught unknown error  %s', e)

    return None


class RegionSearch(SearchBaseHandler):
    """Handles regional search requests."""

    # Number of results shown on one page.
    _PAGE_SIZE = 50
    # Lifetime, in seconds, passed to memcache for cached result pages.
    _CACHE_SECONDS = 36000

    # (host substring, country, search index name). The first entry whose
    # substring occurs in the request host wins, so the order matters.
    _SITES = (
        ('qlbusiness', 'USA', 'qlbusiness'),
        ('koolbusiness', 'India', 'koolbusiness'),
        ('new-ads', 'Philippines', 'new-ads'),
        ('kewlbusiness', 'Pakistan', 'kewlbusiness'),
        ('montao', 'Brasil', 'montao'),
        ('morocco-business', 'Morocco', 'morocco-business'),
        ('businessafrikaans', 'South Africa', 'businessafrikaans'),
        ('qewlbusiness', 'Malaysia', 'qewlbusiness'),
        ('klokaffar', 'Sverige', 'klokaffar'),
        ('127', 'Sverige', 'klokaffar'),
    )

    # Filter clauses stripped, in this order, from the query shown to the user.
    _QUERY_FILTER_FRAGMENTS = (
        ' and company_ad=0',
        ' and company_ad=1',
        ' and category:(6010 OR 6020 OR 6030 OR 6040 OR 6090)',
        ' and category:(1020 OR 1010 OR 1030 OR 1050 OR 1080 OR 1100 OR 1090)',
        ' and category:(2010 OR 2030 OR 2040 OR 2080 OR 2070)',
        ' and category:(3040 OR 3050 OR 3030 OR 3060)',
        ' and category:(4010 OR 4020 OR 4040 OR 4030 OR 4090 OR 4060 OR 4070)',
    )

    # Leftover keywords stripped, in this order, after the filter clauses.
    _QUERY_NOISE = (
        'category and',
        'type=s',
        'type=w',
        'type=r',
        'type=b',
        'cityID and',
        'and ',
        ' and',
        'regionID',
    )

    # Region choices offered by the search form.
    _INDIA_REGION_CHOICES = (
        ('4703187', u'Andaman & Nicobar Islands'),
        ('4694186', u'Andhra Pradesh'),
        ('4699188', u'Arunachal Pradesh'),
        ('4692186', u'Assam'),
        ('4702186', u'Bihar'),
        ('4698185', u'Chandigarh'),
        ('4676188', u'Chhattisgarh'),
        ('4691190', u'Dadra & Nagar Haveli'),
        ('4704183', u'Daman & Diu'),
        ('4699183', u'Delhi'),
        ('4702187', u'Goa'),
        ('4691189', u'Gujarat'),
        ('4700186', u'Haryana'),
        ('4703185', u'Himachal Pradesh'),
        ('4694187', u'Jammu & Kashmir'),
        ('4699189', u'Jharkhand'),
        ('4701185', u'Karnataka'),
        ('4695189', u'Kerala'),
        ('4700189', u'Lakshadweep'),
        ('4697186', u'Madhya Pradesh'),
        ('4694184', u'Maharashtra'),
        ('4700187', u'Manipur'),
        ('4703186', u'Meghalaya'),
        ('4698184', u'Mizoram'),
        ('4692187', u'Nagaland'),
        ('4696185', u'Orissa'),
        ('4676189', u'Pondicherry'),
        ('4693185', u'Punjab'),
        ('4701186', u'Rajasthan'),
        ('4701187', u'Sikkim'),
        ('4701188', u'Tamil Nadu'),
        ('4697187', u'Tripura'),
        ('4699190', u'Uttaranchal'),
        ('4692188', u'Uttar Pradesh'),
        ('4700188', u'West Bengal'),
    )

    @staticmethod
    def _get_by_numeric_id(model, raw_id):
        """Return model.get_by_id(long(raw_id)), or None if raw_id is not numeric."""
        try:
            numeric_id = long(raw_id)
        except (TypeError, ValueError):
            return None
        return model.get_by_id(numeric_id)

    @classmethod
    def _display_query(cls, query):
        """Return query with filter clauses removed and '=' URL-escaped."""
        for fragment in cls._QUERY_FILTER_FRAGMENTS:
            query = query.replace(fragment, '')
        query = re.sub(r"regionID=\d+", '', query)
        for noise in cls._QUERY_NOISE:
            query = query.replace(noise, '')
        query = query.replace('=', '%3D')
        query = re.sub(r"cityID%3D\d+", '', query)
        query = re.sub(r"category%3D\d+", '', query)
        return query.replace('  ', ' ')

    def _find_last_page(self, query, indexname):
        """Return the first page of query sorted oldest-first, or None on a search error.

        The caller trims and reverses this page to display it as the last
        page of the newest-first listing.
        """
        ascending = search.SortExpression.ASCENDING
        try:
            sortoptions = search.SortOptions(
                expressions=[
                    search.SortExpression(expression='date',
                                          direction=ascending,
                                          default_value=datetime(1999, 1, 1)),
                    search.SortExpression(expression='hour',
                                          direction=ascending,
                                          default_value=1),
                    search.SortExpression(expression='minute',
                                          direction=ascending,
                                          default_value=1),
                ],
                limit=ACCURACY)
            options = search.QueryOptions(limit=self._PAGE_SIZE,
                                          cursor=search.Cursor(),
                                          sort_options=sortoptions,
                                          number_found_accuracy=10000)
            host = self.request.host
            # Same url filter the descending search applies: hosts
            # containing "127" search without it.
            if "127" not in host:
                query = query + ' url:' + host
            index = search.Index(name=indexname)
            return index.search(search.Query(query_string=query,
                                             options=options))
        except search.Error as e:
            logging.exception('caught search error %s', e)
        return None

    def get(self):
        """Handles a get request with a query.

        Reads these request parameters: query, cursor, regionid, cityid,
        category, o (page number), next, prevprev, firstpageview,
        previouspageview and lastpageview. Renders 'view-ads.html' for hosts
        containing 'koolbusiness' and 'koolsearch.html' for all others.
        """
        logging.info('i regionsearch')
        request = self.request
        host = request.host
        page_size = self._PAGE_SIZE
        ttl = self._CACHE_SECONDS

        results = None
        lastpageresultset = None
        firstpageresults = None
        category = None
        cityentity = None
        cityname = None
        region = None
        regionname = None
        next_cursor = None
        number_returned = 0

        # Pick country and search index from the host name.
        country = ''
        indexname = _INDEX_NAME
        for marker, site_country, site_index in self._SITES:
            if marker in host:
                country, indexname = site_country, site_index
                if marker == 'koolbusiness':
                    setIndex(site_index)
                break

        cursor = request.get('cursor')
        query = request.get('query')
        regionID = regionid = request.get('regionid', 0)
        logging.info('i regionsearch  %s', regionID)
        cityID = cityid = request.get('cityid', 0)
        categoryID = categoryid = request.get('category', 0)
        memcache_key = 'memcachedkey'
        logging.info('i regionsearch 2 %s', indexname)

        # Resolve the region: an explicit regionid parameter wins over a
        # leading "regionID=<n>" in the query.
        if regionid or 'regionID' in query:
            query = query.strip()
            if not regionid:
                region_match = re.match(r'regionID=(\d+)', query)
                if region_match:
                    regionID = region_match.group(1)
            if regionID:
                memcache_key = 'region%s' % str(regionID)
                region = self._get_by_numeric_id(Region, regionID)
            if region is not None:
                regionname = region.name
                logging.info('i regionsearch  %s', regionname)

        # Resolve the city; a city also determines the region.
        if cityid or 'cityID' in query:
            if cityid:
                cityID = cityid
            else:
                # Leading greedy ".*" picks the last "cityID=<n>" in the query.
                city_match = re.match(r'.*cityID=(\d+)', query)
                if city_match:
                    cityID = city_match.group(1)
            city = None
            if cityID:
                city = self._get_by_numeric_id(montaomodel.City, cityID)
            if city is not None:
                cityentity = city
                cityID = city.key().id()
                cityname = city.name
                city_region = Region.get_by_id(long(city.region.key().id()))
                if city_region is not None:
                    region = city_region
                    regionID = region.key().id()

        # Resolve the category and run the category search.
        if categoryid or 'category' in query:
            if categoryid:
                categoryID = categoryid
            else:
                category_match = re.match(r'.*category=(\d+)', query)
                if category_match:
                    categoryID = category_match.group(1)
            logging.info('category')
            if categoryID:
                results = find_documents3(
                    host, 'category=%s' % categoryID, page_size,
                    search.Cursor(web_safe_string=cursor), indexname)

        page_param = request.get('o')
        wants_next = request.get('next')
        wants_first = request.get('firstpageview') == 'f1'
        previous_param = request.get('previouspageview')
        wants_previous = previous_param == 'previous'
        wants_last = request.get('lastpageview') == 'last'

        pagenumber = 1
        if page_param != '':
            try:
                pagenumber = int(page_param)
            except ValueError:
                pagenumber = 1

        def cache_key(suffix=''):
            # NOTE(review): str(query) looks like it raises for non-ASCII
            # unicode queries under Python 2 - confirm before relying on it.
            return memcache_key + str(query) + suffix

        logging.info('i4 regionsearch  next%s%s', regionID, wants_next)
        if (cursor and not page_param and not wants_next and not wants_first
                and not request.get('prevprev') and not wants_previous):
            data = memcache.get(cache_key())
            if data is not None:
                # NOTE(review): this writes the 'Fs' suffix while the
                # first-page branch below reads 'F' - confirm which is meant.
                memcache.add(cache_key('Fs'), data, ttl)
                results = data
            else:
                results = find_documents3(host, query, page_size,
                                          search.Cursor(), indexname)
                memcache.add(cache_key('Fs'), results, ttl)
                memcache.add(cache_key(), results, ttl)
            firstpageresults = results

        elif wants_first:
            data = memcache.get(cache_key('F'))
            if data is not None:
                results = data
            else:
                results = find_documents3(host, query, page_size,
                                          search.Cursor(), indexname)
                memcache.add(cache_key(), results, ttl)
                memcache.add(cache_key('F'), results, ttl)
            firstpageresults = results

        elif wants_next:
            logging.info('next search')
            results = find_documents3(host, query, page_size,
                                      search.Cursor(web_safe_string=cursor),
                                      indexname)
            # NOTE(review): memcache.add does not overwrite an existing key,
            # so these entries keep the first page stored until they expire -
            # confirm whether memcache.set was intended.
            memcache.add(cache_key('prev'), memcache.get(cache_key()), ttl)
            memcache.add(cache_key(), results, ttl)

        elif not categoryID and not previous_param and not page_param:
            logging.info('common case')
            results = find_documents3(host, query, page_size,
                                      search.Cursor(), indexname)
            firstpageresults = results

        elif page_param:
            offset = max(pagenumber - 1, 0) * page_size
            results = find_documents_offset(query, page_size, offset)

        logging.info('i  6 regionsearch  %s', regionID)
        if wants_last:
            data = memcache.get(cache_key('ASCENDING'))
            if data is not None:
                results = data
            else:
                results = self._find_last_page(query, indexname)
                if results is not None:
                    memcache.add(cache_key('ASCENDING'), results, 5 * ttl)
            lastpageresultset = results

        if wants_previous:
            # Same key the 'next' branch writes the previous page under.
            data = memcache.get(cache_key('prev'))
            if data is not None:
                results = data

        if results and results.cursor:
            next_cursor = results.cursor.web_safe_string

        namedquery = query.replace('=', '%3D')
        display_query = self._display_query(query)

        try:
            has_region_param = int(regionid) > 0
        except (TypeError, ValueError):
            has_region_param = False
        if has_region_param:
            regionname = region_id_to_name[regionid]

        form = SearchForm()
        # Copy, because the matching region is removed from the list below.
        form.w.choices = list(self._INDIA_REGION_CHOICES)

        if region or cityentity:

            # to do:use memcache

            form.area.choices = []  # to do: use memcache for the list
            if region:
                cities = City.all().filter('region =', region.key()).order(
                    '-vieworder').order('name').fetch(99999)
                for cityitem in cities:
                    form.area.choices.append([str(cityitem.key().id()),
                                              cityitem.name])
            if cityentity:
                form.area.data = str(cityentity.key().id())

            if region:
                # Drop the current region from the region drop-down.
                region_choice = (str(region.key().id()), region.name)
                region_field = form.w_us if 'hipheap' in host else form.w
                if region_choice in region_field.choices:
                    region_field.choices.remove(region_choice)
        if region:
            regionname = region.name

        if wants_last:
            results = lastpageresultset
        elif wants_first:
            results = firstpageresults

        number_found = 0
        if results is not None:
            number_found = results.number_found
            if wants_last:
                # The last page holds the remainder; a remainder of zero
                # means the last page is a full page.
                remainder = int(number_found) % page_size
                if remainder:
                    del results.results[remainder:]
                results.results.reverse()
            number_returned = len(results.results)

        logging.info('i  7 regionsearch  %s', regionID)
        numberofpages = math.ceil(number_found / float(page_size))
        jobs_count = get_jobs_count(region=regionID, city=cityID)
        estate_count = get_estate_count(region=regionID, city=cityID)
        electronics_count = get_electronics_count(region=regionID, city=cityID)
        home_count = get_home_count(region=regionID, city=cityID)
        leisure_count = get_leisure_count(region=regionID, city=cityID)
        vehicles_count = get_vehicles_count(region=regionID, city=cityID)
        template_values = {
            'results': results,
            'regionname': regionname,
            'pagenumber': pagenumber,
            'numberofpages': numberofpages,
            'cursor': next_cursor,
            'country': country,
            'user': self.current_user,
            'number_returned': number_returned,
            'loggedin': self.logged_in,
            'VERSION': VERSION,
            'region': region,
            'jobs_count': jobs_count,
            'estate_count': estate_count,
            'electronics_count': electronics_count,
            'home_count': home_count,
            'leisure_count': leisure_count,
            'vehicles_count': vehicles_count,
            'cityentity': cityentity,
            'request': self.request,
            'categoryID': categoryID,
            'form': form,
            'query': display_query,
            'number_found': number_found,
            'namedquery': namedquery,
            'cityname': cityname,
            'category': category,
        }
        if host.find('koolbusiness') > -1:
            self.render_template('view-ads.html', template_values)

        else:
            self.render_template('koolsearch.html', template_values)


Please DO NOT REPLY directly to this email but go to StackOverflow:
http://stackoverflow.com/questions/43153091/way-to-go-backwards-and-to-middle-of-result-set
Reply all
Reply to author
Forward
0 new messages