Cron job reloads only the index page, not the detail pages

15 views
Skip to first unread message

yyi...@gmail.com

unread,
Jan 15, 2018, 10:06:43 PM1/15/18
to pyspider-users
Hi, I have written some code that tries to insert scraped data into MongoDB. It works fine on the first run, but on later runs the scheduler does not seem to trigger the detail-page code, so new data is not inserted into my MongoDB.

Can you let me know where the problem is? Thanks.


Below is my code. 
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-01-11 16:20:11
# Project: NetaPorter

from pyspider.libs.base_handler import *
import pymongo


class Handler(BaseHandler):
    """pyspider handler that stores scraped product details in MongoDB.

    Flow: ``on_start`` (run every 60 seconds) seeds the listing pages,
    ``index_page`` follows every product link it finds, ``detail_page``
    extracts one record per product, and ``on_result`` writes that record
    to the ``netaporter`` collection of the local ``test`` database.
    """

    crawl_config = {
    }

    # Browser-like request headers sent with the seed requests.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Host': 'www.net-a-porter.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
    }

    # Listing-page URLs to seed the crawl from.
    # NOTE(review): the posted code contained no ``self.crawl`` call in
    # ``on_start``, so the real listing URL is unknown here; it is left
    # empty (nothing is scheduled, exactly as in the posted code) until
    # the project owner fills it in.
    start_urls = ()

    # The client is created once, when the class body is executed.
    client = pymongo.MongoClient('localhost')
    db = client['test']

    @every(seconds=60)
    def on_start(self):
        """Schedule every URL in ``start_urls`` to be parsed by ``index_page``."""
        for url in self.start_urls:
            self.crawl(url, callback=self.index_page, headers=self.headers)

    @config(age=10)
    def index_page(self, response):
        """Queue each product link found on a listing page for ``detail_page``."""
        for link in response.doc('.product-images a').items():
            self.crawl(link.attr.href, callback=self.detail_page)

    # ``age`` is required here: per the pyspider ``self.crawl`` documentation a
    # task without an age is never considered expired, so detail pages that
    # were fetched once would be skipped on every later cron run. The value
    # mirrors the one used for ``index_page``.
    @config(age=10, priority=2)
    def detail_page(self, response):
        """Extract one product record from a product detail page.

        Returns a dict with the keys ``url``, ``brand``, ``itemdesc`` and
        ``Price``. ``Price`` is the ``data-price-full`` attribute with its
        last two characters dropped, or ``None`` when the attribute is
        absent from the page.
        """
        raw_price = response.doc('.product-data').attr('data-price-full')
        # ``attr`` yields None for a missing attribute; slicing None would
        # raise TypeError and fail the whole task.
        price = raw_price[:-2] if raw_price is not None else None

        return {
            "url": response.url,
            "brand": response.doc('[itemprop="brand"] span').text(),
            "itemdesc": response.doc('.product-name').text(),
            "Price": price,
        }

    def on_result(self, result):
        """Persist a non-empty result; empty or ``None`` results are ignored."""
        if not result:
            return
        self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert ``result`` into the ``netaporter`` collection and log it."""
        # ``Collection.insert`` is deprecated in PyMongo 3 and removed in
        # PyMongo 4; ``insert_one`` is its single-document replacement.
        outcome = self.db['netaporter'].insert_one(result)
        if outcome.inserted_id is not None:
            print('saved to mongo', result)

History:

Reply all
Reply to author
Forward
0 new messages