Can you let me know where the problem is? Thanks.
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2018-01-11 16:20:11
# Project: NetaPorter
from pyspider.libs.base_handler import *
import pymongo
class Handler(BaseHandler):
    """pyspider handler that scrapes product pages and stores them in MongoDB.

    Flow: ``on_start`` seeds the crawl, ``index_page`` follows every product
    link found on a listing page, ``detail_page`` extracts one product record,
    and ``on_result`` persists each non-empty record via ``save_to_mongo``.
    """

    # Browser-like request headers sent with every fetch of this project.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36',
    }

    # Project-wide defaults for self.crawl(); this is what actually attaches
    # the headers above to outgoing requests.
    crawl_config = {
        'headers': headers,
    }

    # NOTE(review): the original script never defined a start URL, which is
    # why nothing was ever crawled. Placeholder below -- replace it with the
    # listing page that contains the '.product-images a' links.
    start_url = 'https://www.net-a-porter.com/'

    # MongoDB connection shared by all handler instances; records are written
    # to the 'netaporter' collection of the 'test' database.
    client = pymongo.MongoClient('localhost')
    db = client['test']

    @every(seconds=60)
    def on_start(self):
        """Seed the crawl by scheduling the listing page."""
        self.crawl(self.start_url, callback=self.index_page)

    @config(age=10)
    def index_page(self, response):
        """Schedule a detail-page crawl for every product link on the page."""
        for each in response.doc('.product-images a').items():
            href = each.attr.href
            # Anchors without an href cannot be crawled; skip them.
            if href:
                self.crawl(href, callback=self.detail_page)

    @config(priority=2)
    def detail_page(self, response):
        """Extract one product record from a product detail page.

        Returns a dict with the page URL, brand, description and price.
        "Price" is None when the page carries no 'data-price-full' attribute.
        """
        raw_price = response.doc('.product-data').attr('data-price-full')
        return {
            "url": response.url,
            "brand": response.doc('[itemprop="brand"] span').text(),
            "itemdesc": response.doc('.product-name').text(),
            # The last two characters are dropped, as in the original script
            # (presumably the minor currency units -- TODO confirm).
            "Price": raw_price[:-2] if raw_price else None,
        }

    def on_result(self, result):
        """Persist a callback's result; empty results are ignored."""
        if result:
            self.save_to_mongo(result)

    def save_to_mongo(self, result):
        """Insert ``result`` into the 'netaporter' collection and log it."""
        # insert_one replaces Collection.insert, which is deprecated in
        # PyMongo 3 and removed in PyMongo 4. It raises on failure, so
        # reaching the print means the write was issued.
        self.db['netaporter'].insert_one(result)
        print('saved to mongo', result)
History: