I am new to the group and to Scrapy.
Last week I posted that I wasn't able to scrape an HTTPS page... But by Friday I was able to get past the login page!
Well, now that I am able to log in, I want to continue crawling, but the code complains that the parse function I call doesn't exist.
Do I need to define the function beforehand, or at least "mention" it in some settings page?
As far as I can see, I did exactly the same thing as in a discussion page I was following, but I still get this message.
from scrapy import log
from scrapy.http import Request, FormRequest
from scrapy.item import Item, Field
from scrapy.selector import HtmlXPathSelector
from scrapy.spider import BaseSpider
# Data Definition
class BlaItem(Item):
    """Scrapy item declaring the three fields this project collects.

    Every attribute is a plain ``Field()`` with no metadata attached.
    """

    # Field names are part of the item's public schema; keep them as-is.
    title = Field()
    name_type = Field()
    name_value = Field()
# Spider Definition (Login Page Spider)
class LoginSpider(BaseSpider):
    """Spider that submits the login form and reports whether login worked.

    Flow: ``parse`` receives the login page, saves and prints parts of it,
    then posts the form; ``after_login`` inspects the reply to that post.

    All three callbacks must be indented as methods of this class. A ``def``
    that ends up outside the class body (for example through mixed tabs and
    spaces) is not an attribute of the spider, and ``self.<name>`` then
    raises ``AttributeError``.
    """

    name = "blalogin"

    def parse(self, response):
        """Save the page body, print some of its parts, and submit the form.

        Returns a one-element list holding the ``FormRequest`` built from
        the form found in ``response``; its reply goes to ``after_login``.
        """
        # The dump file is named after the second-to-last "/"-separated
        # piece of the URL.
        filename = response.url.split("/")[-2]
        # ``with`` guarantees the handle is closed even if the write fails.
        with open(filename, 'wb') as page_dump:
            page_dump.write(response.body)

        hxs = HtmlXPathSelector(response)
        bla_title = hxs.select("//title/text()").extract()
        bla_name = hxs.select("//h1/text()").extract()
        bla_form = hxs.select("//form[@id = 'MainForm']/text()").extract()

        # Single-argument, parenthesised print produces the same output on
        # Python 2 and also parses on Python 3.
        print(bla_title)
        print(bla_name)
        print(bla_form)
        print('')

        login_request = FormRequest.from_response(
            response,
            formdata={'USERID': 'Username', 'Password': 'password'},
            callback=self.after_login
        )
        return [login_request]

    def after_login(self, response):
        """Check the reply to the login POST.

        Logs an error and returns ``None`` when the body contains
        "authentication failed"; otherwise prints a success message and
        also returns ``None`` (no further requests are scheduled here).
        """
        # Check that login succeeded before going on.
        if "authentication failed" in response.body:
            # ``log`` is the ``scrapy.log`` module imported at file level.
            self.log("Login failed", level=log.ERROR)
            return

        print('Login worked!!! YEY')

    def parse_impersonate(self, response):
        """Print a marker line and return a fixed message string.

        NOTE(review): nothing visible in this class schedules this callback,
        and Scrapy callbacks are normally expected to return Request/Item
        objects rather than a plain string -- confirm the intended use.
        """
        message = 'yes, I am here'
        print('Impersonate here!')
        return message
ERROR MESSAGE
[...]
--- <exception caught here> ---
File "/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/twisted/internet/defer.py", line 551, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "/Users/ANA/Documents/Scraper/blablablaProject/blablablaProject/spiders/blablablaScrapy.py", line 73, in after_login
exceptions.AttributeError: 'LoginSpider' object has no attribute 'parse_impersonate'