In [1]: type(response)
Out[1]: scrapy.http.response.Response
In [2]: response.headers
Out[2]:
{'Date': 'Tue, 11 Aug 2015 16:57:27 GMT',
'Server': 'Webs.com/1.0',
'Set-Cookie': 'fwww=b4e6b552bf12b31f11fd753117ad163ea80e738c7fe8587bfd2eebc489eb9921; Path=/',
'X-Robots-Tag': 'nofollow'}
def parse(self, response):
    """Force a successful non-HTML response into an ``HtmlResponse`` and build the loader.

    When ``response.status`` is 200 but ``response`` is not an
    ``HtmlResponse``, a new ``HtmlResponse`` is built from the same url,
    headers, body and request. Its flags are a copy of the original flags
    with one ``'partial'`` entry removed (if present) and ``'fixed'``
    appended. If that conversion fails for any reason, the failure is logged
    and the original response is used unchanged.

    :param response: the response object handed to this callback.
    """
    # Scrapy doesn't return an HtmlResponse for some sites which makes loading
    # items fail. This forces the response to be HtmlResponse type.
    # As seen here http://git.io/v3zoP
    if response.status == 200 and not isinstance(response, HtmlResponse):
        try:
            # Work on a copy: editing response.flags in place would leave the
            # original response marked 'fixed' even when the conversion fails.
            flags = list(response.flags)
            if 'partial' in flags:
                flags.remove('partial')
            flags.append('fixed')
            html_response = HtmlResponse(
                response.url,
                headers=response.headers,
                body=response.body,
                flags=flags,
                request=response.request
            )
        except Exception as exc:
            # Best effort: keep going with the original response, but say so
            # instead of silently hiding the failure.
            log.msg('Could not transform response into HtmlResponse for %s: %r'
                    % (response.url, exc), level=log.WARNING)
        else:
            response = html_response
            log.msg('Response transformed into HtmlResponse for %s' % response.url,
                    level=log.WARNING)
    # The loader is bound to ``l`` exactly as in the original code.
    l = WaLoader(item=WaItem(), response=response)