I can't crawl some URLs, for example 'http://mail.ru'. Fetching it from the Scrapy shell fails with the traceback below:
scrapy shell
In [1]: fetch('http://mail.ru')
2012-01-27 12:52:03+0600 [default] INFO: Spider opened
---------------------------------------------------------------------------
error Traceback (most recent call last)
/usr/lib/pymodules/python2.7/scrapy/utils/console.pyc in <module>()
----> 1
2
3
4
5
/usr/lib/pymodules/python2.7/scrapy/shell.pyc in fetch(self,
request_or_url, spider)
86 response = None
87 response, spider =
threads.blockingCallFromThread(reactor, \
---> 88 self._schedule, request, spider)
89 self.populate_vars(response, request, spider)
90
/usr/lib/python2.7/dist-packages/twisted/internet/threads.pyc in
blockingCallFromThread(reactor, f, *a, **kw)
116 result = queue.get()
117 if isinstance(result, failure.Failure):
--> 118 result.raiseException()
119 return result
120
/usr/lib/python2.7/dist-packages/twisted/internet/defer.pyc in
_runCallbacks(self)
540 current._runningCallbacks = True
541 try:
--> 542 current.result =
callback(current.result, *args, **kw)
543 finally:
544 current._runningCallbacks = False
/usr/lib/pymodules/python2.7/scrapy/core/downloader/middleware.pyc in
process_response(response)
44
45 for method in self.methods['process_response']:
---> 46 response = method(request=request,
response=response, spider=spider)
47 assert isinstance(response, (Response,
Request)), \
48 'Middleware %s.process_response must
return Response or Request, got %s' % \
/usr/lib/pymodules/python2.7/scrapy/contrib/downloadermiddleware/httpcompression.pyc in process_response(self, request, response, spider)
18 if content_encoding:
19 encoding = content_encoding.pop()
---> 20 decoded_body = self._decode(response.body,
encoding.lower())
21 respcls =
responsetypes.from_args(headers=response.headers, \
22 url=response.url)
/usr/lib/pymodules/python2.7/scrapy/contrib/downloadermiddleware/httpcompression.pyc in _decode(self, body, encoding)
34 def _decode(self, body, encoding):
35 if encoding == 'gzip' or encoding == 'x-gzip':
---> 36 body = gunzip(body)
37
38 if encoding == 'deflate':
/usr/lib/pymodules/python2.7/scrapy/utils/gz.pyc in gunzip(data)
12 while chunk:
13 try:
---> 14 chunk = f.read(8196)
15 output += chunk
16 except IOError:
/usr/lib/python2.7/gzip.pyc in read(self, size)
250 try:
251 while size > self.extrasize:
--> 252 self._read(readsize)
253 readsize = min(self.max_read_chunk,
readsize * 2)
254 except EOFError:
/usr/lib/python2.7/gzip.pyc in _read(self, size)
314 # Check the CRC and file size, and set the flag so
we read
315 # a new member on the next call
--> 316 self._read_eof()
317 self._new_member = True
318
/usr/lib/python2.7/gzip.pyc in _read_eof(self)
332 # stored is the true file size mod 2**32.
333 self.fileobj.seek(-8, 1)
--> 334 crc32 = read32(self.fileobj)
335 isize = read32(self.fileobj) # may exceed 2GB
336 if crc32 != self.crc:
/usr/lib/python2.7/gzip.pyc in read32(input)
23
24 def read32(input):
---> 25 return struct.unpack("<I", input.read(4))[0]
26
27 def open(filename, mode="rb", compresslevel=9):
error: unpack requires a string argument of length 4