with tempfile.TemporaryFile() as f: # (Real code retrieves archive via urllib2.urlopen().) zip = zipfile.ZipFile(f, mode='w') zip.writestr('unknowndir/src.txt', 'Hello, world!') zip.close();
# (Pretend we just downloaded the zip file.) f.seek(0)
# Result of urlopen() is not seekable, but ZipFile requires a # seekable file. Work around this by copying the file into a # memory stream. with io.BytesIO() as memio: shutil.copyfileobj(f, memio) zip = zipfile.ZipFile(file=memio) # Can't use zip.extract(), because I want to ignore paths # within archive. src = zip.open('unknowndir/src.txt') with open('dst.txt', mode='wb') as dst: shutil.copyfileobj(src, dst)
The last line throws an Error:
Traceback (most recent call last): File "test.py", line 25, in <module> shutil.copyfileobj(src, dst) File "C:\Python26\lib\shutil.py", line 27, in copyfileobj buf = fsrc.read(length) File "C:\Python26\lib\zipfile.py", line 594, in read bytes = self.fileobj.read(bytesToRead) TypeError: integer argument expected, got 'long'
> with io.BytesIO() as memio: > shutil.copyfileobj(f, memio) > zip = zipfile.ZipFile(file=memio) > # Can't use zip.extract(), because I want to ignore paths > # within archive. > src = zip.open('unknowndir/src.txt') > with open('dst.txt', mode='wb') as dst: > shutil.copyfileobj(src, dst)
> The last line throws an Error:
> Traceback (most recent call last): > File "test.py", line 25, in <module> > shutil.copyfileobj(src, dst) > File "C:\Python26\lib\shutil.py", line 27, in copyfileobj > buf = fsrc.read(length) > File "C:\Python26\lib\zipfile.py", line 594, in read > bytes = self.fileobj.read(bytesToRead) > TypeError: integer argument expected, got 'long'
Try adding a length parameter to the copyfileobj call, so the copy is done in small enough chunks.
> En Fri, 23 Oct 2009 14:15:33 -0300, Moore, Mathew L > <Moor...@battelle.org> > escribió:
> > with io.BytesIO() as memio: > > shutil.copyfileobj(f, memio) > > zip = zipfile.ZipFile(file=memio) > > # Can't use zip.extract(), because I want to ignore paths > > # within archive. > > src = zip.open('unknowndir/src.txt') > > with open('dst.txt', mode='wb') as dst: > > shutil.copyfileobj(src, dst)
> > The last line throws an Error:
> > Traceback (most recent call last): > > File "test.py", line 25, in <module> > > shutil.copyfileobj(src, dst) > > File "C:\Python26\lib\shutil.py", line 27, in copyfileobj > > buf = fsrc.read(length) > > File "C:\Python26\lib\zipfile.py", line 594, in read > > bytes = self.fileobj.read(bytesToRead) > > TypeError: integer argument expected, got 'long'
> Try adding a length parameter to the copyfileobj call, so the copy is > done in small enough chunks.
Hmmm...tried a variety of lengths (512, 1024, etc.) with no luck. Maybe this is a good opportunity for me to learn some Python debugging tools.
> with tempfile.TemporaryFile() as f: > # (Real code retrieves archive via urllib2.urlopen().) > zip = zipfile.ZipFile(f, mode='w') > zip.writestr('unknowndir/src.txt', 'Hello, world!') > zip.close();
> # (Pretend we just downloaded the zip file.) > f.seek(0)
> # Result of urlopen() is not seekable, but ZipFile requires a > # seekable file. Work around this by copying the file into a > # memory stream. > with io.BytesIO() as memio: > shutil.copyfileobj(f, memio) > zip = zipfile.ZipFile(file=memio) > # Can't use zip.extract(), because I want to ignore paths > # within archive. > src = zip.open('unknowndir/src.txt') > with open('dst.txt', mode='wb') as dst: > shutil.copyfileobj(src, dst)
> The last line throws an Error:
> Traceback (most recent call last): > File "test.py", line 25, in <module> > shutil.copyfileobj(src, dst) > File "C:\Python26\lib\shutil.py", line 27, in copyfileobj > buf = fsrc.read(length) > File "C:\Python26\lib\zipfile.py", line 594, in read > bytes = self.fileobj.read(bytesToRead) > TypeError: integer argument expected, got 'long'
It should hopefully work if you use cStringIO/StringIO instead of BytesIO.
I think the issue is essentially that StringIO.read() will accept a long object, while the backport of BytesIO to 2.6 does an explicit check for int:
py> StringIO.StringIO("foo").read(long(1)) 'f'
py> io.BytesIO("foo").read(long(1)) Traceback (most recent call last): File "<stdin>", line 1, in <module> TypeError: integer argument expected, got 'long'
Should this be amended? Perhaps someone on core can consider it.
As for why the bytesToRead calculation in ZipExtFile.read() results in a long, I've not yet looked at it closely.
<snip> > > with io.BytesIO() as memio: > > shutil.copyfileobj(f, memio) > > zip = zipfile.ZipFile(file=memio) > > # Can't use zip.extract(), because I want to ignore paths > > # within archive. > > src = zip.open('unknowndir/src.txt') > > with open('dst.txt', mode='wb') as dst: > > shutil.copyfileobj(src, dst)
> > The last line throws an Error:
> > Traceback (most recent call last): > > File "test.py", line 25, in <module> > > shutil.copyfileobj(src, dst) > > File "C:\Python26\lib\shutil.py", line 27, in copyfileobj > > buf = fsrc.read(length) > > File "C:\Python26\lib\zipfile.py", line 594, in read > > bytes = self.fileobj.read(bytesToRead) > > TypeError: integer argument expected, got 'long'
> It should hopefully work if you use cStringIO/StringIO instead of > BytesIO.
It does! Excellent! You've saved me the trouble of a weekend debug session.