--Darrell
import os
import re
import sys
import zlib

import Image
# PDF device colour spaces mapped to the PIL raw mode with the same channel
# layout.  Anything else falls back to "RGB", the mode this script has
# always used.  NOTE(review): assumes 8 bits per component -- the
# /BitsPerComponent entry is not inspected.
_PIL_MODES = {
    "DeviceRGB": "RGB",
    "DeviceGray": "L",
    "DeviceCMYK": "CMYK",
}


def _first_field(pattern, header):
    """Return the first capture of *pattern* in *header* as a native str.

    Raises IndexError when *pattern* does not occur in *header*, which the
    caller uses to skip images with missing dictionary entries.
    """
    value = re.findall(pattern, header)[0]
    # The patterns only capture ASCII \w / \d characters, so the decode
    # cannot fail; str() turns the result back into a byte string on
    # Python 2 and leaves it as text on Python 3.
    return str(value.decode("ascii"))


def stripImages(fn):
    """Write the image XObjects found in the PDF file *fn* to JPEG files.

    The file is scanned as raw bytes for
    ``/XObject /Subtype /Image ... stream ... endstream`` sections.  For
    every section whose dictionary provides /Name, /Width, /Height, /Filter
    and /ColorSpace as simple values, an image is written to
    ``<fn without extension>_<name>.jpg``:

    * ``DCTDecode`` streams are already JPEG data and are copied verbatim.
    * ``FlateDecode`` streams are inflated, interpreted as raw pixels and
      re-encoded by PIL.

    Sections lacking one of the entries are reported with "Skip:" and
    ignored; streams using any other filter are reported but not written.
    The number of sections found and one "Found:" line per usable image are
    printed to stdout.  Inline images and images inside compressed object
    streams are not detected.
    """
    # Read everything up front and release the handle immediately.
    with open(fn, 'rb') as pdf:
        buf = pdf.read()
    # splitext only removes the final extension, so "./a.pdf" or
    # "dir.v2/a.pdf" keep their directory part intact.
    base = os.path.splitext(fn)[0]
    streams = re.findall(
        br"(?s)/XObject\s+/Subtype\s+/Image(.*?)stream\s*\012(.*?)endstream",
        buf)
    print(len(streams))
    for header, data in streams:
        try:
            name = _first_field(br"(?i)/name\s+/(\w+)", header)
            width = _first_field(br"(?i)/Width\s+(\d+)", header)
            height = _first_field(br"(?i)/Height\s+(\d+)", header)
            filter_name = _first_field(br"(?i)/filter\s+/(\w+)", header)
            color_space = _first_field(br"(?i)/ColorSpace\s+/(\w+)", header)
        except IndexError:
            # repr keeps arbitrary binary bytes from garbling the terminal.
            print("Skip: %r" % (header,))
            continue
        print("Found: %s %s %s %s %s"
              % (name, width, height, filter_name, color_space))
        target = "%s_%s.jpg" % (base, name)
        if filter_name == "FlateDecode":
            pixels = zlib.decompress(data)
            mode = _PIL_MODES.get(color_space, "RGB")
            image = Image.fromstring(mode, (int(width), int(height)), pixels)
            image.save(target)
        elif filter_name == "DCTDecode":
            with open(target, 'wb') as out:
                out.write(data)
if __name__ == "__main__":
    # Only run when executed as a script, so importing this module does not
    # trigger an extraction as a side effect.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: %s file.pdf" % sys.argv[0])
    stripImages(sys.argv[1])
>I was playing with pulling images out of PDF files tonight.
>Thought I'd share.
>
>--Darrell
>
That's neat! However, many PDFs define 'inline images'
buried in the page stream, which itself may
be compressed. So this won't work for all cases.
Andy Robinson
CEO/Chief Architect, ReportLab Inc.