class ItemPipeline(object): def __init__(self): self.conn = MySQLdb.connect(******) self.cursor = self.conn.cursor() log.msg("Initialize mysql connection", level=log.INFO)
def __del__(self): self.conn.close() log.msg("Closing mysql connection", level=log.INFO)
def close_spider(spider): self.conn.close() log.msg("Closing mysql connection", level=log.INFO)
def process_item(self, item, spider): try: self.cursor.execute("""CHECK DUPLICATE ROW""") if self.cursor.fetchone() is None: try: self.cursor.execute("""INSERT NEW ROW""") self.conn.commit() except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1])
return item
else: raise DropItem("Duplicate found.") except MySQLdb.Error, e: print "Error %d: %s" % (e.args[0], e.args[1])from scrapy.xlib.pydispatch import dispatcherfrom scrapy import signals
class ItemPipeline(object):
    """Variant that ties connection cleanup to the spider_closed signal.

    Closing the db connection when the spider_closed signal is broadcast
    looks like the right approach — but note that Scrapy ALSO calls a
    pipeline method named ``close_spider`` automatically, so with the
    dispatcher hookup below the handler can fire twice.  That double
    invocation is exactly what produced the pasted
    ``ProgrammingError: closing a closed connection`` traceback; the
    guard in close_spider makes the second call a no-op.
    """

    def __init__(self):
        # Run close_spider when the spider_closed signal fires.  # NEW
        dispatcher.connect(self.close_spider, signals.spider_closed)

    def close_spider(self, spider):
        # Idempotent close: only the first call actually releases the
        # connection; subsequent calls (signal + automatic hook) do
        # nothing instead of raising.
        # NOTE(review): this fragment never assigns self.conn in
        # __init__ — presumably the full class opens it as in the first
        # pipeline above; getattr keeps this safe either way.
        if getattr(self, 'conn', None) is not None:
            self.conn.close()
            self.conn = None
            log.msg("Closing mysql connection", level=log.INFO)
ERROR: Error caught on signal handler: <bound method ?.close_spider of <pipelines.Pipeline object at 0x3a0afd0>> Traceback (most recent call last): File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 545, in _runCallbacks current.result = callback(current.result, *args, **kw) File "/usr/lib/pymodules/python2.7/scrapy/core/engine.py", line 272, in <lambda> spider=spider, reason=reason, spider_stats=self.crawler.stats.get_stats())) File "/usr/lib/pymodules/python2.7/scrapy/signalmanager.py", line 23, in send_catch_log_deferred return signal.send_catch_log_deferred(*a, **kw) File "/usr/lib/pymodules/python2.7/scrapy/utils/signal.py", line 53, in send_catch_log_deferred *arguments, **named) --- <exception caught here> --- File "/usr/lib/python2.7/dist-packages/twisted/internet/defer.py", line 134, in maybeDeferred result = f(*args, **kw) File "/usr/lib/pymodules/python2.7/scrapy/xlib/pydispatch/robustapply.py", line 54, in robustApply return receiver(*arguments, **named) File "/home/scrapy/pipelines.py", line 30, in close_spider self.conn.close() _mysql_exceptions.ProgrammingError: closing a closed connection