Hi,
I have written below script to run scrapy but it is not working.
# -*- coding: utf-8 -*-
import logging
from multiprocessing import Process
from twisted.internet import reactor
from scrapy.crawler import Crawler
from scrapy import log, signals
from bollywood.spiders.bollyspider import BollywoodSpider
from scrapy.utils.project import get_project_settings
from scrapy.log import ScrapyFileLogObserver
from scrapy.settings import Settings
from scrapy.xlib.pydispatch import dispatcher
class BollywoodCrawlerScript():
def __init__(self):
self.crawler = Crawler(get_project_settings())
self.crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
self.crawler.configure()
def _crawl(self, domain):
self.crawler.crawl(BollywoodSpider(source=domain))
log.start(logstdout=True)
log.msg('Running reactor...')
reactor.run()
log.msg('Reactor stopped.')
def stop_reactor(self):
reactor.stop()
def crawl(self, domain):
self._crawl(domain)
def start_crawl(domain='filmfare'):
crawler = BollywoodCrawlerScript()
crawler.crawl(domain)
if __name__ == '__main__':
start_crawl()
Output:
(crawler)MC-INMU-DIM042L:scrapy-bollywood rranjan$ python startcrawl.py
/Users/rranjan/Documents/Disney/scrapy-bollywood/bollywood/utils.py:4: ScrapyDeprecationWarning: Module `scrapy.conf` is deprecated, use `crawler.settings` attribute instead
from scrapy.conf import settings
2015-04-07 12:14:25+0530 [scrapy] INFO: Running reactor...
^C2015-04-07 12:14:37+0530 [scrapy] INFO: Reactor stopped.
I am not able to debug i.e. what's the problem. Crawler is not able to start the crawl process.