class FetchDetails(CrawlSpider):
    """Spider that searches YouTube for "scrapy" and downloads each result.

    ``parse`` collects the watch-page links from the search results page;
    ``download_video`` records the view count of each watch page and hands
    the page URL to the external ``youtube-dl`` command.
    """

    name = "youtube"
    allowed_domains = ["www.youtube.com"]
    start_urls = ["https://www.youtube.com/results?search_query=scrapy&page=1"]

    def parse(self, response):
        """Yield one request per video link found on the search results page.

        NOTE(review): Scrapy's documentation advises against overriding
        ``parse`` on a ``CrawlSpider`` because the rule machinery relies on
        it. No ``rules`` are visible in this class, so the override is kept;
        confirm that a plain ``Spider`` base class was not intended.
        """
        base_url = "https://www.youtube.com"
        # The hrefs are site-relative, e.g. "/watch?v=1EFnX1UkXVU".
        video_links = Selector(response).xpath(
            "//h3[@class='yt-lockup-title']/a/@href"
        ).extract()
        for video in video_links:
            # Skip result links that do not point at a watch page.
            if "watch" in video:
                yield Request(base_url + video, callback=self.download_video)

    def download_video(self, response):
        """Record the view count of a watch page and download its video.

        Yields a single ``YoutubeItem`` whose ``view_count`` is the stripped
        text of the ``watch-view-count`` element, or ``None`` when the page
        has no such element.
        """
        item = YoutubeItem()

        counts = Selector(response).xpath(
            "//div[@class='watch-view-count']/text()"
        ).extract()
        # Indexing [0] unconditionally would raise IndexError on a page
        # without the element, losing the item and skipping the download.
        view_count = counts[0].strip() if counts else None
        if view_count is None:
            self.log("No view count found on %s" % response.url)
        item['view_count'] = view_count

        # NOTE: the <video> src attribute is not read here; the download is
        # delegated to the youtube-dl command line tool.
        # subprocess.call waits for the command to exit, so this callback
        # blocks for as long as the download takes.
        exit_code = subprocess.call(["youtube-dl", response.url])
        if exit_code != 0:
            self.log(
                "youtube-dl exited with status %d for %s"
                % (exit_code, response.url)
            )

        # Parenthesised so the statement parses on both Python 2 and 3.
        print(view_count)
        yield item
You received this message because you are subscribed to the Google Groups "scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users...@googlegroups.com.
To post to this group, send email to scrapy...@googlegroups.com.
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.
--
You received this message because you are subscribed to a topic in the Google Groups "scrapy-users" group.
To unsubscribe from this topic, visit https://groups.google.com/d/topic/scrapy-users/3fqqBEmnNYE/unsubscribe.
To unsubscribe from this group and all its topics, send an email to scrapy-users...@googlegroups.com.