My spider collects the actress names in its second for-loop, but it does not save them as a list in the item (only a single value ends up in item['Actress']). How can I make my spider save all of the gathered Actress data?
I don't know where the issue lies; it is probably very easy to fix, since I am new to Scrapy. I hope to find a solution.
I am using Ubuntu 14.04 and Python 3.4.
Thanks for any tips and let me know if I'm missing anything.
My Spider:
```
class EnMovieSpider(scrapy.Spider):
    """Spider that walks the movie listing pages and scrapes each detail page.

    ``parse`` follows every movie link found on a listing page and then the
    "next" pagination link; ``parse_item`` fills one ``EnMovie`` item per
    matching detail section and yields it.
    """

    name = "en_movie"
    allowed_domains = ["r18.com"]
    start_urls = ["http://www.r18.com/videos/vod/movies/list/pagesize=30/price=all/sort=popular/type=all/page=1/", ]

    # (item field, XPath evaluated relative to the detail section) pairs.
    # Kept as a table so every text field goes through the same
    # extract -> first element -> UTF-8 encode steps, in a fixed order.
    _DETAIL_XPATHS = (
        ('Content_ID', 'normalize-space(div[2]/dl/dt[contains (.,"Content ID:")]/following-sibling::dd[1]/text())'),
        ('release_date', 'normalize-space(div[2]/dl[1]/dt[contains (.,"Release Date:")]/following-sibling::dd[1]/text())'),
        ('running_time', 'normalize-space(div[2]/dl[1]/dt[contains (.,"Runtime:")]/following-sibling::dd[1]/text())'),
        ('Series', 'normalize-space(div[2]/dl[2]/dt[contains (.,"Series:")]/following-sibling::dd[1]/text())'),
        ('Studio', 'normalize-space(div[2]/dl[2]/dt[contains (.,"Studio:")]/following-sibling::dd[1]/a/text())'),
        ('Director', 'normalize-space(div[2]/dl[2]/dt[contains (.,"Director:")]/following-sibling::dd[1]/text())'),
        ('Label', 'normalize-space(div[2]/dl[2]/dt[contains (.,"Label:")]/following-sibling::dd[1]/text())'),
    )

    def parse(self, response):
        """Yield a request per movie link on the page, then one for the next page.

        Movie links are handled by ``parse_item``; the pagination link, when
        present, is handled by this method again.
        """
        for sel in response.xpath('//*[@id="contents"]/div[2]/section/ul[2]/li/a/@href'):
            url = response.urljoin(sel.extract())
            yield scrapy.Request(url, callback=self.parse_item)

        next_page = response.css("#contents > div.main > section > ul.cmn-box-tabMain01 > li.col04Wrap01 > div > div > ol > li.next > a::attr('href')")
        # An empty selector list is falsy, so the crawl stops on the last page.
        if next_page:
            url = response.urljoin(next_page[0].extract())
            yield scrapy.Request(url, self.parse)

    def parse_item(self, response):
        """Yield one ``EnMovie`` item per detail section found in *response*.

        Text fields are stored UTF-8 encoded, ``image_urls`` as the list of
        image ``src`` values, and ``Actress`` as a list of stripped names.
        """
        for sel in response.xpath('//*[@id="contents"]/div[10]/section/section[1]/section[1]'):
            item = EnMovie()

            for field, query in self._DETAIL_XPATHS:
                # NOTE(review): on Python 3 .encode() produces bytes; kept
                # unchanged so the stored values do not change type.
                item[field] = sel.xpath(query).extract()[0].encode('utf-8')

            item['image_urls'] = sel.xpath('div[1]/img/@src').extract()

            # NOTE(review): the leading `//` searches the whole document, not
            # just `sel`; left unchanged to keep matching the same nodes.
            actresses = sel.xpath("//*[@itemprop='actors']//*[@itemprop='name']/text()").extract()
            # Store every name as a list. Assigning inside a per-name loop
            # overwrote the field each time, leaving only the last actress.
            item['Actress'] = [actress.strip() for actress in actresses]

            yield item
```
# Extract the text of every actor "name" node as a list of strings.
actresses = sel.xpath("//*[@itemprop='actors']//*[@itemprop='name']/text()").extract()
# Store the whole list in a single assignment; assigning each name in turn
# inside a loop would keep only the last one.
item['Actress'] = [actress.strip() for actress in actresses]
# Yield the item once, after all of its fields have been filled in.
yield item