Hi everyone, I'm a new scrapy user and I have try to extract the item
from my crawling for several days.
#######################################################
Spider.py
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.loader import XPathItemLoader
from trip.items import TripItem, Field
class TripSpider(BaseSpider):
name = "tripadvisor"
domain_name = "
tripadvisor.com"
start_urls = ['
http://www.tripadvisor.com/ShowUserReviews-g293916-
d311043-r59187869.html']
def parse(self, response):
hxs = HtmlXPathSelector(response)
selector = hxs.select('//div[contains(@class,"deckC")]')
items = []
## ItemLoader
l = XPathItemLoader(item=TripItem(), selector= selector)
l.add_xpath('place', '//div[@id and @class]/h2[@class="name
hotel"]/a/text()')
l.add_xpath('quote','//div[@class and @id]/div[1]
[@class="quote"]/text()')
## l.add_xpath('comment','//p[@id]/text()')
l.add_xpath('date','//div[@id and @class]/div[2][@class]/
div[@class="profile"]/div[6][@class="date "]/text()')
yield l.load_item()
SPIDER = TripSpider()
##########################################################
Item.py
class TripItem(Item):
# define the fields for your item here like:
# name = Field()
quote = Field()
date = Field()
place = Field()
##########################################################
pipeline.py
import csv
class CsvWriterPipeline(object):
def __init__(self):
self.csvwriter = csv.writer(open('items.csv', 'wb'))
def process_item(self, item,spider ):
self.csvwriter.writerow([item['quote'], item['date']])
return item
#########################################################
There are my output....
2010-10-17 16:12:43+0700 [tripadvisor] INFO: Passed TripItem(
date=[u'\nMar 22, 2010\n', u'\nMar 4, 2010\n', u'\nFeb 11, 2010\n',
u'\nFeb 4, 2010\n', u'\nFeb 2,
2010\n'],
quote=[u'Large, impressive Buddha, but little quietness...', u'wat pho
thailand nice toursit spot', u'Size queens will love it!', u'Favourite
temple in Thailand', u'Beautiful'],
place=[u'Temple of the Reclining Buddha (Wat Pho)', u'Temple of the
Reclining Buddha (Wat Pho)', u'Temple of the Reclining Buddha (Wat
Pho)', u'Temple of the Reclining Buddha (Wat Pho)', u'Temple of the
Reclining Buddha (Wat Pho)'])
but i wish to export to XML or CSV file and my expect output is
TripItem( ## First Item
date=[u'\nMar 22, 2010\n'],
quote=[u'Large, impressive Buddha, but little quietness...'],
place=[u'Temple of the Reclining Buddha (Wat Pho)'])
TripItem( ## Second Item
date=[u'\nMar 4, 2010\n'],
quote=[u'wat pho thailand nice toursit spot'],
place=[u'Temple of the Reclining Buddha (Wat Pho)'])
....
I don't know how to split them and they make me mortal for several
days. I'm a bachelor's student and this is one part of my senior
project. I have try many of Scrapy's tutorials but there're not help
me anymore.
Highly hope someone can help me
Thank you in advance