item['price'] = hxs.select('/html').re('[0-9]€')
--
You received this message because you are subscribed to the Google Groups "scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users...@googlegroups.com.
To post to this group, send email to scrapy...@googlegroups.com.
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/groups/opt_out.
--
You received this message because you are subscribed to a topic in the Google Groups "scrapy-users" group.
To unsubscribe from this topic, visit https://groups.google.com/d/topic/scrapy-users/Q5YJPx3vEiQ/unsubscribe.
To unsubscribe from this group and all its topics, send an email to scrapy-users...@googlegroups.com.
To post to this group, send email to scrapy...@googlegroups.com.
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/groups/opt_out.
def parse(self, response):
    """Extract prices from the page and store them in the ``urls`` table.

    The text of every ``<span>`` element is searched for a number, optionally
    with a ``,`` or ``.`` decimal part, that is followed by whitespace.  The
    n-th match (counting from 0) is written to column ``price_<n>`` of the
    row whose ``url`` equals ``response.url``.

    Args:
        response: Response object for the page; its ``url`` attribute
            selects the row to update.

    Returns:
        A ``DmozItem`` whose ``'price'`` field holds the list of matched
        price strings (empty when nothing matched).
    """
    hxs = HtmlXPathSelector(response)
    item = DmozItem()
    # Raw string so ``\s`` reaches the regex engine unchanged.  The decimal
    # part accepts one or more digits: with a single ``[0-9]`` the text
    # "12,76 " only yielded "76" instead of "12,76".
    item['price'] = hxs.select('//span/text()').re(
        r'([0-9]+(?:[,.][0-9]+)?)\s')

    cursor = self.db.cursor()
    try:
        for index, price in enumerate(item['price']):
            # Only the column suffix is interpolated into the statement, and
            # it is an integer produced by enumerate().  The scraped price
            # and the URL are passed as bound parameters so that quotes in
            # either value cannot break or alter the query.
            # NOTE(review): assumes self.db uses the DB-API "format"
            # paramstyle (``%s`` placeholders, e.g. MySQLdb) -- confirm.
            sql = "update urls set price_%d = %%s where url = %%s" % index
            cursor.execute(sql, (price, response.url))
        self.db.commit()
    finally:
        cursor.close()
    return item
#!/usr/bin/env python
# -*- coding: utf-8 -*-
>>> text = u"<span>12,76 €</span>"
>>> [text]
[u'<span>12,76 \u20ac</span>']
sel.xpath('//span/text()').re(u'(\d+,\d+) \u20ac')