from scrapy.contrib.spiders import CrawlSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import HtmlXPathSelector
from scrapy import log
from spider.items import Item
#from spider.settings import JsonWriterPipeline
class MySpider(CrawlSpider):
    name = 'facebook'
    allowed_domains = ['facebook.com']
    start_urls = ['https://login.facebook.com/login.php']
    def parse(self, response):
        # fill in and submit the login form found on the start page
        return [FormRequest.from_response(response,
                    formname='login_form',
                    formdata={'email': 'xxx',
                              'pass': 'xxx'},
                    callback=self.after_login)]
    def after_login(self, response):
        # check that the login succeeded before going on
        if "authentication failed" in response.body:
            self.log("Login failed", level=log.ERROR)
            return
        # logged in: request the page to scrape
        return Request(url="put some URL here",
                       callback=self.parse_items)
    def parse_items(self, response):
        hxs = HtmlXPathSelector(response)
        titles = hxs.select("some path here")
        items = []
        for title in titles:
            item = Item()
            # select relative to the current node ('title'), not the whole list
            item['friendName'] = title.select("some path here").extract().pop().encode('utf-8', 'strict')
            item['numberOffriends'] = title.select("some path here").extract().pop().encode('utf-8', 'strict')
            items.append(item)
        return items
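For reference, the Item class imported above has to declare the two fields used in parse_items. A minimal sketch of spider/items.py, assuming those are its only fields (the names come straight from the code above):

# spider/items.py -- minimal sketch; field names taken from parse_items
from scrapy.item import Item as BaseItem, Field

class Item(BaseItem):
    friendName = Field()
    numberOffriends = Field()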
So I'm storing the scraped data in a MySQL database (that's what the .pop().encode('utf-8', 'strict') calls are for: they turn each selector result into a plain UTF-8 string). For the moment I'm receiving garbage data; I still have to work on the XPaths and on the JavaScript embedded in the HTML tags.
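The MySQL part itself is done with an item pipeline. This is only a rough sketch of the idea, not my exact code: it assumes the MySQLdb driver is installed and that a friends table with these (made-up) columns already exists in a local facebook database.

# spider/pipelines.py -- rough sketch, assumes MySQLdb and an existing
# 'friends' table in a local 'facebook' database
import MySQLdb

class MySQLStorePipeline(object):
    def open_spider(self, spider):
        # one connection for the whole crawl
        self.conn = MySQLdb.connect(host='localhost', user='root',
                                    passwd='xxx', db='facebook',
                                    charset='utf8')
        self.cursor = self.conn.cursor()

    def process_item(self, item, spider):
        # parameterized query so values are escaped by the driver
        self.cursor.execute(
            "INSERT INTO friends (friend_name, number_of_friends) "
            "VALUES (%s, %s)",
            (item['friendName'], item['numberOffriends']))
        self.conn.commit()
        return item

    def close_spider(self, spider):
        self.conn.close()

The pipeline then gets enabled in settings.py with ITEM_PIPELINES = ['spider.pipelines.MySQLStorePipeline'] (a plain list in these older Scrapy versions; newer releases expect a dict with an order number).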
Hope this helps a little bit.