In [1]: import js2xml
In [2]: import urlparse
In [3]: import pprint
In [4]: for script in response.css('#player script').xpath('string()').extract():
jstree = js2xml.parse(script)
data = js2xml.jsonlike.getall(jstree)
for d in data:
pprint.pprint(d)
...:
{}
{'args': {'account_playback_token': 'QUFFLUhqa0sweExRZno5OHZEaGcwWVVQaXAxVWh0NUNFZ3xBQ3Jtc0tseE9DRUw3cFVRbkFGN1hub2VmQlNERGl3WjFIQV84aTI0b0lxZnhwdDZKRl96N1g5eWN3dkZER1pFbVM4dS1FeWJoc1FJeTBXdS0tbU5LY1NsWngtSHY1R0hoTl9xdy1iWUNoam1nRFM2czEweVdMNA==',
'adaptive_fmts': 'size=1280x720&clen=51269588&fps=15&itag=136&init=0-709...bitrate=80798',
'allow_embed': '1',
'allow_ratings': '1',
'atc': 'a=3&b=nhjwMM7ySu8wj8OhutnokFK8Dvs&c=1419949090&d=1&e=1EFnX1UkXVU&c3a=28&c1a=1&hh=hKbH2J9f2WwblpFs2hvo0H17oZo',
'author': 'Michael Herman',
'avg_rating': '4.948387146',
'c': 'WEB',
'cc3_module': '1',
'cc_asr': '1',
'cc_font': 'Arial Unicode MS, arial, verdana, _sans',
'cc_load_policy': '2',
'cl': '82697338',
'cr': 'FR',
'csi_page_type': 'watch,watch7',
'dash': '1',
'enablecsi': '1',
'enablejsapi': 1,
'eventid': 'IrSiVP-kC4v4cKrwgRg',
'fexp': '900718,927622,931342,932404,938809,9405699,9406022,940927,940940,941004,943917,947209,947218,948124,952302,952605,952901,955110,955301,957103,957105,957201',
'fmt_list': '22/1280x720/9/0/115,43/640x360/99/0/0,18/640x360/9/0/115,5/426x240/7/0/0,36/426x240/99/1/0,17/256x144/99/1/0',
'hl': 'en_US',
'host_language': 'en',
'idpj': '-6',
'iv3_module': '1',
'iv_load_policy': '1',
'keywords': 'Scrapy,Python,scraping,python scrapy,web scraping',
'ldpj': '-25',
'length_seconds': '717',
'no_get_video_log': '1',
'of': 'lNeUuIm8BRrYa4UFYW3Vbw',
'plid': 'AAULb6kfjbEHoNwt',
'pltype': 'contentugc',
'ptk': 'youtube_none',
'ssl': '1',
't': '1',
'timestamp': '1419949090',
'title': 'Scraping Web Pages with Scrapy',
'tmi': '1',
'token': '1',
'ucid': 'UCt7yOnL7bI7yCa1Xe_GTjJQ',
'video_id': '1EFnX1UkXVU',
'view_count': '52035',
'vq': 'auto',
'html': '/html5_player_template',
'attrs': {'id': 'movie_player'},
'html5': False,
'messages': {'player_fallback': ['Adobe Flash Player or an HTML5 supported browser is required for video playback.<br><a href="http://get.adobe.com/flashplayer/">Get the latest Flash Player </a><br><a href="/html5">Learn more about upgrading to an HTML5 browser</a>']}, 'min_version': '8.0.0',
'params': {'allowfullscreen': 'true',
'allowscriptaccess': 'always',
'bgcolor': '#000000'},
'sts': 16427,
[]
In [5]: for script in response.css('#player script').xpath('string()').extract():
...: jstree = js2xml.parse(script)
...: data = js2xml.jsonlike.getall(jstree)
...: for d in data:
...: try:
...: if d:
...: pprint.pprint(urlparse.parse_qsl(d.get("args", {}).get("url_encoded_fmt_stream_map", "")))
...: except:
...: pass
...:
('quality', 'hd720'),
('itag', '22'),
('type', 'video/mp4; codecs="avc1.64001F, mp4a.40.2"'),
('url',
('quality', 'medium'),
('itag', '43'),
('type', 'video/webm; codecs="vp8.0, vorbis"'),
('url',
('quality', 'medium'),
('itag', '18'),
('type', 'video/mp4; codecs="avc1.42001E, mp4a.40.2"'),
('url',
('quality', 'small'),
('itag', '5'),
('type', 'video/x-flv'),
('url',
('quality', 'small'),
('itag', '36'),
('type', 'video/3gpp; codecs="mp4v.20.3, mp4a.40.2"'),
('url',
('quality', 'small'),
('itag', '17'),
('type', 'video/3gpp; codecs="mp4v.20.3, mp4a.40.2"'),
('url',