Added an IE for GameSpot. Although GameSpot allows downloading, it is only availab...
[youtube-dl] / youtube_dl / extractor / gamespot.py
1 import re
2 import xml.etree.ElementTree
3
4 from .common import InfoExtractor
5 from ..utils import (
6     unified_strdate,
7 )
8
class GameSpotIE(InfoExtractor):
    """Information extractor for video pages on gamespot.com.

    The numeric id that ends the page URL slug is used to query GameSpot's
    video player XML endpoint; the ``playList/clip`` element of the response
    supplies the media URL and its metadata.
    """

    # Group 3 is the numeric video id at the end of the slug.  It must be
    # ``\d+``: the former ``[^/d]+`` was a character class meaning "anything
    # but '/' and the letter 'd'", which only matched ids by accident.
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-(\d+)/'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
        u"info_dict": {
            u"title": u"Arma III - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Return a one-element list with the info dict for the video at *url*.

        The media URL and title are required.  The thumbnail, view count and
        upload date are optional: each is reported as ``None`` when the
        corresponding XML element is missing or empty.
        """
        mobj = re.match(self._VALID_URL, url)
        # The id group is digits only, so it can be used as-is.
        video_id = mobj.group(3)

        info_url = "http://www.gamespot.com/pages/video_player/xml.php?id=" + video_id
        info_xml = self._download_webpage(info_url, video_id)
        doc = xml.etree.ElementTree.fromstring(info_xml)
        clip_el = doc.find('./playList/clip')

        # Required fields: without them there is nothing to download.
        video_url = clip_el.find('./URI').text
        title = clip_el.find('./title').text
        # The extension is whatever follows the last dot of the media URL.
        ext = video_url.rpartition('.')[2]

        # Optional fields: findtext() yields None for a missing element and
        # '' for an empty one, so absent metadata no longer aborts extraction.
        thumbnail_url = clip_el.findtext('./screenGrabURI') or None
        views = clip_el.findtext('./views')
        view_count = int(views) if views else None
        post_date = clip_el.findtext('./postDate')
        upload_date = unified_strdate(post_date) if post_date else None

        return [{
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
            'upload_date': upload_date,
            'view_count': view_count,
        }]