_ Git - youtube-dl/blob - youtube_dl/extractor/gamespot.py

   1 import re
   2 import xml.etree.ElementTree
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     unified_strdate,
   7 )
   8
   9 class GameSpotIE(InfoExtractor):
  10     _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/([^/]+)/videos/([^/]+)-([^/d]+)/'
  11     _TEST = {
  12         u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
  13         u"file": u"6410818.mp4",
  14         u"md5": u"5569d64ca98db01f0177c934fe8c1e9b",
  15         u"info_dict": {
  16             u"title": u"Arma III - Community Guide: SITREP I",
  17             u"upload_date": u"20130627",
  18         }
  19     }
  20
  21
  22     def _real_extract(self, url):
  23         mobj = re.match(self._VALID_URL, url)
  24         video_id = mobj.group(3).split("-")[-1]
  25         info_url = "http://www.gamespot.com/pages/video_player/xml.php?id="+str(video_id)
  26         info_xml = self._download_webpage(info_url, video_id)
  27         doc = xml.etree.ElementTree.fromstring(info_xml)
  28         clip_el = doc.find('./playList/clip')
  29
  30         video_url = clip_el.find('./URI').text
  31         title = clip_el.find('./title').text
  32         ext = video_url.rpartition('.')[2]
  33         thumbnail_url = clip_el.find('./screenGrabURI').text
  34         view_count = int(clip_el.find('./views').text)
  35         upload_date = unified_strdate(clip_el.find('./postDate').text)
  36
  37         return [{
  38             'id'          : video_id,
  39             'url'         : video_url,
  40             'ext'         : ext,
  41             'title'       : title,
  42             'thumbnail'   : thumbnail_url,
  43             'upload_date' : upload_date,
  44             'view_count'  : view_count,
  45         }]