Merge remote-tracking branch 'upstream/master' into bliptv
[youtube-dl] / youtube_dl / extractor / gametrailers.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5     int_or_none,
6     parse_age_limit,
7     url_basename,
8 )
9
10
class GametrailersIE(InfoExtractor):
    """Information extractor for video pages on www.gametrailers.com.

    The watch page only embeds a player.  The media list and most of the
    metadata live in an ``embedVars`` JSON object on the embed page, so
    extraction downloads two pages: the watch page and the embed page.
    """

    _VALID_URL = r'http://www\.gametrailers\.com/videos/view/[^/]+/(?P<id>.+)'

    _TEST = {
        'url': 'http://www.gametrailers.com/videos/view/gametrailers-com/116437-Just-Cause-3-Review',
        'md5': 'f28c4efa0bdfaf9b760f6507955b6a6a',
        'info_dict': {
            'id': '2983958',
            'ext': 'mp4',
            'display_id': '116437-Just-Cause-3-Review',
            'title': 'Just Cause 3 - Review',
            'description': 'It\'s a lot of fun to shoot at things and then watch them explode in Just Cause 3, but should there be more to the experience than that?',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        The display id is the part of *url* captured by ``_VALID_URL``; the
        video id is the basename of the embed URL found on the watch page.
        Returns a dict with ``id``, ``display_id``, ``title``, ``formats``,
        ``thumbnail``, ``description``, ``duration`` and ``age_limit``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The page <title> has the video title first, followed by a '|'
        # separator; only the part before the separator is kept.
        title = self._html_search_regex(
            r'<title>(.+?)\|', webpage, 'title').strip()

        # The embed URL is written protocol-relative ("//embed..."), so a
        # scheme has to be supplied before it can be downloaded.
        embed_url = self._proto_relative_url(
            self._search_regex(
                r'src=\'(//embed\.gametrailers\.com/embed/[^\']+)\'', webpage,
                'embed url'),
            scheme='http:')
        video_id = url_basename(embed_url)

        embed_page = self._download_webpage(embed_url, video_id)
        embed_vars_json = self._search_regex(
            r'(?s)var embedVars = (\{.*?\})\s*</script>', embed_page,
            'embed vars')
        info = self._parse_json(embed_vars_json, video_id)

        formats = []
        for media in info.get('media') or []:
            # Only entries whose purpose is 'play' are used as formats.
            if media.get('mediaPurpose') != 'play':
                continue
            media_url = media.get('uri')
            if not media_url:
                # An entry without a URI cannot be downloaded; skip it
                # instead of aborting the whole extraction.
                continue
            formats.append({
                'url': media_url,
                'height': int_or_none(media.get('height')),
                # The key used to be misspelled 'width:' (with a colon), so
                # the width never reached the format dict under its name.
                'width': int_or_none(media.get('width')),
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': info.get('thumbUri'),
            'description': self._og_search_description(webpage),
            'duration': int_or_none(info.get('videoLengthInSeconds')),
            'age_limit': parse_age_limit(info.get('audienceRating')),
        }