1 from __future__ import unicode_literals
3 from .common import InfoExtractor
15 class EscapistIE(InfoExtractor):
16 _VALID_URL = r'https?://?(www\.)?escapistmagazine\.com/videos/view/[^/?#]+/(?P<id>[0-9]+)-[^/?#]*(?:$|[?#])'
17 _USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko'
19 'url': 'http://www.escapistmagazine.com/videos/view/the-escapist-presents/6618-Breaking-Down-Baldurs-Gate',
20 'md5': 'ab3a706c681efca53f0a35f1415cf0d1',
24 'description': "Baldur's Gate: Original, Modded or Enhanced Edition? I'll break down what you can expect from the new Baldur's Gate: Enhanced Edition.",
25 'uploader_id': 'the-escapist-presents',
26 'uploader': 'The Escapist Presents',
27 'title': "Breaking Down Baldur's Gate",
28 'thumbnail': 're:^https?://.*\.jpg$',
# Extract metadata and the player configuration URL for the video page at `url`.
# Several lines of this method are elided from the listing.
33 def _real_extract(self, url):
34 video_id = self._match_id(url)
# Fetch the page with the class-level User-Agent header attached to the request.
35 webpage_req = compat_urllib_request.Request(url)
36 webpage_req.add_header('User-Agent', self._USER_AGENT)
37 webpage = self._download_webpage(webpage_req, video_id)
# Uploader id: the path that follows /videos/view/ in the headline link.
# Searched with fatal=False.
39 uploader_id = self._html_search_regex(
40 r"<h1\s+class='headline'>\s*<a\s+href='/videos/view/(.*?)'",
41 webpage, 'uploader ID', fatal=False)
# Uploader name: content of the headline element up to the first closing </a>.
# Searched with fatal=False.
42 uploader = self._html_search_regex(
43 r"<h1\s+class='headline'>(.*?)</a>",
44 webpage, 'uploader', fatal=False)
45 description = self._html_search_meta('description', webpage)
46 duration = parse_duration(self._html_search_meta('duration', webpage))
# The `title` meta tag is looked up with fatal=True; only the text after the
# first ' : ' separator is kept as the title.
# NOTE(review): str.partition() returns '' for the tail when ' : ' is absent,
# which would leave `title` empty -- confirm the page always uses this format.
48 raw_title = self._html_search_meta('title', webpage, fatal=True)
49 title = raw_title.partition(' : ')[2]
# Player configuration URL, percent-decoded after extraction. Only two
# alternatives of the search pattern (both flashvars "config=" forms) are
# visible; the rest of the pattern is elided from this listing.
51 config_url = compat_urllib_parse.unquote(self._html_search_regex(
54 <param\s+name="flashvars".*?\s+value="config=|
55 flashvars="config=
59 webpage, 'config URL'))
# Nested helper: download the player configuration at `cfg_url` and inspect
# its playlist. `name` appears only in the progress/error messages visible
# here; the use of `quality` falls in lines not shown in this listing.
64 def _add_format(name, cfg_url, quality):
# The configuration request carries the same User-Agent as the page request.
65 cfg_req = compat_urllib_request.Request(cfg_url)
66 cfg_req.add_header('User-Agent', self._USER_AGENT)
67 config = self._download_json(
69 'Downloading ' + name + ' configuration',
70 'Unable to download ' + name + ' configuration',
# NOTE(review): js_to_json is applied to the response first; presumably the
# payload is JavaScript-style rather than strict JSON -- confirm.
71 transform_source=js_to_json)
73 playlist = config['playlist']
# Entries are distinguished by their 'eventCategory' value: 'Video' versus
# 'Video Postroll'. The loop header and both branch bodies are elided.
75 if p.get('eventCategory') == 'Video':
77 elif p.get('eventCategory') == 'Video Postroll':
87 'User-Agent': self._USER_AGENT,
91 _add_format('normal', config_url, quality=0)
92 hq_url = (config_url +
93 ('&hq=1' if '?' in config_url else config_url + '?hq=1'))
95 _add_format('hq', hq_url, quality=1)
96 except ExtractorError:
97 pass # That's fine, we'll just use normal quality
98 self._sort_formats(formats)
# A '/escapist/sales-marketing/' URL in the last format after sorting is
# reported as the site having blocked this IP address (expected=True).
# NOTE(review): formats[-1] raises IndexError if `formats` is empty -- confirm
# that _sort_formats (or earlier code) rules that out.
100 if '/escapist/sales-marketing/' in formats[-1]['url']:
101 raise ExtractorError('This IP address has been blocked by The Escapist', expected=True)
# Fields of the main result dict (its opening and some keys are elided).
106 'uploader': uploader,
107 'uploader_id': uploader_id,
109 'thumbnail': self._og_search_thumbnail(webpage),
110 'description': description,
111 'duration': duration,
# When the downloader's include_ads parameter is set and ad formats were
# collected, a second '<video_id>-ad' entry titled '<title> (Postroll)' is
# built and listed next to the main result under 'entries'.
114 if self._downloader.params.get('include_ads') and ad_formats:
115 self._sort_formats(ad_formats)
117 'id': '%s-ad' % video_id,
118 'title': '%s (Postroll)' % title,
119 'formats': ad_formats,
123 'entries': [res, ad_res],