Move Escapist into its own file
[youtube-dl] / youtube_dl / extractor / escapist.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6     compat_str,
7     compat_urllib_parse,
8
9     ExtractorError,
10 )
11
12
class EscapistIE(InfoExtractor):
    """Information extractor for escapistmagazine.com video pages.

    Metadata (description, thumbnail, title, player URL) is scraped from the
    page's ``<meta>`` tags.  The player URL carries a ``config=`` parameter
    pointing at a configuration document from which the media URL is read.
    """

    _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'

    def _real_extract(self, url):
        """Return a one-element list holding the info dict for ``url``.

        Raises ExtractorError when ``url`` does not match ``_VALID_URL``,
        when the downloaded configuration cannot be parsed as JSON, or when
        the parsed configuration lacks the expected playlist entry.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        showName = mobj.group('showname')
        videoId = mobj.group('episode')

        self.report_extraction(videoId)
        webpage = self._download_webpage(url, videoId)

        # Description and thumbnail are looked up with fatal=False, i.e. they
        # are treated as optional metadata.
        videoDesc = self._html_search_regex(
            r'<meta name="description" content="([^"]*)"',
            webpage, u'description', fatal=False)

        imgUrl = self._html_search_regex(
            r'<meta property="og:image" content="([^"]*)"',
            webpage, u'thumbnail', fatal=False)

        playerUrl = self._html_search_regex(
            r'<meta property="og:video" content="([^"]*)"',
            webpage, u'player url')

        # Label this lookup as the title so that a failure is reported
        # against the right field rather than as a missing player url.
        # Only the part after the last ' : ' separator is kept.
        title = self._html_search_regex(
            r'<meta name="title" content="([^"]*)"',
            webpage, u'title').split(' : ')[-1]

        # The config location is percent-encoded inside the player URL.
        configUrl = self._search_regex(r'config=(.*)$', playerUrl, u'config url')
        configUrl = compat_urllib_parse.unquote(configUrl)

        configJSON = self._download_webpage(configUrl, videoId,
                                            u'Downloading configuration',
                                            u'unable to download configuration')

        # Technically, it's JavaScript, not JSON
        configJSON = configJSON.replace("'", '"')

        try:
            config = json.loads(configJSON)
        except (ValueError,) as err:
            raise ExtractorError(u'Invalid JSON in configuration file: ' + compat_str(err))

        # The media URL is taken from the second playlist entry (index 1).
        # NOTE(review): presumably entry 0 is not the actual episode - confirm.
        # An unexpected configuration shape is reported as an ExtractorError,
        # like the JSON failure above, instead of leaking a raw lookup error.
        try:
            videoUrl = config['playlist'][1]['url']
        except (KeyError, IndexError, TypeError) as err:
            raise ExtractorError(u'Unable to find video URL in configuration: ' + compat_str(err))

        info = {
            'id': videoId,
            'url': videoUrl,
            'uploader': showName,
            'upload_date': None,
            'title': title,
            'ext': 'mp4',
            'thumbnail': imgUrl,
            'description': videoDesc,
            'player_url': playerUrl,
        }

        return [info]