[break] adapt to new paths
[youtube-dl] / youtube_dl / extractor / infoq.py
1 import base64
2 import re
3
4 from .common import InfoExtractor
5 from ..utils import (
6     compat_urllib_parse,
7
8     ExtractorError,
9 )
10
11
class InfoQIE(InfoExtractor):
    """Information extractor for infoq.com pages matched by ``_VALID_URL``."""

    _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'

    def _real_extract(self, url):
        """Extract the video information from an InfoQ page.

        The page is downloaded and searched for a ``jsclassref`` assignment
        whose value is base64-decoded and URL-unquoted to obtain the path of
        the media file; that path is appended to the RTMPE base URL. The
        video id and extension are taken from the last path component of the
        resulting URL.

        Args:
            url: Address of the InfoQ page to extract from.

        Returns:
            A list holding a single info dictionary with the keys ``id``,
            ``url``, ``uploader``, ``upload_date``, ``title``, ``ext``,
            ``thumbnail`` and ``description``.

        Raises:
            ExtractorError: If the ``jsclassref`` value is not present in the
                page, or if the media file name carries no extension.
        """
        # The real id is only known once the page has been parsed, so the
        # page URL itself is passed as the id for the download step.
        webpage = self._download_webpage(url, video_id=url)
        self.report_extraction(url)

        # Extract video URL
        mobj = re.search(r"jsclassref ?= ?'([^']*)'", webpage)
        if mobj is None:
            raise ExtractorError(u'Unable to extract video url')
        # b64decode works on bytes; the decoded payload is still URL-quoted.
        encoded_id = mobj.group(1).encode('ascii')
        real_id = compat_urllib_parse.unquote(
            base64.b64decode(encoded_id).decode('utf-8'))
        video_url = 'rtmpe://video.infoq.com/cfx/st/' + real_id

        # Extract title
        video_title = self._search_regex(
            r'contentTitle = "(.*?)";', webpage, u'title')

        # Extract description
        # NOTE(review): fatal=False presumably makes a missing description
        # non-fatal -- confirm against InfoExtractor._html_search_regex.
        video_description = self._html_search_regex(
            r'<meta name="description" content="(.*)"(?:\s*/)?>',
            webpage, u'description', fatal=False)

        # Split on the LAST dot only, so a file name that itself contains
        # dots still yields a usable id instead of failing to unpack.
        video_filename = video_url.split('/')[-1]
        video_id, separator, extension = video_filename.rpartition('.')
        if not separator:
            raise ExtractorError(u'Unable to extract video id')

        info = {
            'id': video_id,
            'url': video_url,
            'uploader': None,
            'upload_date': None,
            'title': video_title,
            'ext': extension,  # Extension is always(?) mp4, but seems to be flv
            'thumbnail': None,
            'description': video_description,
        }

        return [info]