[screencastomatic] Add new extractor (Fixes #4497)
[youtube-dl] / youtube_dl / extractor / screencastomatic.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_urlparse
6 from ..utils import (
7     ExtractorError,
8     js_to_json,
9 )
10
11
class ScreencastOMaticIE(InfoExtractor):
    """Information extractor for screencast-o-matic.com watch pages.

    The watch page embeds a ``jwplayer('mp4Player').setup({...})`` call; the
    object passed to it is parsed to locate the HTML5 video file and the
    thumbnail image.
    """

    _VALID_URL = r'https?://screencast-o-matic\.com/watch/(?P<id>[0-9a-zA-Z]+)'
    _TEST = {
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video at ``url``.

        Downloads the watch page, pulls the jwplayer setup object out of it,
        and picks the first entry of its ``modes`` list whose ``type`` is
        ``'html5'``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``url``, ``ext`` and ``thumbnail``.

        Raises ExtractorError when no ``'html5'`` mode is present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The dot before "setup" is escaped so only the literal method call
        # matches, not an arbitrary character.
        setup_js = self._search_regex(
            r"(?s)jwplayer\('mp4Player'\)\.setup\((\{.*?\})\);",
            webpage, 'setup code')
        # The setup argument is a JavaScript object literal, so it is run
        # through js_to_json before being parsed.
        data = self._parse_json(setup_js, video_id, transform_source=js_to_json)
        try:
            video_data = next(
                m for m in data['modes'] if m.get('type') == 'html5')
        except StopIteration:
            raise ExtractorError('Could not find any video entries!')
        # The file entry is joined against the page URL, so a relative path
        # in the player configuration still resolves to a full URL.
        video_url = compat_urlparse.urljoin(url, video_data['config']['file'])
        thumbnail = data.get('image')

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'url': video_url,
            'ext': 'mp4',
            'thumbnail': thumbnail,
        }