[youtube] Fix extraction.
[youtube-dl] / youtube_dl / extractor / egghead.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_str
6 from ..utils import (
7     determine_ext,
8     int_or_none,
9     try_get,
10     unified_timestamp,
11     url_or_none,
12 )
13
14
class EggheadCourseIE(InfoExtractor):
    """Playlist extractor for egghead.io course pages.

    The course's lessons are not extracted here; each one is emitted as a
    URL result tagged with ``EggheadLessonIE``'s key.
    """

    IE_DESC = 'egghead.io course'
    IE_NAME = 'egghead:course'
    _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript',
        'playlist_count': 29,
        'info_dict': {
            'id': '72',
            'title': 'Professor Frisby Introduces Composable Functional JavaScript',
            'description': 're:(?s)^This course teaches the ubiquitous.*You\'ll start composing functionality before you know it.$',
        },
    }

    @staticmethod
    def _as_text_id(raw_id):
        """Return a truthy *raw_id* as ``compat_str``; pass falsy values through."""
        return compat_str(raw_id) if raw_id else raw_id

    def _real_extract(self, url):
        """Build a playlist result for the course addressed by *url*.

        The lesson list is requested first, then the course metadata. The
        metadata request is made with ``fatal=False`` and a falsy result is
        replaced by an empty dict, so id, title and description may be None.
        """
        slug = self._match_id(url)

        lesson_list = self._download_json(
            'https://egghead.io/api/v1/series/%s/lessons' % slug,
            slug, 'Downloading course lessons JSON')

        entries = []
        for item in lesson_list:
            page_url = url_or_none(item.get('http_url'))
            if page_url:
                # Only lessons exposing a usable 'http_url' become entries.
                entry_id = self._as_text_id(item.get('id'))
                entries.append(self.url_result(
                    page_url, ie=EggheadLessonIE.ie_key(), video_id=entry_id))

        metadata = self._download_json(
            'https://egghead.io/api/v1/series/%s' % slug,
            slug, 'Downloading course JSON', fatal=False) or {}

        # The playlist id comes from the metadata's 'id' field, not the slug
        # matched from the URL.
        course_id = self._as_text_id(metadata.get('id'))

        return self.playlist_result(
            entries, course_id, metadata.get('title'),
            metadata.get('description'))
58
59
class EggheadLessonIE(InfoExtractor):
    """Extractor for a single egghead.io lesson.

    Accepts both the public lesson page URL and the ``api/v1`` lesson URL,
    as expressed by ``_VALID_URL``.
    """

    IE_DESC = 'egghead.io lesson'
    IE_NAME = 'egghead:lesson'
    _VALID_URL = r'https://egghead\.io/(?:api/v1/)?lessons/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box',
        'info_dict': {
            'id': '1196',
            'display_id': 'javascript-linear-data-flow-with-container-style-types-box',
            'ext': 'mp4',
            'title': 'Create linear data flow with container style types (Box)',
            'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e',
            'thumbnail': r're:^https?:.*\.jpg$',
            'timestamp': 1481296768,
            'upload_date': '20161209',
            'duration': 304,
            'view_count': 0,
            'tags': ['javascript', 'free'],
        },
        'params': {
            'skip_download': True,
            'format': 'bestvideo',
        },
    }, {
        'url': 'https://egghead.io/api/v1/lessons/react-add-redux-to-a-react-application',
        'only_matching': True,
    }]

    def _collect_formats(self, media_urls, video_id):
        """Turn the values of the *media_urls* mapping into a formats list.

        Values rejected by ``url_or_none`` are ignored. URLs whose extension
        is ``m3u8`` or ``mpd`` are expanded through the matching manifest
        helper (non-fatally); any other URL becomes a single direct format.
        """
        collected = []
        for candidate in media_urls.values():
            media_url = url_or_none(candidate)
            if not media_url:
                continue
            kind = determine_ext(media_url)
            if kind == 'm3u8':
                collected.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8',
                    m3u8_id='hls', fatal=False))
                continue
            if kind == 'mpd':
                collected.extend(self._extract_mpd_formats(
                    media_url, video_id, mpd_id='dash', fatal=False))
                continue
            collected.append({'url': media_url})
        return collected

    def _real_extract(self, url):
        """Return the info dict for the lesson addressed by *url*.

        ``id``, ``title`` and ``media_urls`` are read with plain indexing,
        so a response lacking any of them raises ``KeyError``; every other
        field is looked up leniently.
        """
        display_id = self._match_id(url)

        data = self._download_json(
            'https://egghead.io/api/v1/lessons/%s' % display_id, display_id)

        # Required fields are resolved before any manifest is fetched.
        video_id = compat_str(data['id'])
        video_title = data['title']

        formats = self._collect_formats(data['media_urls'], video_id)
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'display_id': display_id,
            'title': video_title,
            'description': data.get('summary'),
            'thumbnail': data.get('thumb_nail'),
            'timestamp': unified_timestamp(data.get('published_at')),
            'duration': int_or_none(data.get('duration')),
            'view_count': int_or_none(data.get('plays_count')),
            'tags': try_get(data, lambda d: d['tag_list'], list),
            'series': try_get(
                data, lambda d: d['series']['title'], compat_str),
            'formats': formats,
        }
        return info