[raywenderlich] Extract videos in order
[youtube-dl] / youtube_dl / extractor / raywenderlich.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from .vimeo import VimeoIE
7 from ..utils import (
8     extract_attributes,
9     ExtractorError,
10     orderedSet,
11     smuggle_url,
12     unsmuggle_url,
13     urljoin,
14 )
15
16
class RayWenderlichIE(InfoExtractor):
    """Extractor for lesson pages on videos.raywenderlich.com.

    A lesson URL resolves either to the single lesson video, which is
    delegated to the Vimeo extractor, or to a playlist with one entry per
    lesson of the course, depending on the ``noplaylist`` option and on
    whether the URL was smuggled with ``force_video``.
    """

    _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Testing In iOS Episode 1: Introduction',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '105-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }]

    def _real_extract(self, url):
        """Return a single-video URL result or a playlist of all lessons.

        The single-video branch is taken when the ``noplaylist`` option is
        set or when the URL carries smuggled ``force_video`` data (as the
        playlist entries produced below do). Otherwise every lesson linked
        from the page is returned as a playlist entry, in numeric order.

        Raises ExtractorError (expected) when the page shows the
        subscriber-only marker in the single-video branch.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        video_id = '%s/%s' % (course_id, lesson_id)

        webpage = self._download_webpage(url, video_id)

        no_playlist = self._downloader.params.get('noplaylist')
        if no_playlist or smuggled_data.get('force_video', False):
            # Only announce the reason when the user asked for it; smuggled
            # playlist entries reach this branch silently.
            if no_playlist:
                self.to_screen(
                    'Downloading just video %s because of --no-playlist'
                    % video_id)
            if '>Subscribe to unlock' in webpage:
                raise ExtractorError(
                    'This content is only available for subscribers',
                    expected=True)
            vimeo_id = self._search_regex(
                r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
            # The lesson page URL is passed along as referrer for the
            # embedded Vimeo player.
            return self.url_result(
                VimeoIE._smuggle_referrer(
                    'https://player.vimeo.com/video/%s' % vimeo_id, url),
                ie=VimeoIE.ie_key(), video_id=vimeo_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % course_id)

        # Start with the requested lesson so it is part of the playlist even
        # if the page does not link to it; the set removes duplicates.
        lesson_ids = set((lesson_id, ))
        for lesson in re.findall(
                r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
            attrs = extract_attributes(lesson)
            if not attrs:
                continue
            lesson_url = attrs.get('href')
            if not lesson_url:
                continue
            # Use a separate name so the id parsed from the requested URL is
            # not overwritten while scanning the page.
            linked_lesson_id = self._search_regex(
                r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
            if not linked_lesson_id:
                continue
            lesson_ids.add(linked_lesson_id)

        # Ids are digit strings (both patterns capture \d+), so sort them by
        # integer value: a plain string sort would yield 1, 10, 11, ..., 2.
        entries = [
            self.url_result(
                smuggle_url(
                    urljoin(url, ordered_lesson_id), {'force_video': True}),
                ie=RayWenderlichIE.ie_key())
            for ordered_lesson_id in sorted(lesson_ids, key=int)]

        title = self._search_regex(
            r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
            default=None)

        return self.playlist_result(entries, course_id, title)