_ Git - youtube-dl/blob - youtube_dl/extractor/raywenderlich.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from .vimeo import VimeoIE
   7 from ..utils import (
   8     extract_attributes,
   9     ExtractorError,
  10     orderedSet,
  11     smuggle_url,
  12     unsmuggle_url,
  13     urljoin,
  14 )
  15
  16
  17 class RayWenderlichIE(InfoExtractor):
  18     _VALID_URL = r'https?://videos\.raywenderlich\.com/courses/(?P<course_id>[^/]+)/lessons/(?P<id>\d+)'
  19
  20     _TESTS = [{
  21         'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  22         'info_dict': {
  23             'id': '248377018',
  24             'ext': 'mp4',
  25             'title': 'Testing In iOS Episode 1: Introduction',
  26             'duration': 133,
  27             'uploader': 'Ray Wenderlich',
  28             'uploader_id': 'user3304672',
  29         },
  30         'params': {
  31             'noplaylist': True,
  32             'skip_download': True,
  33         },
  34         'add_ie': [VimeoIE.ie_key()],
  35         'expected_warnings': ['HTTP Error 403: Forbidden'],
  36     }, {
  37         'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
  38         'info_dict': {
  39             'title': 'Testing in iOS',
  40             'id': '105-testing-in-ios',
  41         },
  42         'params': {
  43             'noplaylist': False,
  44         },
  45         'playlist_count': 29,
  46     }]
  47
  48     def _real_extract(self, url):
  49         url, smuggled_data = unsmuggle_url(url, {})
  50
  51         mobj = re.match(self._VALID_URL, url)
  52         course_id, lesson_id = mobj.group('course_id', 'id')
  53         video_id = '%s/%s' % (course_id, lesson_id)
  54
  55         webpage = self._download_webpage(url, video_id)
  56
  57         no_playlist = self._downloader.params.get('noplaylist')
  58         if no_playlist or smuggled_data.get('force_video', False):
  59             if no_playlist:
  60                 self.to_screen(
  61                     'Downloading just video %s because of --no-playlist'
  62                     % video_id)
  63             if '>Subscribe to unlock' in webpage:
  64                 raise ExtractorError(
  65                     'This content is only available for subscribers',
  66                     expected=True)
  67             vimeo_id = self._search_regex(
  68                 r'data-vimeo-id=["\'](\d+)', webpage, 'video id')
  69             return self.url_result(
  70                 VimeoIE._smuggle_referrer(
  71                     'https://player.vimeo.com/video/%s' % vimeo_id, url),
  72                 ie=VimeoIE.ie_key(), video_id=vimeo_id)
  73
  74         self.to_screen(
  75             'Downloading playlist %s - add --no-playlist to just download video'
  76             % course_id)
  77
  78         lesson_ids = set((lesson_id, ))
  79         for lesson in re.findall(
  80                 r'(<a[^>]+\bclass=["\']lesson-link[^>]+>)', webpage):
  81             attrs = extract_attributes(lesson)
  82             if not attrs:
  83                 continue
  84             lesson_url = attrs.get('href')
  85             if not lesson_url:
  86                 continue
  87             lesson_id = self._search_regex(
  88                 r'/lessons/(\d+)', lesson_url, 'lesson id', default=None)
  89             if not lesson_id:
  90                 continue
  91             lesson_ids.add(lesson_id)
  92
  93         entries = []
  94         for lesson_id in sorted(lesson_ids):
  95             entries.append(self.url_result(
  96                 smuggle_url(urljoin(url, lesson_id), {'force_video': True}),
  97                 ie=RayWenderlichIE.ie_key()))
  98
  99         title = self._search_regex(
 100             r'class=["\']course-title[^>]+>([^<]+)', webpage, 'course title',
 101             default=None)
 102
 103         return self.playlist_result(entries, course_id, title)