_ Git - youtube-dl/blob - youtube_dl/extractor/khanacademy.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     unified_strdate,
   8 )
   9
  10
  11 class KhanAcademyIE(InfoExtractor):
  12     _VALID_URL = r'^https?://(?:(?:www|api)\.)?khanacademy\.org/(?P<key>[^/]+)/(?:[^/]+/){,2}(?P<id>[^?#/]+)(?:$|[?#])'
  13     IE_NAME = 'KhanAcademy'
  14
  15     _TESTS = [{
  16         'url': 'http://www.khanacademy.org/video/one-time-pad',
  17         'md5': '7b391cce85e758fb94f763ddc1bbb979',
  18         'info_dict': {
  19             'id': 'one-time-pad',
  20             'ext': 'webm',
  21             'title': 'The one-time pad',
  22             'description': 'The perfect cipher',
  23             'duration': 176,
  24             'uploader': 'Brit Cruise',
  25             'uploader_id': 'khanacademy',
  26             'upload_date': '20120411',
  27         },
  28         'add_ie': ['Youtube'],
  29     }, {
  30         'url': 'https://www.khanacademy.org/math/applied-math/cryptography',
  31         'info_dict': {
  32             'id': 'cryptography',
  33             'title': 'Journey into cryptography',
  34             'description': 'How have humans protected their secret messages through history? What has changed today?',
  35         },
  36         'playlist_mincount': 3,
  37     }]
  38
  39     def _real_extract(self, url):
  40         m = re.match(self._VALID_URL, url)
  41         video_id = m.group('id')
  42
  43         if m.group('key') == 'video':
  44             data = self._download_json(
  45                 'http://api.khanacademy.org/api/v1/videos/' + video_id,
  46                 video_id, 'Downloading video info')
  47
  48             upload_date = unified_strdate(data['date_added'])
  49             uploader = ', '.join(data['author_names'])
  50             return {
  51                 '_type': 'url_transparent',
  52                 'url': data['url'],
  53                 'id': video_id,
  54                 'title': data['title'],
  55                 'thumbnail': data['image_url'],
  56                 'duration': data['duration'],
  57                 'description': data['description'],
  58                 'uploader': uploader,
  59                 'upload_date': upload_date,
  60             }
  61         else:
  62             # topic
  63             data = self._download_json(
  64                 'http://api.khanacademy.org/api/v1/topic/' + video_id,
  65                 video_id, 'Downloading topic info')
  66
  67             entries = [
  68                 {
  69                     '_type': 'url',
  70                     'url': c['url'],
  71                     'id': c['id'],
  72                     'title': c['title'],
  73                 }
  74                 for c in data['children'] if c['kind'] in ('Video', 'Topic')]
  75
  76             return {
  77                 '_type': 'playlist',
  78                 'id': video_id,
  79                 'title': data['title'],
  80                 'description': data['description'],
  81                 'entries': entries,
  82             }