_ Git - youtube-dl/blob - youtube_dl/extractor/adobetv.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     parse_duration,
   8     unified_strdate,
   9     str_to_int,
  10     int_or_none,
  11     float_or_none,
  12     ISO639Utils,
  13 )
  14
  15
  16 class AdobeTVIE(InfoExtractor):
  17     _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
  18
  19     _TEST = {
  20         'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
  21         'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
  22         'info_dict': {
  23             'id': '10981',
  24             'ext': 'mp4',
  25             'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
  26             'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
  27             'thumbnail': 're:https?://.*\.jpg$',
  28             'upload_date': '20110914',
  29             'duration': 60,
  30             'view_count': int,
  31         },
  32     }
  33
  34     def _real_extract(self, url):
  35         language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
  36         if not language:
  37             language = 'en'
  38
  39         video_data = self._download_json(
  40             'http://tv.adobe.com/api/v4/episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
  41             urlname)['data'][0]
  42
  43         formats = [{
  44             'url': source['url'],
  45             'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
  46             'width': int_or_none(source.get('width')),
  47             'height': int_or_none(source.get('height')),
  48             'tbr': int_or_none(source.get('video_data_rate')),
  49         } for source in video_data['videos']]
  50         self._sort_formats(formats)
  51
  52         return {
  53             'id': str(video_data['id']),
  54             'title': video_data['title'],
  55             'description': video_data.get('description'),
  56             'thumbnail': video_data.get('thumbnail'),
  57             'upload_date': unified_strdate(video_data.get('start_date')),
  58             'duration': parse_duration(video_data.get('duration')),
  59             'view_count': str_to_int(video_data.get('playcount')),
  60             'formats': formats,
  61         }
  62
  63
  64 class AdobeTVVideoIE(InfoExtractor):
  65     _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
  66
  67     _TEST = {
  68         # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
  69         'url': 'https://video.tv.adobe.com/v/2456/',
  70         'md5': '43662b577c018ad707a63766462b1e87',
  71         'info_dict': {
  72             'id': '2456',
  73             'ext': 'mp4',
  74             'title': 'New experience with Acrobat DC',
  75             'description': 'New experience with Acrobat DC',
  76             'duration': 248.667,
  77         },
  78     }
  79
  80     def _real_extract(self, url):
  81         video_id = self._match_id(url)
  82
  83         webpage = self._download_webpage(url, video_id)
  84
  85         player_params = self._parse_json(self._search_regex(
  86             r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
  87             video_id)
  88
  89         formats = [{
  90             'url': source['src'],
  91             'width': source.get('width'),
  92             'height': source.get('height'),
  93             'tbr': source.get('bitrate'),
  94         } for source in player_params['sources']]
  95
  96         # For both metadata and downloaded files the duration varies among
  97         # formats. I just pick the max one
  98         duration = max(filter(None, [
  99             float_or_none(source.get('duration'), scale=1000)
 100             for source in player_params['sources']]))
 101
 102         subtitles = {}
 103         for translation in player_params.get('translations', []):
 104             lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
 105             if lang_id not in subtitles:
 106                 subtitles[lang_id] = []
 107             subtitles[lang_id].append({
 108                 'url': translation['vttPath'],
 109                 'ext': 'vtt',
 110             })
 111
 112         return {
 113             'id': video_id,
 114             'formats': formats,
 115             'title': player_params['title'],
 116             'description': self._og_search_description(webpage),
 117             'duration': duration,
 118             'subtitles': subtitles,
 119         }