[adobetv] use API for extraction and add support for language-specific videos
[youtube-dl] / youtube_dl / extractor / adobetv.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7     parse_duration,
8     unified_strdate,
9     str_to_int,
10     int_or_none,
11     float_or_none,
12     ISO639Utils,
13 )
14
15
class AdobeTVIE(InfoExtractor):
    """Extractor for episode ("watch") pages on tv.adobe.com.

    Metadata and media URLs are taken from the site's JSON episode API
    instead of being scraped from the watch page.  An optional language
    prefix in the URL (fr, de, es or jp) is forwarded to the API; URLs
    without a prefix are requested as 'en'.
    """

    _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'

    _TEST = {
        'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
        'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
        'info_dict': {
            'id': '10981',
            'ext': 'mp4',
            'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
            'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
            # Raw string: '\.' is not a valid escape in a normal literal.
            'thumbnail': r're:https?://.*\.jpg$',
            'upload_date': '20110914',
            'duration': 60,
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        The language, show and episode URL names are parsed out of *url*
        and passed to the episode API; the first entry of the response's
        ``data`` list supplies the title, formats and optional metadata.
        """
        language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
        if not language:
            # No language prefix in the URL: request the English episode.
            language = 'en'

        video_data = self._download_json(
            'http://tv.adobe.com/api/v4/episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
            urlname)['data'][0]

        formats = []
        for source in video_data['videos']:
            source_url = source['url']
            formats.append({
                'url': source_url,
                # Without an explicit quality level, fall back to the piece
                # of the URL after the last '-' and before the next '.'.
                'format_id': source.get('quality_level') or source_url.split('-')[-1].split('.')[0] or None,
                'width': int_or_none(source.get('width')),
                'height': int_or_none(source.get('height')),
                'tbr': int_or_none(source.get('video_data_rate')),
            })
        self._sort_formats(formats)

        return {
            # Format through a text literal rather than str() so that, with
            # this module's unicode_literals import, the id is a text string
            # on Python 2 as well as on Python 3.
            'id': '%s' % video_data['id'],
            'title': video_data['title'],
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'upload_date': unified_strdate(video_data.get('start_date')),
            'duration': parse_duration(video_data.get('duration')),
            'view_count': str_to_int(video_data.get('playcount')),
            'formats': formats,
        }
62
63
class AdobeTVVideoIE(InfoExtractor):
    """Extractor for player pages on video.tv.adobe.com.

    The player parameters (title, sources, translations) are read from the
    JSON value assigned to the ``bridge`` JavaScript variable in the page.
    """

    _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'

    _TEST = {
        # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
        'url': 'https://video.tv.adobe.com/v/2456/',
        'md5': '43662b577c018ad707a63766462b1e87',
        'info_dict': {
            'id': '2456',
            'ext': 'mp4',
            'title': 'New experience with Acrobat DC',
            'description': 'New experience with Acrobat DC',
            'duration': 248.667,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Downloads the player page, parses the ``bridge`` parameters out of
        it and derives formats, duration and subtitles from them.  The
        description comes from the page's Open Graph metadata.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        player_params = self._parse_json(self._search_regex(
            r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'),
            video_id)
        sources = player_params['sources']

        formats = [{
            'url': source['src'],
            'width': source.get('width'),
            'height': source.get('height'),
            'tbr': source.get('bitrate'),
        } for source in sources]

        # For both metadata and downloaded files the duration varies among
        # formats, so the largest reported value is used.  Duration is
        # optional metadata: when no source reports a usable value, leave it
        # unset instead of letting max() fail on an empty sequence.
        durations = []
        for source in sources:
            source_duration = float_or_none(source.get('duration'), scale=1000)
            if source_duration:
                durations.append(source_duration)
        duration = max(durations) if durations else None

        subtitles = {}
        # 'or []' also covers a 'translations' key that is present but null.
        for translation in player_params.get('translations') or []:
            lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
            subtitles.setdefault(lang_id, []).append({
                'url': translation['vttPath'],
                'ext': 'vtt',
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': player_params['title'],
            'description': self._og_search_description(webpage),
            'duration': duration,
            'subtitles': subtitles,
        }