X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fadobetv.py;h=5e43adc51f98c2f22e728c49150b84ae64f704e3;hb=55801fc76e2813de9a84eaa830d70ed73cb44463;hp=28e07f8b04ed89fe7c79f445f3454adfb04d0561;hpb=c24dfef63c55ef1a5424d11b485c3b76245448a4;p=youtube-dl diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py index 28e07f8b0..5e43adc51 100644 --- a/youtube_dl/extractor/adobetv.py +++ b/youtube_dl/extractor/adobetv.py @@ -5,6 +5,8 @@ from ..utils import ( parse_duration, unified_strdate, str_to_int, + float_or_none, + ISO639Utils, ) @@ -28,7 +30,6 @@ class AdobeTVIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) player = self._parse_json( @@ -44,8 +45,10 @@ class AdobeTVIE(InfoExtractor): self._html_search_meta('datepublished', webpage, 'upload date')) duration = parse_duration( - self._html_search_meta('duration', webpage, 'duration') - or self._search_regex(r'Runtime:\s*(\d{2}:\d{2}:\d{2})', webpage, 'duration')) + self._html_search_meta('duration', webpage, 'duration') or + self._search_regex( + r'Runtime:\s*(\d{2}:\d{2}:\d{2})', + webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( r'
\s*Views?:\s*([\d,.]+)\s*
', @@ -68,3 +71,61 @@ class AdobeTVIE(InfoExtractor): 'view_count': view_count, 'formats': formats, } + + +class AdobeTVVideoIE(InfoExtractor): + _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)' + + _TEST = { + # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners + 'url': 'https://video.tv.adobe.com/v/2456/', + 'md5': '43662b577c018ad707a63766462b1e87', + 'info_dict': { + 'id': '2456', + 'ext': 'mp4', + 'title': 'New experience with Acrobat DC', + 'description': 'New experience with Acrobat DC', + 'duration': 248.667, + }, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + + webpage = self._download_webpage(url, video_id) + + player_params = self._parse_json(self._search_regex( + r'var\s+bridge\s*=\s*([^;]+);', webpage, 'player parameters'), + video_id) + + formats = [{ + 'url': source['src'], + 'width': source.get('width'), + 'height': source.get('height'), + 'tbr': source.get('bitrate'), + } for source in player_params['sources']] + + # For both metadata and downloaded files the duration varies among + # formats. I just pick the max one + duration = max(filter(None, [ + float_or_none(source.get('duration'), scale=1000) + for source in player_params['sources']])) + + subtitles = {} + for translation in player_params.get('translations', []): + lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium']) + if lang_id not in subtitles: + subtitles[lang_id] = [] + subtitles[lang_id].append({ + 'url': translation['vttPath'], + 'ext': 'vtt', + }) + + return { + 'id': video_id, + 'formats': formats, + 'title': player_params['title'], + 'description': self._og_search_description(webpage), + 'duration': duration, + 'subtitles': subtitles, + }