X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fvoicerepublic.py;h=a52e40afa2892a10538251ba40e4d2a44a10a67d;hb=HEAD;hp=1a90693cb7757d617789f23b25244c1674fed8f0;hpb=c6ddbdb66c5d6ead5e198013c54ef53d641063f1;p=youtube-dl diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py index 1a90693cb..a52e40afa 100644 --- a/youtube_dl/extractor/voicerepublic.py +++ b/youtube_dl/extractor/voicerepublic.py @@ -1,55 +1,62 @@ -# coding: utf-8 from __future__ import unicode_literals from .common import InfoExtractor - -from ..compat import ( - compat_urllib_request, +from ..compat import compat_str +from ..utils import ( + ExtractorError, + determine_ext, + int_or_none, + urljoin, ) class VoiceRepublicIE(InfoExtractor): - _VALID_URL = r'https?://voicerepublic\.com/talks/(?P[0-9a-z-]+)' - _TEST = { - 'url': 'https://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', - 'md5': '0554a24d1657915aa8e8f84e15dc9353', + _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P[0-9a-z-]+)' + _TESTS = [{ + 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state', + 'md5': 'b9174d651323f17783000876347116e3', 'info_dict': { 'id': '2296', + 'display_id': 'watching-the-watchers-building-a-sousveillance-state', 'ext': 'm4a', 'title': 'Watching the Watchers: Building a Sousveillance State', - 'thumbnail': 'https://voicerepublic.com/system/flyer/2296.png', - 'description': 'md5:715ba964958afa2398df615809cfecb1', - 'creator': 'M. C. McGrath', + 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.', + 'duration': 1556, + 'view_count': int, } - } + }, { + 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - req = compat_urllib_request.Request(url) - # Older versions of Firefox get redirected to an "upgrade browser" page - req.add_header('User-Agent', 'youtube-dl') - webpage = self._download_webpage(req, display_id) - thumbnail = self._og_search_thumbnail(webpage) - video_id = self._search_regex(r'/(\d+)\.png', thumbnail, 'id') - - if '
Queued for processing, please stand by...<' in webpage: + raise ExtractorError( + 'Audio is still queued for processing', expected=True) + + talk = self._parse_json(self._search_regex( + r'initialSnapshot\s*=\s*({.+?});', + webpage, 'talk'), display_id)['talk'] + title = talk['title'] + formats = [{ + 'url': urljoin(url, talk_url), + 'format_id': format_id, + 'ext': determine_ext(talk_url) or format_id, + 'vcodec': 'none', + } for format_id, talk_url in talk['media_links'].items()] + self._sort_formats(formats) return { - 'id': video_id, - 'title': self._og_search_title(webpage), + 'id': compat_str(talk.get('id') or display_id), + 'display_id': display_id, + 'title': title, + 'description': talk.get('teaser'), + 'thumbnail': talk.get('image_url'), + 'duration': int_or_none(talk.get('archived_duration')), + 'view_count': int_or_none(talk.get('play_count')), 'formats': formats, - 'url': self._og_search_url(webpage), - 'thumbnail': thumbnail, - 'description': self._og_search_description(webpage), - 'creator': self._search_regex(r'', webpage, 'author', fatal=False), }