X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=3fe0237b6ebb2c6821660f4cd671304be5768ee8;hb=c73e330e7adc9c0c15ac51aeea8fbb7dad95351a;hp=40201f3119b69b4d703881ff97523f99285ffbd6;hpb=972efe60c3fdaff83f9b8e7a637ee81f4c27bb64;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 40201f311..3fe0237b6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -20,6 +20,7 @@ from ..utils import ( float_or_none, HEADRequest, is_html, + js_to_json, orderedSet, sanitized_Request, smuggle_url, @@ -29,6 +30,7 @@ from ..utils import ( UnsupportedError, xpath_text, ) +from .commonprotocols import RtmpIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, @@ -80,6 +82,8 @@ from .piksel import PikselIE from .videa import VideaIE from .twentymin import TwentyMinutenIE from .ustream import UstreamIE +from .openload import OpenloadIE +from .videopress import VideoPressIE class GenericIE(InfoExtractor): @@ -945,6 +949,43 @@ class GenericIE(InfoExtractor): 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', }, }, + # jwplayer rtmp + { + 'url': 'http://www.suffolk.edu/sjc/', + 'info_dict': { + 'id': 'sjclive', + 'ext': 'flv', + 'title': 'Massachusetts Supreme Judicial Court Oral Arguments', + 'uploader': 'www.suffolk.edu', + }, + 'params': { + 'skip_download': True, + } + }, + # Complex jwplayer + { + 'url': 'http://www.indiedb.com/games/king-machine/videos', + 'info_dict': { + 'id': 'videos', + 'ext': 'mp4', + 'title': 'king machine trailer 1', + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, + { + # JWPlayer config passed as variable + 'url': 'http://www.txxx.com/videos/3326530/ariele/', + 'info_dict': { + 'id': '3326530_hq', + 'ext': 'mp4', + 'title': 'ARIELE | Tube Cup', + 'uploader': 'www.txxx.com', + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + } + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -975,19 +1016,6 @@ class GenericIE(InfoExtractor): 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, - # Kaltura embed protected with referrer - { - 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', - 'info_dict': { - 'id': '1_g4fbemnq', - 'ext': 'mp4', - 'title': 'Violetta - Achter De Schermen - Ruggero', - 'description': 'Achter de schermen met Ruggero', - 'timestamp': 1435133761, - 'upload_date': '20150624', - 'uploader_id': 'echojecka', - }, - }, # Kaltura embed with single quotes { 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY', @@ -1472,7 +1500,27 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [TwentyMinutenIE.ie_key()], - } + }, + { + # VideoPress embed + 'url': 'https://en.support.wordpress.com/videopress/', + 'info_dict': { + 'id': 'OcobLTqC', + 'ext': 'm4v', + 'title': 'IMG_5786', + 'timestamp': 1435711927, + 'upload_date': '20150701', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [VideoPressIE.ie_key()], + }, + { + # ThePlatform embedded with whitespaces in URLs + 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', + 'only_matching': True, + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2319,8 +2367,9 @@ class GenericIE(InfoExtractor): 'Channel': 'channel', 'ChannelList': 'channel_list', } - return self.url_result('limelight:%s:%s' % ( - lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + return self.url_result(smuggle_url('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), {'source_url': url}), + 'Limelight%s' % mobj.group(1), mobj.group(2)) mobj = re.search( r'''(?sx) @@ -2330,7 +2379,9 @@ class GenericIE(InfoExtractor): value=(["\'])(?:(?!\3).)*mediaId=(?P[a-z0-9]{32}) ''', webpage) if mobj: - return self.url_result('limelight:media:%s' % mobj.group('id')) + return self.url_result(smuggle_url( + 'limelight:media:%s' % mobj.group('id'), + {'source_url': url}), 'LimelightMedia', mobj.group('id')) # Look for AdobeTVVideo embeds mobj = re.search( @@ -2431,6 +2482,18 @@ class GenericIE(InfoExtractor): return _playlist_from_matches( twentymin_urls, ie=TwentyMinutenIE.ie_key()) + # Look for Openload embeds + openload_urls = OpenloadIE._extract_urls(webpage) + if openload_urls: + return _playlist_from_matches( + openload_urls, ie=OpenloadIE.ie_key()) + + # Look for VideoPress embeds + videopress_urls = VideoPressIE._extract_urls(webpage) + if videopress_urls: + return _playlist_from_matches( + videopress_urls, ie=VideoPressIE.ie_key()) + # Looking for http://schema.org/VideoObject json_ld = self._search_json_ld( webpage, video_id, default={}, expected_type='VideoObject') @@ -2455,9 +2518,16 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) + jwplayer_data = self._find_jwplayer_data( + webpage, video_id, transform_source=js_to_json) + if jwplayer_data: + return self._parse_jwplayer_data(jwplayer_data, video_id) + def check_video(vurl): if YoutubeIE.suitable(vurl): return True + if RtmpIE.suitable(vurl): + return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') @@ -2565,6 +2635,15 @@ class GenericIE(InfoExtractor): 'age_limit': age_limit, } + if RtmpIE.suitable(video_url): + entry_info_dict.update({ + '_type': 'url_transparent', + 'ie_key': RtmpIE.ie_key(), + 'url': video_url, + }) + entries.append(entry_info_dict) + continue + ext = determine_ext(video_url) if ext == 'smil': entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)