X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=36d23d2f37181a37e3ba79db9c9e65ec42d7d960;hb=0ee79a378ad9536cfb580187c9a30a0463ad5d8b;hp=a71d6bac01faa69a377f01d2fdd39c927766684e;hpb=46b18f2349670d395b9d84a57ee3d9b5d221ff4b;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index a71d6bac0..36d23d2f3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -730,6 +730,21 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, + # YouTube embed + { + 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/', + 'md5': '516718101ec834f74318df76259fb3cc', + 'info_dict': { + 'id': 'msN87y-iEx0', + 'ext': 'webm', + 'title': 'Feynman: Mirrors FUN TO IMAGINE 6', + 'upload_date': '20080526', + 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d', + 'uploader': 'Christopher Sykes', + 'uploader_id': 'ChristopherJSykes', + }, + 'add_ie': ['Youtube'], + }, # Camtasia studio { 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', @@ -902,12 +917,13 @@ class GenericIE(InfoExtractor): }, # LazyYT { - 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', + 'url': 'https://skiplagged.com/', 'info_dict': { - 'id': '1986', - 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', + 'id': 'skiplagged', + 'title': 'Skiplagged: The smart way to find cheap flights', }, - 'playlist_mincount': 2, + 'playlist_mincount': 1, + 'add_ie': ['Youtube'], }, # Cinchcast embed { @@ -990,6 +1006,20 @@ class GenericIE(InfoExtractor): 'thumbnail': r're:^https?://.*\.jpg$', }, }, + { + # JWPlayer config passed as variable + 'url': 'http://www.txxx.com/videos/3326530/ariele/', + 'info_dict': { + 'id': '3326530_hq', + 'ext': 'mp4', + 'title': 'ARIELE | Tube Cup', + 'uploader': 'www.txxx.com', + 'age_limit': 18, + }, + 'params': { + 'skip_download': True, + } + }, # rtl.nl embed { 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', @@ -1065,6 +1095,21 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, + { + # Kaltura iframe embed + 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/', + 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44', + 'info_dict': { + 'id': '0_f2cfbpwy', + 'ext': 'mp4', + 'title': 'I. M. Pei: A Centennial Celebration', + 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c', + 'upload_date': '20170403', + 'uploader_id': 'batchUser', + 'timestamp': 1491232186, + }, + 'add_ie': ['Kaltura'], + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -1542,6 +1587,17 @@ class GenericIE(InfoExtractor): 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm', 'only_matching': True, }, + { + # Senate ISVP iframe https + 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security', + 'md5': 'fb8c70b0b515e5037981a2492099aab8', + 'info_dict': { + 'id': 'govtaff020316', + 'ext': 'mp4', + 'title': 'Integrated Senate Video Player', + }, + 'add_ie': [SenateISVPIE.ie_key()], + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -1897,6 +1953,7 @@ class GenericIE(InfoExtractor): data-video-url=| ]+?src=| embedSWF\(?:\s*| + ]+data=| new\s+SWFObject\( ) (["\']) @@ -2538,18 +2595,14 @@ class GenericIE(InfoExtractor): self._sort_formats(entry['formats']) return self.playlist_result(entries) - jwplayer_data_str = self._find_jwplayer_data(webpage) - if jwplayer_data_str: - try: - jwplayer_data = self._parse_json( - jwplayer_data_str, video_id, transform_source=js_to_json) - info = self._parse_jwplayer_data( - jwplayer_data, video_id, require_title=False) - if not info.get('title'): - info['title'] = video_title - return info - except ExtractorError: - pass + jwplayer_data = self._find_jwplayer_data( + webpage, video_id, transform_source=js_to_json) + if jwplayer_data: + info = self._parse_jwplayer_data( + jwplayer_data, video_id, require_title=False, base_url=url) + if not info.get('title'): + info['title'] = video_title + return info def check_video(vurl): if YoutubeIE.suitable(vurl): @@ -2558,7 +2611,7 @@ class GenericIE(InfoExtractor): return True vpath = compat_urlparse.urlparse(vurl).path vext = determine_ext(vpath) - return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js') + return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml') def filter_video(urls): return list(filter(check_video, urls)) @@ -2624,11 +2677,14 @@ class GenericIE(InfoExtractor): found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1))) - self.report_following_redirect(new_url) - return { - '_type': 'url', - 'url': new_url, - } + if new_url != url: + self.report_following_redirect(new_url) + return { + '_type': 'url', + 'url': new_url, + } + else: + found = None if not found: # twitter:player is a https URL to iframe player that may or may not