X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=9883cde6193063d76f65cc2af316bb947481e413;hb=7ded6545edb18bb008e8277b42a21d60fb6cd653;hp=518b4f9de1c49cd7884f8c590422e8249e476d20;hpb=ef0e4e7bc03669fbef0cb7923a21637a71993dad;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 518b4f9de..9883cde61 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -51,7 +51,7 @@ from .tnaflix import TNAFlixNetworkEmbedIE from .vimeo import VimeoIE from .dailymotion import DailymotionCloudIE from .onionstudios import OnionStudiosIE -from .snagfilms import SnagFilmsEmbedIE +from .viewlift import ViewLiftEmbedIE from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE @@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE +from .threeqsdn import ThreeQSDNIE class GenericIE(InfoExtractor): @@ -237,6 +238,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': 'car-20120827-manifest', 'formats': 'mincount:9', + 'upload_date': '20130904', }, 'params': { 'format': 'bestvideo', @@ -596,7 +598,11 @@ class GenericIE(InfoExtractor): 'id': 'k2mm4bCdJ6CQ2i7c8o2', 'ext': 'mp4', 'title': 'Le Zap de Spi0n n°216 - Zapping du Web', + 'description': 'md5:faf028e48a461b8b7fad38f1e104b119', 'uploader': 'Spi0n', + 'uploader_id': 'xgditw', + 'upload_date': '20140425', + 'timestamp': 1398441542, }, 'add_ie': ['Dailymotion'], }, @@ -729,9 +735,28 @@ class GenericIE(InfoExtractor): 'id': 'uxjb0lwrcz', 'ext': 'mp4', 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', + 'description': 'a Martin Fowler video from ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', + 'upload_date': '20140603', + 'timestamp': 1401832161, + }, + }, + # Wistia standard embed (async) + { + 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', + 'info_dict': { + 'id': '807fafadvk', + 'ext': 'mp4', + 'title': 'Drip Brennan Dunn Workshop', + 'description': 'a JV Webinars video from getdrip-1', + 'duration': 4986.95, + 'upload_date': '20160518', + 'timestamp': 1463607249, }, + 'params': { + 'skip_download': True, + } }, # Soundcloud embed { @@ -879,6 +904,7 @@ class GenericIE(InfoExtractor): # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '227304', 'ext': 'mp4', @@ -893,6 +919,7 @@ class GenericIE(InfoExtractor): # ClipYou (Eagle.Platform) embed (custom URL) { 'url': 'http://muz-tv.ru/play/7129/', + # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used 'info_dict': { 'id': '12820', 'ext': 'mp4', @@ -981,6 +1008,9 @@ class GenericIE(InfoExtractor): 'ext': 'flv', 'title': "PFT Live: New leader in the 'new-look' defense", 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e', + 'uploader': 'NBCU-SPORTS', + 'upload_date': '20140107', + 'timestamp': 1389118457, }, }, # UDN embed @@ -1033,6 +1063,9 @@ class GenericIE(InfoExtractor): 'title': 'SN Presents: Russell Martin, World Citizen', 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.', 'uploader': 'Rogers Sportsnet', + 'uploader_id': '1704050871', + 'upload_date': '20150525', + 'timestamp': 1432570283, }, }, # Dailymotion Cloud video @@ -1124,6 +1157,9 @@ class GenericIE(InfoExtractor): 'title': 'The Cardinal Pell Interview', 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ', 'uploader': 'GlobeCast Australia - GlobeStream', + 'uploader_id': '2733773828001', + 'upload_date': '20160304', + 'timestamp': 1457083087, }, 'params': { # m3u8 downloads @@ -1408,7 +1444,8 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex( + video_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( r'(?s)(.*?)', webpage, 'video title', default='video') @@ -1426,6 +1463,9 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + video_description = self._og_search_description(webpage, default=None) + video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Helper method def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( @@ -1539,6 +1579,15 @@ class GenericIE(InfoExtractor): 'id': match.group('id') } + match = re.search( + r'''(?sx) + ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return self.url_result(self._proto_relative_url( + 'wistia:%s' % match.group('id')), 'Wistia') + # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: @@ -1905,10 +1954,10 @@ class GenericIE(InfoExtractor): if onionstudios_url: return self.url_result(onionstudios_url) - # Look for SnagFilms embeds - snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage) - if snagfilms_url: - return self.url_result(snagfilms_url) + # Look for ViewLift embeds + viewlift_url = ViewLiftEmbedIE._extract_url(webpage) + if viewlift_url: + return self.url_result(viewlift_url) # Look for JWPlatform embeds jwplatform_url = JWPlatformIE._extract_url(webpage) @@ -1956,13 +2005,27 @@ class GenericIE(InfoExtractor): # Look for Instagram embeds instagram_embed_url = InstagramIE._extract_embed_url(webpage) if instagram_embed_url is not None: - return self.url_result(instagram_embed_url, InstagramIE.ie_key()) + return self.url_result( + self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key()) # Look for LiveLeak embeds liveleak_url = LiveLeakIE._extract_url(webpage) if liveleak_url: return self.url_result(liveleak_url, 'LiveLeak') + # Look for 3Q SDN embeds + threeqsdn_url = ThreeQSDNIE._extract_url(webpage) + if threeqsdn_url: + return { + '_type': 'url_transparent', + 'ie_key': ThreeQSDNIE.ie_key(), + 'url': self._proto_relative_url(threeqsdn_url), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } + def check_video(vurl): if YoutubeIE.suitable(vurl): return True @@ -2044,6 +2107,7 @@ class GenericIE(InfoExtractor): entries = [] for video_url in found: + video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url) video_id = compat_urllib_parse_unquote(os.path.basename(video_url))