X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=303e112d2d3f056172e791c3000ed0c5601bdce0;hb=240b60453e1237473dfd8deff40c9dc54661668c;hp=b48ccfc97ba2ca1fae72704cf09d81c807166f08;hpb=5d39176f6de8bab1e019ead7cd497659f3fc1a94;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b48ccfc97..303e112d2 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -62,6 +62,7 @@ from .digiteka import DigitekaIE from .instagram import InstagramIE from .liveleak import LiveLeakIE from .threeqsdn import ThreeQSDNIE +from .theplatform import ThePlatformIE class GenericIE(InfoExtractor): @@ -717,15 +718,18 @@ class GenericIE(InfoExtractor): }, # Wistia embed { - 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', - 'md5': '8788b683c777a5cf25621eaf286d0c23', + 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson', + 'md5': '1953f3a698ab51cfc948ed3992a0b7ff', 'info_dict': { - 'id': '1cfaf6b7ea', + 'id': '6e2wtrbdaf', 'ext': 'mov', - 'title': 'md5:51364a8d3d009997ba99656004b5e20d', - 'duration': 643.0, - 'filesize': 182808282, - 'uploader': 'education-portal.com', + 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england', + 'description': 'a Paywall Videos video from Remilon', + 'duration': 644.072, + 'uploader': 'study.com', + 'timestamp': 1459678540, + 'upload_date': '20160403', + 'filesize': 24687186, }, }, { @@ -734,14 +738,30 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'uxjb0lwrcz', 'ext': 'mp4', - 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks', + 'title': 'Conversation about Hexagonal Rails Part 1', 'description': 'a Martin Fowler video from ThoughtWorks', 'duration': 1715.0, 'uploader': 'thoughtworks.wistia.com', - 'upload_date': '20140603', 'timestamp': 1401832161, + 'upload_date': '20140603', }, }, + # Wistia standard embed (async) + { + 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/', + 'info_dict': { + 'id': '807fafadvk', + 'ext': 'mp4', + 'title': 'Drip Brennan Dunn Workshop', + 'description': 'a JV Webinars video from getdrip-1', + 'duration': 4986.95, + 'timestamp': 1463607249, + 'upload_date': '20160518', + }, + 'params': { + 'skip_download': True, + } + }, # Soundcloud embed { 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/', @@ -764,6 +784,19 @@ class GenericIE(InfoExtractor): 'title': 'Rosetta #CometLanding webcast HL 10', } }, + # Another Livestream embed, without 'new.' in URL + { + 'url': 'https://www.freespeech.org/', + 'info_dict': { + 'id': '123537347', + 'ext': 'mp4', + 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', + }, + 'params': { + # Live stream + 'skip_download': True, + }, + }, # LazyYT { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', @@ -1174,6 +1207,16 @@ class GenericIE(InfoExtractor): 'uploader': 'Lake8737', } }, + # Duplicated embedded video URLs + { + 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443', + 'info_dict': { + 'id': '149298443_480_16c25b74_2', + 'ext': 'mp4', + 'title': 'vs. Blue Orange Spring Game', + 'uploader': 'www.hudl.com', + }, + }, ] def report_following_redirect(self, new_url): @@ -1428,7 +1471,8 @@ class GenericIE(InfoExtractor): # Site Name | Video Title # Video Title - Tagline | Site Name # and so on and so forth; it's just not practical - video_title = self._html_search_regex( + video_title = self._og_search_title( + webpage, default=None) or self._html_search_regex( r'(?s)(.*?)', webpage, 'video title', default='video') @@ -1446,6 +1490,9 @@ class GenericIE(InfoExtractor): video_uploader = self._search_regex( r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') + video_description = self._og_search_description(webpage, default=None) + video_thumbnail = self._og_search_thumbnail(webpage, default=None) + # Helper method def _playlist_from_matches(matches, getter=None, ie=None): urlrs = orderedSet( @@ -1476,6 +1523,11 @@ class GenericIE(InfoExtractor): if bc_urls: return _playlist_from_matches(bc_urls, ie='BrightcoveNew') + # Look for ThePlatform embeds + tp_urls = ThePlatformIE._extract_urls(webpage) + if tp_urls: + return _playlist_from_matches(tp_urls, ie='ThePlatform') + # Look for embedded rtl.nl player matches = re.findall( r']+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"', @@ -1544,21 +1596,26 @@ class GenericIE(InfoExtractor): 'url': embed_url, 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': video_id, } match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P[^"\']+)', webpage) if match: return { '_type': 'url_transparent', - 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')), + 'url': 'wistia:%s' % match.group('id'), 'ie_key': 'Wistia', 'uploader': video_uploader, - 'title': video_title, - 'id': match.group('id') } + match = re.search( + r'''(?sx) + ]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? + ]+class=(["']).*?\bwistia_async_(?P[a-z0-9]+)\b.*?\2 + ''', webpage) + if match: + return self.url_result(self._proto_relative_url( + 'wistia:%s' % match.group('id')), 'Wistia') + # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: @@ -1834,7 +1891,7 @@ class GenericIE(InfoExtractor): return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast') mobj = re.search( - r']+src="(?Phttps?://new\.livestream\.com/[^"]+/player[^"]+)"', + r']+src="(?Phttps?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"', webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') @@ -1987,7 +2044,15 @@ class GenericIE(InfoExtractor): # Look for 3Q SDN embeds threeqsdn_url = ThreeQSDNIE._extract_url(webpage) if threeqsdn_url: - return self.url_result(self._proto_relative_url(threeqsdn_url), ThreeQSDNIE.ie_key()) + return { + '_type': 'url_transparent', + 'ie_key': ThreeQSDNIE.ie_key(), + 'url': self._proto_relative_url(threeqsdn_url), + 'title': video_title, + 'description': video_description, + 'thumbnail': video_thumbnail, + 'uploader': video_uploader, + } def check_video(vurl): if YoutubeIE.suitable(vurl): @@ -2069,7 +2134,7 @@ class GenericIE(InfoExtractor): raise UnsupportedError(url) entries = [] - for video_url in found: + for video_url in orderedSet(found): video_url = unescapeHTML(video_url) video_url = video_url.replace('\\/', '/') video_url = compat_urlparse.urljoin(url, video_url)