X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=3aff57e30302d3c33ce5e468f9df642cda0f6ff8;hb=cd5b4b0bc2876e16656d33156754ce3c05aa1619;hp=5c41ff517c114bc90407cb3890c7f3959c232209;hpb=87830900a95f95308dac565f9da12387edea65e5;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 5c41ff517..3aff57e30 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -17,6 +17,7 @@ from ..utils import ( ExtractorError, float_or_none, HEADRequest, + is_html, orderedSet, parse_xml, smuggle_url, @@ -139,6 +140,19 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, + # multiple ooyala embeds on SBN network websites + { + 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'info_dict': { + 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', + }, + 'playlist_mincount': 3, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -361,7 +375,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', - 'description': 're:' + 'description': 're:.*groundbreaking video review series.*' }, 'playlist_mincount': 11, }, @@ -459,6 +473,7 @@ class GenericIE(InfoExtractor): { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'info_dict': { + 'id': '1986', 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', }, 'playlist_mincount': 2, @@ -488,6 +503,60 @@ class GenericIE(InfoExtractor): 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', } }, + # Cinerama player + { + 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', + 'info_dict': { + 'id': '730m_DandD_1901_512k', + 'ext': 'mp4', + 'uploader': 'www.abc.net.au', + 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', + } + }, + # embedded viddler video + { + 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597', + 'info_dict': { + 'id': '4d03aad9', + 'ext': 'mp4', + 'uploader': 'deadspin', + 'title': 'WALL-TO-GORTAT', + 'timestamp': 1422285291, + 'upload_date': '20150126', + }, + 'add_ie': ['Viddler'], + }, + # jwplayer YouTube + { + 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', + 'info_dict': { + 'id': 'Mrj4DVp2zeA', + 'ext': 'mp4', + 'upload_date': '20150212', + 'uploader': 'The National Archives UK', + 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', + 'uploader_id': 'NationalArchives08', + 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', + }, + }, + # rtl.nl embed + { + 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'aanslagen-kopenhagen', + 'title': 'Aanslagen Kopenhagen | RTL Nieuws', + } + }, + # Zapiks embed + { + 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'info_dict': { + 'id': '118046', + 'ext': 'mp4', + 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', + } + }, ] def report_following_redirect(self, new_url): @@ -647,7 +716,7 @@ class GenericIE(InfoExtractor): # Maybe it's a direct link to a video? # Be careful not to download the whole thing! first_bytes = full_response.read(512) - if not re.match(r'^\s*<', first_bytes.decode('utf-8', 'replace')): + if not is_html(first_bytes): self._downloader.report_warning( 'URL could be a direct video link, returning it as such.') upload_date = unified_strdate( @@ -732,6 +801,13 @@ class GenericIE(InfoExtractor): 'entries': entries, } + # Look for embedded rtl.nl player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) @@ -739,7 +815,6 @@ class GenericIE(InfoExtractor): player_url = unescapeHTML(mobj.group('url')) surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl) - # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -849,12 +924,28 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for embedded Viddler player + mobj = re.search( + r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos - mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or - re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) + mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or + re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) + # Look for multiple Ooyala embeds on SBN network websites + mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) + if mobj is not None: + embeds = self._parse_json(mobj.group(1), video_id, fatal=False) + if embeds: + return _playlist_from_matches( + embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') + # Look for Aparat videos mobj = re.search(r'