X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=5dc53685cf36f0a9adf8e1896da7db80ab0ba385;hb=76c73715fb1e0eee61ace5ff7855d8237abdcd54;hp=b893d8149f2c63e081341d2dec2d71ba05f3e64c;hpb=b59c17e543206220c1809ab0fe6131280dd02b1f;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b893d8149..5dc53685c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -140,6 +141,19 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Ooyala'], }, + # multiple ooyala embeds on SBN network websites + { + 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'info_dict': { + 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok', + 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com', + }, + 'playlist_mincount': 3, + 'params': { + 'skip_download': True, + }, + 'add_ie': ['Ooyala'], + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -362,7 +376,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml', 'title': 'Zero Punctuation', - 'description': 're:' + 'description': 're:.*groundbreaking video review series.*' }, 'playlist_mincount': 11, }, @@ -460,6 +474,7 @@ class GenericIE(InfoExtractor): { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'info_dict': { + 'id': '1986', 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', }, 'playlist_mincount': 2, @@ -489,6 +504,82 @@ class GenericIE(InfoExtractor): 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing', } }, + # Cinerama player + { + 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm', + 'info_dict': { + 'id': '730m_DandD_1901_512k', + 'ext': 'mp4', + 'uploader': 'www.abc.net.au', + 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015', + } + }, + # embedded viddler video + { + 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597', + 'info_dict': { + 'id': '4d03aad9', + 'ext': 'mp4', + 'uploader': 'deadspin', + 'title': 'WALL-TO-GORTAT', + 'timestamp': 1422285291, + 'upload_date': '20150126', + }, + 'add_ie': ['Viddler'], + }, + # jwplayer YouTube + { + 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/', + 'info_dict': { + 'id': 'Mrj4DVp2zeA', + 'ext': 'mp4', + 'upload_date': '20150212', + 'uploader': 'The National Archives UK', + 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', + 'uploader_id': 'NationalArchives08', + 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', + }, + }, + # rtl.nl embed + { + 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'aanslagen-kopenhagen', + 'title': 'Aanslagen Kopenhagen | RTL Nieuws', + } + }, + # Zapiks embed + { + 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'info_dict': { + 'id': '118046', + 'ext': 'mp4', + 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', + } + }, + # Kaltura embed + { + 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15', + 'info_dict': { + 'id': '1_eergr3h1', + 'ext': 'mp4', + 'upload_date': '20150226', + 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com', + 'timestamp': int, + 'title': 'John Carlson Postgame 2/25/15', + }, + }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } + } ] def report_following_redirect(self, new_url): @@ -500,11 +591,24 @@ class GenericIE(InfoExtractor): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist', @@ -733,6 +837,13 @@ class GenericIE(InfoExtractor): 'entries': entries, } + # Look for embedded rtl.nl player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) @@ -740,7 +851,6 @@ class GenericIE(InfoExtractor): player_url = unescapeHTML(mobj.group('url')) surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl) - # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -850,12 +960,28 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for embedded Viddler player + mobj = re.search( + r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos - mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or - re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage)) + mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P[^"&]+)', webpage) or + re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P.{32})[\'"]', webpage) or + re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage)) if mobj is not None: return OoyalaIE._build_url_result(mobj.group('ec')) + # Look for multiple Ooyala embeds on SBN network websites + mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) + if mobj is not None: + embeds = self._parse_json(mobj.group(1), video_id, fatal=False) + if embeds: + return _playlist_from_matches( + embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') + # Look for Aparat videos mobj = re.search(r'