X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fgeneric.py;h=5dc53685cf36f0a9adf8e1896da7db80ab0ba385;hb=76c73715fb1e0eee61ace5ff7855d8237abdcd54;hp=f4500e931ba1a2c72fa6e4e87e120e317e236e56;hpb=e98b8e79ead03f33705356b32a08664ef2df2628;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index f4500e931..5dc53685c 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -26,6 +26,7 @@ from ..utils import ( unsmuggle_url, UnsupportedError, url_basename, + xpath_text, ) from .brightcove import BrightcoveIE from .ooyala import OoyalaIE @@ -473,6 +474,7 @@ class GenericIE(InfoExtractor): { 'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986', 'info_dict': { + 'id': '1986', 'title': 'Unity 8 desktop-mode windows on Mir! - Ubuntu Discourse', }, 'playlist_mincount': 2, @@ -531,12 +533,52 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': 'Mrj4DVp2zeA', 'ext': 'mp4', - 'upload_date': '20150204', + 'upload_date': '20150212', 'uploader': 'The National Archives UK', 'description': 'md5:a236581cd2449dd2df4f93412f3f01c6', 'uploader_id': 'NationalArchives08', 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue', }, + }, + # rtl.nl embed + { + 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen', + 'playlist_mincount': 5, + 'info_dict': { + 'id': 'aanslagen-kopenhagen', + 'title': 'Aanslagen Kopenhagen | RTL Nieuws', + } + }, + # Zapiks embed + { + 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html', + 'info_dict': { + 'id': '118046', + 'ext': 'mp4', + 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !', + } + }, + # Kaltura embed + { + 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15', + 'info_dict': { + 'id': '1_eergr3h1', + 'ext': 'mp4', + 'upload_date': '20150226', + 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com', + 'timestamp': int, + 'title': 'John Carlson Postgame 2/25/15', + }, + }, + # RSS feed with enclosure + { + 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', + 'info_dict': { + 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + 'ext': 'm4v', + 'upload_date': '20150228', + 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624', + } } ] @@ -549,11 +591,24 @@ class GenericIE(InfoExtractor): playlist_desc_el = doc.find('./channel/description') playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text - entries = [{ - '_type': 'url', - 'url': e.find('link').text, - 'title': e.find('title').text, - } for e in doc.findall('./channel/item')] + entries = [] + for it in doc.findall('./channel/item'): + next_url = xpath_text(it, 'link', fatal=False) + if not next_url: + enclosure_nodes = it.findall('./enclosure') + for e in enclosure_nodes: + next_url = e.attrib.get('url') + if next_url: + break + + if not next_url: + continue + + entries.append({ + '_type': 'url', + 'url': next_url, + 'title': it.find('title').text, + }) return { '_type': 'playlist', @@ -782,6 +837,13 @@ class GenericIE(InfoExtractor): 'entries': entries, } + # Look for embedded rtl.nl player + matches = re.findall( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) @@ -789,7 +851,6 @@ class GenericIE(InfoExtractor): player_url = unescapeHTML(mobj.group('url')) surl = smuggle_url(player_url, {'Referer': url}) return self.url_result(surl) - # Look for embedded (swf embed) Vimeo player mobj = re.search( r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) @@ -1082,6 +1143,18 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'Livestream') + # Look for Zapiks embed + mobj = re.search( + r']+src="(?Phttps?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Zapiks') + + # Look for Kaltura embeds + mobj = re.search( + r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P[^']+)',.*?'entry_id'\s*:\s*'(?P[^']+)',", webpage) + if mobj is not None: + return self.url_result('kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(), 'Kaltura') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True @@ -1177,7 +1250,9 @@ class GenericIE(InfoExtractor): return entries[0] else: for num, e in enumerate(entries, start=1): - e['title'] = '%s (%d)' % (e['title'], num) + # 'url' results don't have a title + if e.get('title') is not None: + e['title'] = '%s (%d)' % (e['title'], num) return { '_type': 'playlist', 'entries': entries,