X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2Fextractor%2Fgeneric.py;h=9a7b0d25d790054e39729bab63e42b1ea7a89dff;hb=bf24c3d01798fad0a8344a642eb5d46231fd78c2;hp=7f2faa935f434724aed28ad1199d77f4923a457a;hpb=c798f15b989bc8c3578c5b0baf75f4fb4760ba81;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 7f2faa935..9a7b0d25d 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -32,9 +32,13 @@ from .brightcove import BrightcoveIE from .nbc import NBCSportsVPlayerIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .sportbox import SportBoxEmbedIE from .smotri import SmotriIE from .condenast import CondeNastIE from .udn import UDNEmbedIE +from .senateisvp import SenateISVPIE +from .bliptv import BlipTVIE +from .svt import SVTIE class GenericIE(InfoExtractor): @@ -221,6 +225,37 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # SportBox embed + { + 'url': 'http://www.vestifinance.ru/articles/25753', + 'info_dict': { + 'id': '25753', + 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"', + }, + 'playlist': [{ + 'info_dict': { + 'id': '370908', + 'title': 'Госзаказ. День 3', + 'ext': 'mp4', + } + }, { + 'info_dict': { + 'id': '370905', + 'title': 'Госзаказ. День 2', + 'ext': 'mp4', + } + }, { + 'info_dict': { + 'id': '370902', + 'title': 'Госзаказ. День 1', + 'ext': 'mp4', + } + }], + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, # Embedded TED video { 'url': 'http://en.support.wordpress.com/videos/ted-talks/', @@ -615,7 +650,7 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '100183293', 'ext': 'mp4', - 'title': 'Тайны перевала Дятлова • Тайна перевала Дятлова 1 серия 2 часть', + 'title': 'Тайны перевала Дятлова • 1 серия 2 часть', 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века', 'thumbnail': 're:^https?://.*\.jpg$', 'duration': 694, @@ -643,6 +678,17 @@ class GenericIE(InfoExtractor): 'title': 'Facebook Creates "On This Day" | Crunch Report', }, }, + # SVT embed + { + 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun', + 'info_dict': { + 'id': '2900353', + 'ext': 'flv', + 'title': 'Här trycker Jagr till Giroux (under SVT-intervjun)', + 'duration': 27, + 'age_limit': 0, + }, + }, # RSS feed with enclosure { 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml', @@ -713,6 +759,20 @@ class GenericIE(InfoExtractor): # m3u8 downloads 'skip_download': True, } + }, + # Contains a SMIL manifest + { + 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html', + 'info_dict': { + 'id': 'file', + 'ext': 'flv', + 'title': '+ Football: Lottery Champions League Europe', + 'uploader': 'www.telewebion.com', + }, + 'params': { + # rtmpe downloads + 'skip_download': True, + } } ] @@ -1058,12 +1118,14 @@ class GenericIE(InfoExtractor): } # Look for embedded blip.tv player - mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) - if mobj: - return self.url_result('http://blip.tv/a/a-' + mobj.group(1), 'BlipTV') - mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9_]+)', webpage) - if mobj: - return self.url_result(mobj.group(1), 'BlipTV') + bliptv_url = BlipTVIE._extract_url(webpage) + if bliptv_url: + return self.url_result(bliptv_url, 'BlipTV') + + # Look for SVT player + svt_url = SVTIE._extract_url(webpage) + if svt_url: + return self.url_result(svt_url, 'SVT') # Look for embedded condenast player matches = re.findall( @@ -1199,6 +1261,11 @@ class GenericIE(InfoExtractor): if rutv_url: return self.url_result(rutv_url, 'RUTV') + # Look for embedded SportBox player + sportbox_urls = SportBoxEmbedIE._extract_urls(webpage) + if sportbox_urls: + return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed') + # Look for embedded TED player mobj = re.search( r']+?src=(["\'])(?Phttps?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage) @@ -1276,6 +1343,10 @@ class GenericIE(InfoExtractor): mobj = re.search( r']+?src=(["\'])(?Phttps?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1', webpage) + if not mobj: + mobj = re.search( + r'data-video-link=["\'](?Phttp://m.mlb.com/video/[^"\']+)', + webpage) if mobj is not None: return self.url_result(mobj.group('url'), 'MLB') @@ -1351,6 +1422,11 @@ class GenericIE(InfoExtractor): return self.url_result( compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed') + # Look for Senate ISVP iframe + senate_isvp_url = SenateISVPIE._search_iframe_url(webpage) + if senate_isvp_url: + return self.url_result(senate_isvp_url, 'SenateISVP') + def check_video(vurl): if YoutubeIE.suitable(vurl): return True @@ -1418,7 +1494,7 @@ class GenericIE(InfoExtractor): if refresh_header: found = re.search(REDIRECT_REGEX, refresh_header) if found: - new_url = found.group(1) + new_url = compat_urlparse.urljoin(url, found.group(1)) self.report_following_redirect(new_url) return { '_type': 'url', @@ -1440,13 +1516,22 @@ class GenericIE(InfoExtractor): # here's a fun little line of code for you: video_id = os.path.splitext(video_id)[0] - entries.append({ - 'id': video_id, - 'url': video_url, - 'uploader': video_uploader, - 'title': video_title, - 'age_limit': age_limit, - }) + if determine_ext(video_url) == 'smil': + entries.append({ + 'id': video_id, + 'formats': self._extract_smil_formats(video_url, video_id), + 'uploader': video_uploader, + 'title': video_title, + 'age_limit': age_limit, + }) + else: + entries.append({ + 'id': video_id, + 'url': video_url, + 'uploader': video_uploader, + 'title': video_title, + 'age_limit': age_limit, + }) if len(entries) == 1: return entries[0]