X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=45adbb7a3947c9f3f6a3522864ac5fe370cdc254;hb=d800609c62703e4e6edd2891a8432306462e4db3;hp=c2e8f9b62d846f2fcd5af61097a3e698cdbe53dc;hpb=45dad7ba1b28321299f973fc6a42bd7de64481d7;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c2e8f9b62..45adbb7a3 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -44,7 +44,6 @@ from .myvi import MyviIE from .condenast import CondeNastIE from .udn import UDNEmbedIE from .senateisvp import SenateISVPIE -from .bliptv import BlipTVIE from .svt import SVTIE from .pornhub import PornHubIE from .xhamster import XHamsterEmbedIE @@ -55,6 +54,10 @@ from .snagfilms import SnagFilmsEmbedIE from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE from .pladform import PladformIE +from .videomore import VideomoreIE +from .googledrive import GoogleDriveIE +from .jwplatform import JWPlatformIE +from .digiteka import DigitekaIE class GenericIE(InfoExtractor): @@ -221,6 +224,20 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, }, + # MPD from http://dash-mse-test.appspot.com/media.html + { + 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd', + 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53', + 'info_dict': { + 'id': 'car-20120827-manifest', + 'ext': 'mp4', + 'title': 'car-20120827-manifest', + 'formats': 'mincount:9', + }, + 'params': { + 'format': 'bestvideo', + }, + }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -484,7 +501,7 @@ class GenericIE(InfoExtractor): 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9', } }, - # Embeded Ustream video + # Embedded Ustream video { 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', 'md5': '27b99cdb639c9b12a79bca876a073417', @@ -1226,19 +1243,24 @@ class GenericIE(InfoExtractor): # Check for direct link to a video content_type = head_response.headers.get('Content-Type', '') - m = re.match(r'^(?Paudio|video|application(?=/ogg$))/(?P.+)$', content_type) + m = re.match(r'^(?Paudio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P.+)$', content_type) if m: upload_date = unified_strdate( head_response.headers.get('Last-Modified')) + formats = [] + if m.group('format_id').endswith('mpegurl'): + formats = self._extract_m3u8_formats(url, video_id, 'mp4') + else: + formats = [{ + 'format_id': m.group('format_id'), + 'url': url, + 'vcodec': 'none' if m.group('type') == 'audio' else None + }] return { 'id': video_id, 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), 'direct': True, - 'formats': [{ - 'format_id': m.group('format_id'), - 'url': url, - 'vcodec': 'none' if m.group('type') == 'audio' else None - }], + 'formats': formats, 'upload_date': upload_date, } @@ -1281,7 +1303,7 @@ class GenericIE(InfoExtractor): self.report_extraction(video_id) - # Is it an RSS feed, a SMIL file or a XSPF playlist? + # Is it an RSS feed, a SMIL file, an XSPF playlist or a MPD manifest? try: doc = compat_etree_fromstring(webpage.encode('utf-8')) if doc.tag == 'rss': @@ -1290,6 +1312,13 @@ class GenericIE(InfoExtractor): return self._parse_smil(doc, url, video_id) elif doc.tag == '{http://xspf.org/ns/0/}playlist': return self.playlist_result(self._parse_xspf(doc, video_id), video_id) + elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): + return { + 'id': video_id, + 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]), + 'formats': self._parse_mpd_formats( + doc, video_id, mpd_base_url=url.rpartition('/')[0]), + } except compat_xml_parse_error: pass @@ -1399,7 +1428,7 @@ class GenericIE(InfoExtractor): # Look for embedded Dailymotion player matches = re.findall( - r']+?src=(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/embed/video/.+?)\1', webpage) + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) if matches: return _playlist_from_matches( matches, lambda m: unescapeHTML(m[1])) @@ -1440,11 +1469,6 @@ class GenericIE(InfoExtractor): 'id': match.group('id') } - # Look for embedded blip.tv player - bliptv_url = BlipTVIE._extract_url(webpage) - if bliptv_url: - return self.url_result(bliptv_url, 'BlipTV') - # Look for SVT player svt_url = SVTIE._extract_url(webpage) if svt_url: @@ -1646,7 +1670,7 @@ class GenericIE(InfoExtractor): if myvi_url: return self.url_result(myvi_url) - # Look for embeded soundcloud player + # Look for embedded soundcloud player mobj = re.search( r'https?://(?:w\.)?soundcloud\.com/player[^"]+)"', webpage) @@ -1746,6 +1770,11 @@ class GenericIE(InfoExtractor): if pladform_url: return self.url_result(pladform_url) + # Look for Videomore embeds + videomore_url = VideomoreIE._extract_url(webpage) + if videomore_url: + return self.url_result(videomore_url) + # Look for Playwire embeds mobj = re.search( r']+data-config=(["\'])(?P(?:https?:)?//config\.playwire\.com/.+?)\1', webpage) @@ -1769,6 +1798,11 @@ class GenericIE(InfoExtractor): if nbc_sports_url: return self.url_result(nbc_sports_url, 'NBCSportsVPlayer') + # Look for Google Drive embeds + google_drive_url = GoogleDriveIE._extract_url(webpage) + if google_drive_url: + return self.url_result(google_drive_url, 'GoogleDrive') + # Look for UDN embeds mobj = re.search( r']+src="(?P%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage) @@ -1796,11 +1830,32 @@ class GenericIE(InfoExtractor): if snagfilms_url: return self.url_result(snagfilms_url) + # Look for JWPlatform embeds + jwplatform_url = JWPlatformIE._extract_url(webpage) + if jwplatform_url: + return self.url_result(jwplatform_url, 'JWPlatform') + # Look for ScreenwaveMedia embeds mobj = re.search(ScreenwaveMediaIE.EMBED_PATTERN, webpage) if mobj is not None: return self.url_result(unescapeHTML(mobj.group('url')), 'ScreenwaveMedia') + # Look for Digiteka embeds + digiteka_url = DigitekaIE._extract_url(webpage) + if digiteka_url: + return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key()) + + # Look for Limelight embeds + mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P[a-z0-9]{32})', webpage) + if mobj: + lm = { + 'Media': 'media', + 'Channel': 'channel', + 'ChannelList': 'channel_list', + } + return self.url_result('limelight:%s:%s' % ( + lm[mobj.group(1)], mobj.group(2)), 'Limelight%s' % mobj.group(1), mobj.group(2)) + # Look for AdobeTVVideo embeds mobj = re.search( r']+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]', @@ -1917,6 +1972,8 @@ class GenericIE(InfoExtractor): return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id) elif ext == 'm3u8': entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4') + elif ext == 'mpd': + entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id) else: entry_info_dict['url'] = video_url