X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=7cf13fddfe37fa1bf2d2e6329c060f9d4c6286f7;hb=a8ae232fa9c24534bd9c838c793f182e6796fe4e;hp=c7cee5487986e21a157c260da66dec0d5699ff7f;hpb=5b5fae5f2004e5a1be36551bf477f74161d4f9ac;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c7cee5487..7cf13fddf 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,7 +11,6 @@ from .youtube import YoutubeIE from ..compat import ( compat_etree_fromstring, compat_urllib_parse_unquote, - compat_urllib_request, compat_urlparse, compat_xml_parse_error, ) @@ -22,6 +21,7 @@ from ..utils import ( HEADRequest, is_html, orderedSet, + sanitized_Request, smuggle_url, unescapeHTML, unified_strdate, @@ -54,6 +54,8 @@ from .onionstudios import OnionStudiosIE from .snagfilms import SnagFilmsEmbedIE from .screenwavemedia import ScreenwaveMediaIE from .mtv import MTVServicesEmbeddedIE +from .pladform import PladformIE +from .googledrive import GoogleDriveIE class GenericIE(InfoExtractor): @@ -339,6 +341,7 @@ class GenericIE(InfoExtractor): 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', 'ext': 'mp4', 'title': '2cc213299525360.mov', # that's what we get + 'duration': 238.231, }, 'add_ie': ['Ooyala'], }, @@ -350,6 +353,7 @@ class GenericIE(InfoExtractor): 'ext': 'mp4', 'title': '"Steve Jobs: Man in the Machine" trailer', 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."', + 'duration': 135.427, }, 'params': { 'skip_download': True, @@ -823,6 +827,19 @@ class GenericIE(InfoExtractor): 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014', }, }, + # Kaltura embed protected with referrer + { + 'url': 'http://www.disney.nl/disney-channel/filmpjes/achter-de-schermen#/videoId/violetta-achter-de-schermen-ruggero', + 'info_dict': { + 'id': '1_g4fbemnq', + 'ext': 'mp4', + 'title': 'Violetta - Achter De Schermen - Ruggero', + 'description': 'Achter de schermen met Ruggero', + 'timestamp': 1435133761, + 'upload_date': '20150624', + 'uploader_id': 'echojecka', + }, + }, # Eagle.Platform embed (generic URL) { 'url': 'http://lenta.ru/news/2015/03/06/navalny/', @@ -947,8 +964,9 @@ class GenericIE(InfoExtractor): 'info_dict': { 'id': '50YnY4czr4ms1vJ7yz3xzq0excz_pUMs', 'ext': 'mp4', - 'description': 'VIDEO: Index/Match versus VLOOKUP.', + 'description': 'VIDEO: INDEX/MATCH versus VLOOKUP.', 'title': 'This is what separates the Excel masters from the wannabes', + 'duration': 191.933, }, 'params': { # m3u8 downloads @@ -1045,6 +1063,20 @@ class GenericIE(InfoExtractor): 'description': 'Tabletop: Dread, Last Thoughts', 'duration': 51690, }, + }, + # JWPlayer with M3U8 + { + 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', + 'info_dict': { + 'id': 'playlist', + 'ext': 'mp4', + 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ', + 'uploader': 'ren.tv', + }, + 'params': { + # m3u8 downloads + 'skip_download': True, + } } ] @@ -1188,7 +1220,7 @@ class GenericIE(InfoExtractor): full_response = None if head_response is False: - request = compat_urllib_request.Request(url) + request = sanitized_Request(url) request.add_header('Accept-Encoding', '*') full_response = self._request_webpage(request, video_id) head_response = full_response @@ -1217,7 +1249,7 @@ class GenericIE(InfoExtractor): '%s on generic information extractor.' % ('Forcing' if force else 'Falling back')) if not full_response: - request = compat_urllib_request.Request(url) + request = sanitized_Request(url) # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # making it impossible to download only chunk of the file (yet we need only 512kB to # test whether it's HTML or not). According to youtube-dl default Accept-Encoding @@ -1474,7 +1506,7 @@ class GenericIE(InfoExtractor): re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P.{32})[\'"]\)', webpage) or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P.{32})[\'"]', webpage)) if mobj is not None: - return OoyalaIE._build_url_result(mobj.group('ec')) + return OoyalaIE._build_url_result(smuggle_url(mobj.group('ec'), {'domain': url})) # Look for multiple Ooyala embeds on SBN network websites mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage) @@ -1482,7 +1514,7 @@ class GenericIE(InfoExtractor): embeds = self._parse_json(mobj.group(1), video_id, fatal=False) if embeds: return _playlist_from_matches( - embeds, getter=lambda v: OoyalaIE._url_for_embed_code(v['provider_video_id']), ie='Ooyala') + embeds, getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala') # Look for Aparat videos mobj = re.search(r'