X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fgeneric.py;h=d48914495c4e02a62061a0bf97e3e6cf494fbb27;hb=6cc622327ff8289f94894f3695ed31014c61cf8e;hp=c3f201949bce359b5a097f336965cbbefe6cf37b;hpb=5399ab3f0c63d47d44a7f8c3506192775f2605d3;p=youtube-dl diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index c3f201949..d48914495 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -23,6 +23,7 @@ from ..utils import ( is_html, js_to_json, KNOWN_EXTENSIONS, + merge_dicts, mimetype2ext, orderedSet, sanitized_Request, @@ -58,6 +59,7 @@ from .xhamster import XHamsterEmbedIE from .tnaflix import TNAFlixNetworkEmbedIE from .drtuber import DrTuberIE from .redtube import RedTubeIE +from .tube8 import Tube8IE from .vimeo import VimeoIE from .dailymotion import DailymotionIE from .dailymail import DailyMailIE @@ -102,6 +104,9 @@ from .channel9 import Channel9IE from .vshare import VShareIE from .mediasite import MediasiteIE from .springboardplatform import SpringboardPlatformIE +from .yapfiles import YapFilesIE +from .vice import ViceIE +from .xfileshare import XFileShareIE class GenericIE(InfoExtractor): @@ -1216,7 +1221,7 @@ class GenericIE(InfoExtractor): 'title': '35871', 'timestamp': 1355743100, 'upload_date': '20121217', - 'uploader_id': 'batchUser', + 'uploader_id': 'cplapp@learn360.com', }, 'add_ie': ['Kaltura'], }, @@ -1267,23 +1272,21 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Kaltura'], }, - # EaglePlatform embed (generic URL) { - 'url': 'http://lenta.ru/news/2015/03/06/navalny/', - # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used + # meta twitter:player + 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/', 'info_dict': { - 'id': '227304', + 'id': '0_01b42zps', 'ext': 'mp4', - 'title': 'Навальный вышел на свободу', - 'description': 'md5:d97861ac9ae77377f3f20eaf9d04b4f5', - 'thumbnail': r're:^https?://.*\.jpg$', - 'duration': 87, - 'view_count': int, - 'age_limit': 0, + 'title': 'Main Twerk (Video)', + 'upload_date': '20171208', + 'uploader_id': 'sebastian.salinas@thechive.com', + 'timestamp': 1512713057, }, 'params': { 'skip_download': True, }, + 'add_ie': ['Kaltura'], }, # referrer protected EaglePlatform embed { @@ -1954,7 +1957,45 @@ class GenericIE(InfoExtractor): 'skip_download': True, }, 'add_ie': [SpringboardPlatformIE.ie_key()], - } + }, + { + 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU', + 'info_dict': { + 'id': 'uPDB5I9wfp8', + 'ext': 'webm', + 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3', + 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d', + 'upload_date': '20160219', + 'uploader': 'Pocoyo - Português (BR)', + 'uploader_id': 'PocoyoBrazil', + }, + 'add_ie': [YoutubeIE.ie_key()], + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html', + 'info_dict': { + 'id': 'vMDE4NzI1Mjgt690b', + 'ext': 'mp4', + 'title': 'Котята', + }, + 'add_ie': [YapFilesIE.ie_key()], + 'params': { + 'skip_download': True, + }, + }, + { + 'url': 'http://share-videos.se/auto/video/83645793?uid=13', + 'md5': 'b68d276de422ab07ee1d49388103f457', + 'info_dict': { + 'id': '83645793', + 'title': 'Lock up and get excited', + 'ext': 'mp4' + }, + 'skip': 'TODO: fix nested playlists processing in tests', + }, # { # # TODO: find another test # # http://schema.org/VideoObject @@ -2201,7 +2242,11 @@ class GenericIE(InfoExtractor): self._sort_formats(smil['formats']) return smil elif doc.tag == '{http://xspf.org/ns/0/}playlist': - return self.playlist_result(self._parse_xspf(doc, video_id), video_id) + return self.playlist_result( + self._parse_xspf( + doc, video_id, xspf_url=url, + xspf_base_url=compat_str(full_response.geturl())), + video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, @@ -2529,6 +2574,11 @@ class GenericIE(InfoExtractor): if redtube_urls: return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key()) + # Look for embedded Tube8 player + tube8_urls = Tube8IE._extract_urls(webpage) + if tube8_urls: + return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key()) + # Look for embedded Tvigle player mobj = re.search( r']+?src=(["\'])(?P(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage) @@ -2931,20 +2981,27 @@ class GenericIE(InfoExtractor): springboardplatform_urls, video_id, video_title, ie=SpringboardPlatformIE.ie_key()) - def merge_dicts(dict1, dict2): - merged = {} - for k, v in dict1.items(): - if v is not None: - merged[k] = v - for k, v in dict2.items(): - if v is None: - continue - if (k not in merged or - (isinstance(v, compat_str) and v and - isinstance(merged[k], compat_str) and - not merged[k])): - merged[k] = v - return merged + yapfiles_urls = YapFilesIE._extract_urls(webpage) + if yapfiles_urls: + return self.playlist_from_matches( + yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key()) + + vice_urls = ViceIE._extract_urls(webpage) + if vice_urls: + return self.playlist_from_matches( + vice_urls, video_id, video_title, ie=ViceIE.ie_key()) + + xfileshare_urls = XFileShareIE._extract_urls(webpage) + if xfileshare_urls: + return self.playlist_from_matches( + xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key()) + + sharevideos_urls = [mobj.group('url') for mobj in re.finditer( + r']+?\bsrc\s*=\s*(["\'])(?P(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', + webpage)] + if sharevideos_urls: + return self.playlist_from_matches( + sharevideos_urls, video_id, video_title) # Look for HTML5 media entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')