X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=c8d54f22a80ac447179305f50a85b42c483b946e;hb=be49068d65ae39bef5797071f8a7cf1c733f033b;hp=43fdbfab1f1e637d9201373fb6a659ec0c3ec723;hpb=7caf9830b0ae80363d3b1817b322452474631cc1;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 43fdbfab1..c8d54f22a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,16 +17,15 @@ from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, compat_parse_qs, - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, + compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, compat_urlparse, compat_str, ) from ..utils import ( clean_html, - encode_dict, error_to_compat_str, ExtractorError, float_or_none, @@ -45,6 +44,7 @@ from ..utils import ( unified_strdate, unsmuggle_url, uppercase_escape, + urlencode_postdata, ISO3166Utils, ) @@ -116,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'hl': 'en_US', } - login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') + login_data = urlencode_postdata(login_form_strs) req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( @@ -125,6 +125,12 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if login_results is False: return False + error_msg = self._html_search_regex( + r'<[^>]+id="errormsg_0_Passwd"[^>]*>([^<]+)<', + login_results, 'error message', default=None) + if error_msg: + raise ExtractorError('Unable to login: %s' % error_msg, expected=True) + if re.search(r'id="errormsg_0_Passwd"', login_results) is not None: raise ExtractorError('Please use your account password and a two-factor code instead of an application-specific password.', expected=True) @@ -149,7 +155,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'TrustDevice': 'on', }) - tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') + tfa_data = urlencode_postdata(tfa_form_strs) tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( @@ -234,7 +240,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): def _process_page(self, content): - for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)): + for playlist_id in orderedSet(re.findall( + r']+class="[^"]*yt-lockup-title[^"]*"[^>]*>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', + content)): yield self.url_result( 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') @@ -268,7 +276,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): )) |(?: youtu\.be| # just youtu.be/xxxx - vid\.plus # or vid.plus/xxxx + vid\.plus| # or vid.plus/xxxx + zwearz\.com/watch| # or zwearz.com/watch/xxxx )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) @@ -309,6 +318,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20}, # Apple HTTP Live Streaming + '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10}, '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10}, @@ -334,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'}, '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'}, '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'}, + '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, + '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'}, # Dash webm '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40}, @@ -383,6 +395,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -409,6 +422,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'iconic ep', 'iconic', 'love', 'it'], 'uploader': 'Icona Pop', 'uploader_id': 'IconaPop', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IconaPop', 'license': 'Standard YouTube License', 'creator': 'Icona Pop', } @@ -425,6 +439,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:64249768eec3bc4276236606ea996373', 'uploader': 'justintimberlakeVEVO', 'uploader_id': 'justintimberlakeVEVO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO', 'license': 'Standard YouTube License', 'creator': 'Justin Timberlake', 'age_limit': 18, @@ -441,6 +456,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 'uploader': 'SET India', 'uploader_id': 'setindia', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/setindia', 'license': 'Standard YouTube License', 'age_limit': 18, } @@ -454,6 +470,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'youtube-dl test video "\'/\\ä↭𝕐', 'uploader': 'Philipp Hagemeister', 'uploader_id': 'phihag', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/phihag', 'upload_date': '20121002', 'license': 'Standard YouTube License', 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .', @@ -474,6 +491,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'uploader_id': '8KVIDEO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/8KVIDEO', 'description': '', 'uploader': '8KVIDEO', 'license': 'Standard YouTube License', @@ -483,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'youtube_include_dash_manifest': True, 'format': '141', }, + 'skip': 'format 141 not served anymore', }, # DASH manifest with encrypted signature { @@ -499,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # JS player signature function name containing $ @@ -519,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'youtube_include_dash_manifest': True, - 'format': '141', + 'format': '141/bestaudio[ext=m4a]', }, }, # Controversy video @@ -531,6 +550,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20100909', 'uploader': 'The Amazing Atheist', 'uploader_id': 'TheAmazingAtheist', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist', 'license': 'Standard YouTube License', 'title': 'Burning Everyone\'s Koran', 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', @@ -546,6 +566,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}', 'uploader': 'The Witcher', 'uploader_id': 'WitcherGame', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/WitcherGame', 'upload_date': '20140605', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -561,6 +582,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:33765bb339e1b47e7e72b5490139bb41', 'uploader': 'LloydVEVO', 'uploader_id': 'LloydVEVO', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/LloydVEVO', 'upload_date': '20110629', 'license': 'Standard YouTube License', 'age_limit': 18, @@ -574,6 +596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20100430', 'uploader_id': 'deadmau5', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/deadmau5', 'creator': 'deadmau5', 'description': 'md5:12c56784b8032162bb936a5f76d55360', 'uploader': 'deadmau5', @@ -593,9 +616,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'mp4', 'upload_date': '20150827', 'uploader_id': 'olympic', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic', 'license': 'Standard YouTube License', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', - 'uploader': 'Olympics', + 'uploader': 'Olympic', 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games', }, 'params': { @@ -611,6 +635,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'stretched_ratio': 16 / 9., 'upload_date': '20110310', 'uploader_id': 'AllenMeow', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/AllenMeow', 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯', 'uploader': '孫艾倫', 'license': 'Standard YouTube License', @@ -644,9 +669,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:116377fd2963b81ec4ce64b542173306', 'upload_date': '20150625', 'uploader_id': 'dorappi2000', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000', 'uploader': 'dorappi2000', 'license': 'Standard YouTube License', - 'formats': 'mincount:33', + 'formats': 'mincount:32', }, }, # DASH manifest with segment_list @@ -666,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'youtube_include_dash_manifest': True, 'format': '135', # bestvideo - } + }, + 'skip': 'This live event has ended.', }, { # Multifeed videos (multiple cameras), URL is for Main Camera @@ -685,6 +712,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -696,6 +724,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -707,6 +736,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }, { @@ -718,6 +748,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'upload_date': '20150721', 'uploader': 'Beer Games Beer', 'uploader_id': 'beergamesbeer', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/beergamesbeer', 'license': 'Standard YouTube License', }, }], @@ -733,11 +764,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30', }, 'playlist_count': 2, + 'skip': 'Not multifeed anymore', }, { 'url': 'http://vid.plus/FlRa-iH7PGw', 'only_matching': True, }, + { + 'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html', + 'only_matching': True, + }, { # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) # Also tests cut-off URL expansion in video description (see @@ -752,6 +788,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', 'upload_date': '20151119', 'uploader_id': 'IronSoulElf', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/IronSoulElf', 'uploader': 'IronSoulElf', 'license': 'Standard YouTube License', 'creator': 'Todd Haberman, Daniel Law Heath & Aaron Kaplan', @@ -780,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'params': { 'skip_download': True, }, + 'skip': 'This video does not exist.', }, { # Video licensed under Creative Commons @@ -791,6 +829,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 'upload_date': '20150127', 'uploader_id': 'BerkmanCenter', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 'uploader': 'BerkmanCenter', 'license': 'Creative Commons Attribution license (reuse allowed)', }, @@ -798,6 +837,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # Channel-like uploader_url + 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg', + 'info_dict': { + 'id': 'eQcmzGIKrzg', + 'ext': 'mp4', + 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', + 'description': 'md5:dda0d780d5a6e120758d1711d062a867', + 'upload_date': '20151119', + 'uploader': 'Bernie 2016', + 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'license': 'Creative Commons Attribution license (reuse allowed)', + }, + 'params': { + 'skip_download': True, + }, + }, { 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', 'only_matching': True, @@ -969,7 +1026,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): continue sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': lang, 'v': video_id, 'fmt': ext, @@ -1018,7 +1075,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if caption_url: timestamp = args['timestamp'] # We get the available subtitles - list_params = compat_urllib_parse.urlencode({ + list_params = compat_urllib_parse_urlencode({ 'type': 'list', 'tlangs': 1, 'asrs': 1, @@ -1037,7 +1094,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): sub_lang = lang_node.attrib['lang_code'] sub_formats = [] for ext in self._SUBTITLE_FORMATS: - params = compat_urllib_parse.urlencode({ + params = compat_urllib_parse_urlencode({ 'lang': original_lang, 'tlang': sub_lang, 'fmt': ext, @@ -1056,7 +1113,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): caption_tracks = args['caption_tracks'] caption_translation_languages = args['caption_translation_languages'] caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0] - parsed_caption_url = compat_urlparse.urlparse(caption_url) + parsed_caption_url = compat_urllib_parse_urlparse(caption_url) caption_qs = compat_parse_qs(parsed_caption_url.query) sub_lang_list = {} @@ -1072,7 +1129,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fmt': [ext], }) sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace( - query=compat_urllib_parse.urlencode(caption_qs, True))) + query=compat_urllib_parse_urlencode(caption_qs, True))) sub_formats.append({ 'url': sub_url, 'ext': ext, @@ -1102,7 +1159,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'cpn': [cpn], }) playback_url = compat_urlparse.urlunparse( - parsed_playback_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) self._download_webpage( playback_url, video_id, 'Marking watched', @@ -1187,7 +1244,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # this can be viewed without login into Youtube url = proto + '://www.youtube.com/embed/%s' % video_id embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage') - data = compat_urllib_parse.urlencode({ + data = compat_urllib_parse_urlencode({ 'video_id': video_id, 'eurl': 'https://youtube.googleapis.com/v/' + video_id, 'sts': self._search_regex( @@ -1275,10 +1332,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if video_description: video_description = re.sub(r'''(?x) ]*> + (?:[a-zA-Z-]+="[^"]*"\s+)*? + class="[^"]*"[^>]*> [^<]+\.{3}\s* ''', r'\1', video_description) @@ -1334,9 +1391,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # uploader_id video_uploader_id = None - mobj = re.search(r'', video_webpage) + video_uploader_url = None + mobj = re.search( + r'', + video_webpage) if mobj is not None: - video_uploader_id = mobj.group(1) + video_uploader_id = mobj.group('uploader_id') + video_uploader_url = mobj.group('uploader_url') else: self._downloader.report_warning('unable to extract uploader nickname') @@ -1642,6 +1703,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, + 'uploader_url': video_uploader_url, 'upload_date': upload_date, 'license': video_license, 'creator': video_creator, @@ -1768,20 +1830,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): def _extract_mix(self, playlist_id): # The mixes are generated from a single video # the id of the playlist is just 'RD' + video_id - url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id) - webpage = self._download_webpage( - url, playlist_id, 'Downloading Youtube mix') + ids = [] + last_id = playlist_id[-11:] + for n in itertools.count(1): + url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id) + webpage = self._download_webpage( + url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n)) + new_ids = orderedSet(re.findall( + r'''(?xs)data-video-username=".*?".*? + href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), + webpage)) + # Fetch new pages until all the videos are repeated, it seems that + # there are always 51 unique videos. + new_ids = [_id for _id in new_ids if _id not in ids] + if not new_ids: + break + ids.extend(new_ids) + last_id = ids[-1] + + url_results = self._ids_to_results(ids) + search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage) title_span = ( search_title('playlist-title') or search_title('title long-title') or search_title('title')) title = clean_html(title_span) - ids = orderedSet(re.findall( - r'''(?xs)data-video-username=".*?".*? - href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id), - webpage)) - url_results = self._ids_to_results(ids) return self.playlist_result(url_results, playlist_id, title) @@ -1834,7 +1908,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): if video: return video - if playlist_id.startswith('RD') or playlist_id.startswith('UL'): + if playlist_id.startswith(('RD', 'UL', 'PU')): # Mixes require a custom extraction process return self._extract_mix(playlist_id) @@ -1868,7 +1942,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @classmethod def suitable(cls, url): - return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url) + return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url) + else super(YoutubeChannelIE, cls).suitable(url)) def _real_extract(self, url): channel_id = self._match_id(url) @@ -1917,7 +1992,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): class YoutubeUserIE(YoutubeChannelIE): IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)' - _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)' + _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/|c/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P[A-Za-z0-9_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos' IE_NAME = 'youtube:user' @@ -1930,19 +2005,67 @@ class YoutubeUserIE(YoutubeChannelIE): }, { 'url': 'ytuser:phihag', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/c/gametrailers', + 'only_matching': True, }] @classmethod def suitable(cls, url): # Don't return True if the url can be extracted with other youtube # extractor, the regex would is too permissive and it would match. - other_ies = iter(klass for (name, klass) in globals().items() if name.endswith('IE') and klass is not cls) - if any(ie.suitable(url) for ie in other_ies): + other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls) + if any(ie.suitable(url) for ie in other_yt_ies): return False else: return super(YoutubeUserIE, cls).suitable(url) +class YoutubeLiveIE(YoutubeBaseInfoExtractor): + IE_DESC = 'YouTube.com live streams' + _VALID_URL = r'(?Phttps?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P[^/]+))/live' + IE_NAME = 'youtube:live' + + _TESTS = [{ + 'url': 'http://www.youtube.com/user/TheYoungTurks/live', + 'info_dict': { + 'id': 'a48o2S1cPoo', + 'ext': 'mp4', + 'title': 'The Young Turks - Live Main Show', + 'uploader': 'The Young Turks', + 'uploader_id': 'TheYoungTurks', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks', + 'upload_date': '20150715', + 'license': 'Standard YouTube License', + 'description': 'md5:438179573adcdff3c97ebb1ee632b891', + 'categories': ['News & Politics'], + 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'], + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'http://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live', + 'only_matching': True, + }] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + base_url = mobj.group('base_url') + webpage = self._download_webpage(url, channel_id, fatal=False) + if webpage: + page_type = self._og_search_property( + 'type', webpage, 'page type', default=None) + video_id = self._html_search_meta( + 'videoId', webpage, 'video id', default=None) + if page_type == 'video' and video_id and re.match(r'^[0-9A-Za-z_-]{11}$', video_id): + return self.url_result(video_id, YoutubeIE.ie_key()) + return self.url_result(base_url) + + class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com user/channel playlists' _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P[^/]+)/playlists' @@ -1996,7 +2119,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): 'spf': 'navigate', } url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query) + result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) data = self._download_json( result_url, video_id='query "%s"' % query, note='Downloading page %s' % pagenum, @@ -2025,10 +2148,11 @@ class YoutubeSearchDateIE(YoutubeSearchIE): _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} -class YoutubeSearchURLIE(InfoExtractor): +class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com search URLs' IE_NAME = 'youtube:search_url' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P[^&]+)(?:[&]|$)' + _VIDEO_RE = r'href="\s*/watch\?v=(?P[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P[^"]+))?' _TESTS = [{ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'playlist_mincount': 5, @@ -2043,32 +2167,8 @@ class YoutubeSearchURLIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) query = compat_urllib_parse_unquote_plus(mobj.group('query')) - webpage = self._download_webpage(url, query) - result_code = self._search_regex( - r'(?s)<ol[^>]+class="item-section"(.*?)</ol>', webpage, 'result HTML') - - part_codes = re.findall( - r'(?s)<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*>(.*?)</h3>', result_code) - entries = [] - for part_code in part_codes: - part_title = self._html_search_regex( - [r'(?s)title="([^"]+)"', r'>([^<]+)</a>'], part_code, 'item title', fatal=False) - part_url_snippet = self._html_search_regex( - r'(?s)href="([^"]+)"', part_code, 'item URL') - part_url = compat_urlparse.urljoin( - 'https://www.youtube.com/', part_url_snippet) - entries.append({ - '_type': 'url', - 'url': part_url, - 'title': part_title, - }) - - return { - '_type': 'playlist', - 'entries': entries, - 'title': query, - } + return self.playlist_result(self._process_page(webpage), playlist_title=query) class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):