X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=4c8edef8d4d66a8abb5709305f36bda8fbef853a;hb=6496ccb41398971373a2f7162a0684dd12f0b56e;hp=8aa7dfc413a7141cd56328ec3c0eaa56941b4171;hpb=9558dcec9c7806c811f4fe8e7758977eaa01a702;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8aa7dfc41..4c8edef8d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -53,6 +53,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' + _PASSWORD_CHALLENGE_URL = 'https://accounts.google.com/signin/challenge/sl/password' _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -90,38 +91,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if login_page is False: return - galx = self._search_regex(r'(?s)]* id="challenge"', login_results) is not None: + if re.search(r'(?i)]+id="challenge"', login_results) is not None: tfa_code = self._get_tfa_info('2-step verification code') if not tfa_code: @@ -165,17 +146,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if tfa_results is False: return False - if re.search(r'(?i)]* id="challenge"', tfa_results) is not None: + if re.search(r'(?i)]+id="challenge"', tfa_results) is not None: self._downloader.report_warning('Two-factor code expired or invalid. Please try again, or use a one-use backup code instead.') return False - if re.search(r'(?i)]* id="gaia_loginform"', tfa_results) is not None: + if re.search(r'(?i)]+id="gaia_loginform"', tfa_results) is not None: self._downloader.report_warning('unable to log in - did the page structure change?') return False if re.search(r'smsauth-interstitial-reviewsettings', tfa_results) is not None: self._downloader.report_warning('Your Google account has a security notice. Please log in on your web browser, resolve the notice, and try again.') return False - if re.search(r'(?i)]* id="gaia_loginform"', login_results) is not None: + if re.search(r'(?i)]+id="gaia_loginform"', login_results) is not None: self._downloader.report_warning('unable to log in: bad username or password') return False return True @@ -858,6 +839,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor): { 'url': 'https://www.youtube.com/watch?feature=player_embedded&v=V36LpHqtcDY', 'only_matching': True, + }, + { + # YouTube Red paid video (https://github.com/rg3/youtube-dl/issues/10059) + 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo', + 'only_matching': True, + }, + { + # Rental video preview + 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg', + 'info_dict': { + 'id': 'uGpuVWrhIzE', + 'ext': 'mp4', + 'title': 'Piku - Trailer', + 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb', + 'upload_date': '20150811', + 'uploader': 'FlixMatrix', + 'uploader_id': 'FlixMatrixKaravan', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan', + 'license': 'Standard YouTube License', + }, + 'params': { + 'skip_download': True, + }, } ] @@ -1268,6 +1272,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Convert to the same format returned by compat_parse_qs video_info = dict((k, [v]) for k, v in args.items()) add_dash_mpd(video_info) + # Rental video is not rented but preview is available (e.g. + # https://www.youtube.com/watch?v=yYr8q0y5Jfg, + # https://github.com/rg3/youtube-dl/issues/10532) + if not video_info and args.get('ypc_vid'): + return self.url_result( + args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid']) if args.get('livestream') == '1' or args.get('live_playback') == 1: is_live = True if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): @@ -1731,7 +1741,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): class YoutubeSharedVideoIE(InfoExtractor): - _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P[0-9A-Za-z_-]{11})' + _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?.*\bci=(?P[0-9A-Za-z_-]{11})' IE_NAME = 'youtube:shared' _TEST = { @@ -1855,6 +1865,28 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincout': 21, + }, { + # Playlist URL that does not actually serve a playlist + 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4', + 'info_dict': { + 'id': 'FqZTN594JQw', + 'ext': 'webm', + 'title': "Smiley's People 01 detective, Adventure Series, Action", + 'uploader': 'STREEM', + 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng', + 'uploader_url': 're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng', + 'upload_date': '20150526', + 'license': 'Standard YouTube License', + 'description': 'md5:507cdcb5a49ac0da37a920ece610be80', + 'categories': ['People & Blogs'], + 'tags': list, + 'like_count': int, + 'dislike_count': int, + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [YoutubeIE.ie_key()], }] def _real_initialize(self): @@ -1915,9 +1947,20 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): playlist_title = self._html_search_regex( r'(?s)

]*>\s*(.*?)\s*

', - page, 'title') + page, 'title', default=None) - return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) + has_videos = True + + if not playlist_title: + try: + # Some playlist URLs don't actually serve a playlist (e.g. + # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4) + next(self._entries(page, playlist_id)) + except StopIteration: + has_videos = False + + return has_videos, self.playlist_result( + self._entries(page, playlist_id), playlist_id, playlist_title) def _check_download_just_video(self, url, playlist_id): # Check if it's a video-specific URL @@ -1926,9 +1969,11 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): video_id = query_dict['v'][0] if self._downloader.params.get('noplaylist'): self.to_screen('Downloading just video %s because of --no-playlist' % video_id) - return self.url_result(video_id, 'Youtube', video_id=video_id) + return video_id, self.url_result(video_id, 'Youtube', video_id=video_id) else: self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id)) + return video_id, None + return None, None def _real_extract(self, url): # Extract playlist id @@ -1937,7 +1982,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): raise ExtractorError('Invalid URL: %s' % url) playlist_id = mobj.group(1) or mobj.group(2) - video = self._check_download_just_video(url, playlist_id) + video_id, video = self._check_download_just_video(url, playlist_id) if video: return video @@ -1945,7 +1990,15 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): # Mixes require a custom extraction process return self._extract_mix(playlist_id) - return self._extract_playlist(playlist_id) + has_videos, playlist = self._extract_playlist(playlist_id) + if has_videos or not video_id: + return playlist + + # Some playlist URLs don't actually serve a playlist (see + # https://github.com/rg3/youtube-dl/issues/10537). + # Fallback to plain video extraction if there is a video id + # along with playlist id. + return self.url_result(video_id, 'Youtube', video_id=video_id) class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): @@ -2326,7 +2379,8 @@ class YoutubeWatchLaterIE(YoutubePlaylistIE): video = self._check_download_just_video(url, 'WL') if video: return video - return self._extract_playlist('WL') + _, playlist = self._extract_playlist('WL') + return playlist class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):