X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=bc01016e4376a19816be17fb454670d9b5222f6e;hp=02f3ab61aef7be11e68f7f16985823b9d908e70e;hb=HEAD;hpb=ea74e00b3afe604531419c17eb7291038115a271 diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 02f3ab61a..bc01016e4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -2098,7 +2098,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): embed_webpage = self._download_webpage( embed_url, video_id, 'Downloading embed webpage') jsplayer_url_json = self._search_regex( - ASSETS_RE, embed_webpage, 'JS player URL') + ASSETS_RE, embed_webpage, 'JS player URL (2)', default=None) + + if not jsplayer_url_json: + jsplayer_url_json = self._search_regex( + r'"WEB_PLAYER_CONTEXT_CONFIG_ID_EMBEDDED_PLAYER":.+?"jsUrl":\s*("[^"]+")', + embed_webpage, + 'JS player URL') player_url = json.loads(jsplayer_url_json) if player_url is None: @@ -3181,54 +3187,94 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor): _MAX_RESULTS = float('inf') IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' - _EXTRA_QUERY_ARGS = {} + _SEARCH_PARAMS = None _TESTS = [] - def _get_n_results(self, query, n): - """Get a specified number of results for a query""" - - videos = [] - limit = n - - url_query = { - 'search_query': query.encode('utf-8'), + def _entries(self, query, n): + data = { + 'context': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20201021.03.00', + } + }, + 'query': query, } - url_query.update(self._EXTRA_QUERY_ARGS) - result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query) - - for pagenum in itertools.count(1): - data = self._download_json( - result_url, video_id='query "%s"' % query, - note='Downloading page %s' % pagenum, - errnote='Unable to download API page', - query={'spf': 'navigate'}) - html_content = data[1]['body']['content'] - - if 'class="search-message' in html_content: - raise ExtractorError( - '[youtube] No video results', expected=True) - - new_videos = list(self._process_page(html_content)) - videos += new_videos - if not new_videos or len(videos) > limit: + if self._SEARCH_PARAMS: + data['params'] = self._SEARCH_PARAMS + total = 0 + for page_num in itertools.count(1): + search = self._download_json( + 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + video_id='query "%s"' % query, + note='Downloading page %s' % page_num, + errnote='Unable to download API page', fatal=False, + data=json.dumps(data).encode('utf8'), + headers={'content-type': 'application/json'}) + if not search: break - next_link = self._html_search_regex( - r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next', - html_content, 'next link', default=None) - if next_link is None: + slr_contents = try_get( + search, + (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], + lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), + list) + if not slr_contents: break - result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link) + isr_contents = try_get( + slr_contents, + lambda x: x[0]['itemSectionRenderer']['contents'], + list) + if not isr_contents: + break + for content in isr_contents: + if not isinstance(content, dict): + continue + video = content.get('videoRenderer') + if not isinstance(video, dict): + continue + video_id = video.get('videoId') + if not video_id: + continue + title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) + description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) + duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) + view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' + view_count = int_or_none(self._search_regex( + r'^(\d+)', re.sub(r'\s', '', view_count_text), + 'view count', default=None)) + uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) + total += 1 + yield { + '_type': 'url_transparent', + 'ie_key': YoutubeIE.ie_key(), + 'id': video_id, + 'url': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'view_count': view_count, + 'uploader': uploader, + } + if total == n: + return + token = try_get( + slr_contents, + lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], + compat_str) + if not token: + break + data['continuation'] = token - if len(videos) > n: - videos = videos[:n] - return self.playlist_result(videos, query) + def _get_n_results(self, query, n): + """Get a specified number of results for a query""" + return self.playlist_result(self._entries(query, n), query) class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube.com searches, newest videos first' - _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} + _SEARCH_PARAMS = 'CAI%3D' class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):