X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=25d056b3c21ea3f98c5c36384e86d243dfd4a913;hb=494d664e679c5b0f85e3c899579e7eb8a1cc8246;hp=9a182fcf67138bf347c677fe69ca73bcc4fc8290;hpb=8dbf751aa241475dd8a7a6d3040713b5874fd057;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9a182fcf6..25d056b3c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( clean_html, dict_get, error_to_compat_str, + extract_attributes, ExtractorError, float_or_none, get_element_by_attribute, @@ -324,17 +325,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): for video_id, video_title in self.extract_videos_from_page(content): yield self.url_result(video_id, 'Youtube', video_id, video_title) - def extract_videos_from_page(self, page): - ids_in_page = [] - titles_in_page = [] - for mobj in re.finditer(self._VIDEO_RE, page): + def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page): + for mobj in re.finditer(video_re, page): # The link with index 0 is not the first video of the playlist (not sure if still actual) if 'index' in mobj.groupdict() and mobj.group('id') == '0': continue video_id = mobj.group('id') - video_title = unescapeHTML(mobj.group('title')) + video_title = unescapeHTML( + mobj.group('title')) if 'title' in mobj.groupdict() else None if video_title: video_title = video_title.strip() + if video_title == '⺠Play all': + video_title = None try: idx = ids_in_page.index(video_id) if video_title and not titles_in_page[idx]: @@ -342,6 +344,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): except ValueError: ids_in_page.append(video_id) titles_in_page.append(video_title) + + def extract_videos_from_page(self, page): + ids_in_page = [] + titles_in_page = [] + self.extract_videos_from_page_impl( + self._VIDEO_RE, page, ids_in_page, titles_in_page) return zip(ids_in_page, titles_in_page) @@ -379,8 +387,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| + (?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| + (?:www\.)?yt\.elukerio\.org/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1595,17 +1605,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id - def _extract_annotations(self, video_id): - return self._download_webpage( - 'https://www.youtube.com/annotations_invideo', video_id, - note='Downloading annotations', - errnote='Unable to download video annotations', fatal=False, - query={ - 'features': 1, - 'legacy': 1, - 'video_id': video_id, - }) - @staticmethod def _extract_chapters(description, duration): if not description: @@ -1700,6 +1699,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def extract_token(v_info): return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token')) + def extract_player_response(player_response, video_id): + pl_response = str_or_none(player_response) + if not pl_response: + return + pl_response = self._parse_json(pl_response, video_id, fatal=False) + if isinstance(pl_response, dict): + add_dash_mpd_pr(pl_response) + return pl_response + player_response = {} # Get video info @@ -1722,7 +1730,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): note='Refetching age-gated info webpage', errnote='unable to download video info webpage') video_info = compat_parse_qs(video_info_webpage) + pl_response = video_info.get('player_response', [None])[0] + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(video_info) + view_count = extract_view_count(video_info) else: age_gate = False video_info = None @@ -1745,11 +1756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): is_live = True sts = ytplayer_config.get('sts') if not player_response: - pl_response = str_or_none(args.get('player_response')) - if pl_response: - pl_response = self._parse_json(pl_response, video_id, fatal=False) - if isinstance(pl_response, dict): - player_response = pl_response + player_response = extract_player_response(args.get('player_response'), video_id) if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True): add_dash_mpd_pr(player_response) # We also try looking in get_video_info since it may contain different dashmpd @@ -1781,9 +1788,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): get_video_info = compat_parse_qs(video_info_webpage) if not player_response: pl_response = get_video_info.get('player_response', [None])[0] - if isinstance(pl_response, dict): - player_response = pl_response - add_dash_mpd_pr(player_response) + player_response = extract_player_response(pl_response, video_id) add_dash_mpd(get_video_info) if view_count is None: view_count = extract_view_count(get_video_info) @@ -1806,9 +1811,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): break def extract_unavailable_message(): - return self._html_search_regex( - r'(?s)