X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=abafd5157a6b40bc47741465cbe7ad652064861e;hb=bff90fc518d6ccadaafc26407a688dc1bbd32dff;hp=44740cb75c0867d4d4710ffebf23bff044dcb660;hpb=135773497857f04469219e97a5d09350f61672b0;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 44740cb75..abafd5157 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import ( clean_html, dict_get, error_to_compat_str, + extract_attributes, ExtractorError, float_or_none, get_element_by_attribute, @@ -324,17 +325,18 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): for video_id, video_title in self.extract_videos_from_page(content): yield self.url_result(video_id, 'Youtube', video_id, video_title) - def extract_videos_from_page(self, page): - ids_in_page = [] - titles_in_page = [] - for mobj in re.finditer(self._VIDEO_RE, page): + def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page): + for mobj in re.finditer(video_re, page): # The link with index 0 is not the first video of the playlist (not sure if still actual) if 'index' in mobj.groupdict() and mobj.group('id') == '0': continue video_id = mobj.group('id') - video_title = unescapeHTML(mobj.group('title')) + video_title = unescapeHTML( + mobj.group('title')) if 'title' in mobj.groupdict() else None if video_title: video_title = video_title.strip() + if video_title == '► Play all': + video_title = None try: idx = ids_in_page.index(video_id) if video_title and not titles_in_page[idx]: @@ -342,6 +344,12 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): except ValueError: ids_in_page.append(video_id) titles_in_page.append(video_title) + + def extract_videos_from_page(self, page): + ids_in_page = [] + titles_in_page = [] + self.extract_videos_from_page_impl( + self._VIDEO_RE, page, ids_in_page, 
titles_in_page) return zip(ids_in_page, titles_in_page) @@ -379,8 +387,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?invidious\.enkirton\.net/| (?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.mastodon\.host/| + (?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?tube\.poal\.co/| (?:www\.)?vid\.wxzm\.sx/| + (?:www\.)?yt\.elukerio\.org/| + (?:www\.)?kgg2m7yk5aybusll\.onion/| + (?:www\.)?qklhadlycap4cnod\.onion/| + (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/| + (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/| + (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/| + (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1595,17 +1611,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_id = mobj.group(2) return video_id - def _extract_annotations(self, video_id): - return self._download_webpage( - 'https://www.youtube.com/annotations_invideo', video_id, - note='Downloading annotations', - errnote='Unable to download video annotations', fatal=False, - query={ - 'features': 1, - 'legacy': 1, - 'video_id': video_id, - }) - @staticmethod def _extract_chapters(description, duration): if not description: @@ -1812,10 +1817,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): break def extract_unavailable_message(): - return self._html_search_regex( - (r'(?s)