X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=3b3678c6e638d29000dd6728358c76b319af4b9f;hb=ca0f500ecfde0a0ada7002348e8b04ac5c79d4b4;hp=eb55d24ce8111303c5bab52139c90c4b30d6da1f;hpb=5779b3e1fe4dd76972d3c00bb12cc1131765d0c1;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index eb55d24ce..3b3678c6e 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -809,6 +809,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): player_url = None # Get video info + embed_webpage = None if re.search(r'player-age-gate-content">', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} @@ -1016,10 +1017,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): url += '&signature=' + url_data['sig'][0] elif 's' in url_data: encrypted_sig = url_data['s'][0] + ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")' jsplayer_url_json = self._search_regex( - r'"assets":.+?"js":\s*("[^"]+")', - embed_webpage if age_gate else video_webpage, 'JS player URL') + ASSETS_RE, + embed_webpage if age_gate else video_webpage, + 'JS player URL (1)', default=None) + if not jsplayer_url_json and not age_gate: + # We need the embed website after all + if embed_webpage is None: + embed_url = proto + '://www.youtube.com/embed/%s' % video_id + embed_webpage = self._download_webpage( + embed_url, video_id, 'Downloading embed webpage') + jsplayer_url_json = self._search_regex( + ASSETS_RE, embed_webpage, 'JS player URL') + player_url = json.loads(jsplayer_url_json) if player_url is None: player_url_json = self._search_regex( @@ -1682,11 +1694,18 @@ class YoutubeTruncatedURLIE(InfoExtractor): IE_NAME = 'youtube:truncated_url' IE_DESC = False # Do not list _VALID_URL = r'''(?x) - (?:https?://)?[^/]+/watch\?(?: + (?:https?://)? + (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/ + (?:watch\?(?: feature=[a-z_]+| - annotation_id=annotation_[^&]+ - )?$| - (?:https?://)?(?:www\.)?youtube\.com/attribution_link\?a=[^&]+$ + annotation_id=annotation_[^&]+| + x-yt-cl=[0-9]+| + hl=[^&]*| + )? + | + attribution_link\?a=[^&]+ + ) + $ ''' _TESTS = [{ @@ -1695,6 +1714,15 @@ class YoutubeTruncatedURLIE(InfoExtractor): }, { 'url': 'http://www.youtube.com/watch?', 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?feature=foo', + 'only_matching': True, + }, { + 'url': 'https://www.youtube.com/watch?hl=en-GB', + 'only_matching': True, }] def _real_extract(self, url): @@ -1710,7 +1738,7 @@ class YoutubeTruncatedURLIE(InfoExtractor): class YoutubeTruncatedIDIE(InfoExtractor): IE_NAME = 'youtube:truncated_id' IE_DESC = False # Do not list - _VALID_URL = r'https?://(?:www\.)youtube\.com/watch\?v=(?P[0-9A-Za-z_-]{1,10})$' + _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P[0-9A-Za-z_-]{1,10})$' _TESTS = [{ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',