[youtube] Don't pollute default query dict (closes #17593)

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 89c8b7f8d9f534134ef8e405c6ba97d7c232f515..e80e36f988196dfa2490e99013134008dc2066bd 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      # If True it will raise an error if no login info is provided
      _LOGIN_REQUIRED = False
  
-    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
  
      def _set_language(self):
          self._set_cookie(
@@ -178,13 +178,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
              warn('Unable to extract result entry')
              return False
  
-        tfa = try_get(res, lambda x: x[0][0], list)
-        if tfa:
-            tfa_str = try_get(tfa, lambda x: x[2], compat_str)
-            if tfa_str == 'TWO_STEP_VERIFICATION':
+        login_challenge = try_get(res, lambda x: x[0][0], list)
+        if login_challenge:
+            challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+            if challenge_str == 'TWO_STEP_VERIFICATION':
                  # SEND_SUCCESS - TFA code has been successfully sent to phone
                  # QUOTA_EXCEEDED - reached the limit of TFA codes
-                status = try_get(tfa, lambda x: x[5], compat_str)
+                status = try_get(login_challenge, lambda x: x[5], compat_str)
                  if status == 'QUOTA_EXCEEDED':
                      warn('Exceeded the limit of TFA codes, try later')
                      return False
@@ -228,6 +228,17 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
  
                  check_cookie_url = try_get(
                      tfa_results, lambda x: x[0][-1][2], compat_str)
+            else:
+                CHALLENGES = {
+                    'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+                    'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+                    'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+                }
+                challenge = CHALLENGES.get(
+                    challenge_str,
+                    '%s returned error %s.' % (self.IE_NAME, challenge_str))
+                warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+                return False
          else:
              check_cookie_url = try_get(res, lambda x: x[2], compat_str)
  
@@ -248,7 +259,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          return True
  
      def _download_webpage_handle(self, *args, **kwargs):
-        kwargs.setdefault('query', {})['disable_polymer'] = 'true'
+        query = kwargs.get('query', {}).copy()
+        query['disable_polymer'] = 'true'
+        kwargs['query'] = query
          return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
              *args, **compat_kwargs(kwargs))
  
@@ -479,6 +492,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Philipp Hagemeister',
                  'uploader_id': 'phihag',
                  'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+                'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
                  'upload_date': '20121002',
                  'license': 'Standard YouTube License',
                  'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
@@ -1167,7 +1182,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      def _parse_sig_js(self, jscode):
          funcname = self._search_regex(
              (r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
-             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\('),
+             r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+             r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\('),
              jscode, 'Initial JS player signature function name', group='sig')
  
          jsi = JSInterpreter(jscode)
@@ -1894,6 +1911,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          else:
              self._downloader.report_warning('unable to extract uploader nickname')
  
+        channel_id = self._html_search_meta(
+            'channelId', video_webpage, 'channel id')
+        channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
+
          # thumbnail image
          # We try first to get a high quality image:
          m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -2065,6 +2086,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'uploader': video_uploader,
              'uploader_id': video_uploader_id,
              'uploader_url': video_uploader_url,
+            'channel_id': channel_id,
+            'channel_url': channel_url,
              'upload_date': upload_date,
              'license': video_license,
              'creator': video_creator or artist,
@@ -2112,7 +2135,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                              youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
                          )
                          (
-                            (?:PL|LL|EC|UU|FL|RD|UL|TL)?[0-9A-Za-z-_]{10,}
+                            (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
                              # Top tracks, they can also include dots
                              |(?:MC)[\w\.]*
                          )
@@ -2250,6 +2273,10 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
      }, {
          'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
          'only_matching': True,
+    }, {
+        # music album playlist
+        'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
+        'only_matching': True,
      }]
  
      def _real_initialize(self):