[vk] Fix extraction (closes #22522)

[youtube-dl] / youtube_dl / extractor / vk.py
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py

index b7ce2fb97471338d293a6ae07a24c3c3432c9081..8b6dc0e244982f46cbc0d65bf5133b3505779fb0 100644 (file)
--- a/youtube_dl/extractor/vk.py
+++ b/youtube_dl/extractor/vk.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
  
  import collections
  import re
-import sys
  
  from .common import InfoExtractor
  from ..compat import compat_urlparse
@@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
              'pass': password.encode('cp1251'),
          })
  
-        # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
-        # and expects the first one to be set rather than second (see
-        # https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201).
-        # As of RFC6265 the newer one cookie should be set into cookie store
-        # what actually happens.
-        # We will workaround this VK issue by resetting the remixlhk cookie to
-        # the first one manually.
-        for header, cookies in url_handle.headers.items():
-            if header.lower() != 'set-cookie':
-                continue
-            if sys.version_info[0] >= 3:
-                cookies = cookies.encode('iso-8859-1')
-            cookies = cookies.decode('utf-8')
-            remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
-            if remixlhk:
-                value, domain = remixlhk.groups()
-                self._set_cookie(domain, 'remixlhk', value)
-                break
+        # vk serves two identical remixlhk cookies in the Set-Cookie header and
+        # expects the first one to be the one actually set
+        self._apply_first_set_cookie_header(url_handle, 'remixlhk')
  
          login_page = self._download_webpage(
              'https://login.vk.com/?act=login', None,
@@ -419,8 +403,17 @@ class VKIE(VKBaseIE):
              data = self._parse_json(
                  self._search_regex(
                      r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
-                    'player params'),
-                video_id)['params'][0]
+                    'player params', default='{}'),
+                video_id)
+            if data:
+                data = data['params'][0]
+
+        # Fallback: JSON payload embedded after "<!--", i.e. <!--{...}
+        if not data:
+            data = self._parse_json(
+                self._search_regex(
+                    r'<!--\s*({.+})', info_page, 'payload'),
+                video_id)['payload'][-1][-1]['player']['params'][0]
  
          title = unescapeHTML(data['md_title'])