[vk] Fix extraction (closes #22522)
[youtube-dl] / youtube_dl / extractor / vk.py
index b7ce2fb97471338d293a6ae07a24c3c3432c9081..8b6dc0e244982f46cbc0d65bf5133b3505779fb0 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import collections
 import re
-import sys
 
 from .common import InfoExtractor
 from ..compat import compat_urlparse
@@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
             'pass': password.encode('cp1251'),
         })
 
-        # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
-        # and expects the first one to be set rather than second (see
-        # https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201).
-        # As of RFC6265 the newer one cookie should be set into cookie store
-        # what actually happens.
-        # We will workaround this VK issue by resetting the remixlhk cookie to
-        # the first one manually.
-        for header, cookies in url_handle.headers.items():
-            if header.lower() != 'set-cookie':
-                continue
-            if sys.version_info[0] >= 3:
-                cookies = cookies.encode('iso-8859-1')
-            cookies = cookies.decode('utf-8')
-            remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
-            if remixlhk:
-                value, domain = remixlhk.groups()
-                self._set_cookie(domain, 'remixlhk', value)
-                break
+        # vk serves two remixlhk cookies with the same name in the Set-Cookie
+        # header and expects the first one to be the one that is actually set
+        self._apply_first_set_cookie_header(url_handle, 'remixlhk')
 
         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,
@@ -419,8 +403,17 @@ class VKIE(VKBaseIE):
             data = self._parse_json(
                 self._search_regex(
                     r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
-                    'player params'),
-                video_id)['params'][0]
+                    'player params', default='{}'),
+                video_id)
+            if data:
+                data = data['params'][0]
+
+        # Fall back to the JSON payload embedded in an HTML comment: <!--{...}
+        if not data:
+            data = self._parse_json(
+                self._search_regex(
+                    r'<!--\s*({.+})', info_page, 'payload'),
+                video_id)['payload'][-1][-1]['player']['params'][0]
 
         title = unescapeHTML(data['md_title'])