[extractor/common] Move workaround for applying first Set-Cookie header into a separate method
author: Sergey M․ <dstftw@gmail.com>
Fri, 17 May 2019 20:17:15 +0000 (03:17 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Fri, 17 May 2019 20:17:15 +0000 (03:17 +0700)
youtube_dl/extractor/common.py
youtube_dl/extractor/vk.py

index 69c3bc75587884e8b1b9bd961e22503b4953a49d..f994953bc7b4b4bbbb9dd2bc073dbef0f2643bdd 100644 (file)
@@ -2817,6 +2817,29 @@ class InfoExtractor(object):
         self._downloader.cookiejar.add_cookie_header(req)
         return compat_cookies.SimpleCookie(req.get_header('Cookie'))
 
+    def _apply_first_set_cookie_header(self, url_handle, cookie):
+        # Re-set the cookie named `cookie` to the first value found in the
+        # Set-Cookie headers of `url_handle`. Some sites (e.g. [1-3]) serve two
+        # cookies under the same name and expect the first (old) one to be
+        # kept rather than the second (new) one, whereas per RFC 6265 the newer
+        # cookie replaces the older one in the cookie store, which is what
+        # actually happens. Work around this by resetting it manually.
+        # 1. https://new.vk.com/
+        # 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+        # 3. https://learning.oreilly.com/
+        for header, cookies in url_handle.headers.items():
+            if header.lower() != 'set-cookie':
+                continue
+            if sys.version_info[0] >= 3:
+                cookies = cookies.encode('iso-8859-1')  # str -> bytes
+            cookies = cookies.decode('utf-8')
+            cookie_value = re.search(
+                r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+            if cookie_value:
+                value, domain = cookie_value.groups()
+                self._set_cookie(domain, cookie, value)
+                break  # only the first matching header is applied
+
     def get_testcases(self, include_onlymatching=False):
         t = getattr(self, '_TEST', None)
         if t:
index b7ce2fb97471338d293a6ae07a24c3c3432c9081..f57ed228870b1d094c8c5b42e88f6983671670f6 100644 (file)
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
 
 import collections
 import re
-import sys
 
 from .common import InfoExtractor
 from ..compat import compat_urlparse
@@ -45,24 +44,9 @@ class VKBaseIE(InfoExtractor):
             'pass': password.encode('cp1251'),
         })
 
-        # https://new.vk.com/ serves two same remixlhk cookies in Set-Cookie header
-        # and expects the first one to be set rather than second (see
-        # https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201).
-        # As of RFC6265 the newer one cookie should be set into cookie store
-        # what actually happens.
-        # We will workaround this VK issue by resetting the remixlhk cookie to
-        # the first one manually.
-        for header, cookies in url_handle.headers.items():
-            if header.lower() != 'set-cookie':
-                continue
-            if sys.version_info[0] >= 3:
-                cookies = cookies.encode('iso-8859-1')
-            cookies = cookies.decode('utf-8')
-            remixlhk = re.search(r'remixlhk=(.+?);.*?\bdomain=(.+?)(?:[,;]|$)', cookies)
-            if remixlhk:
-                value, domain = remixlhk.groups()
-                self._set_cookie(domain, 'remixlhk', value)
-                break
+        # VK serves two remixlhk cookies with the same name in the Set-Cookie
+        # header and expects the first one to be actually set
+        self._apply_first_set_cookie_header(url_handle, 'remixlhk')
 
         login_page = self._download_webpage(
             'https://login.vk.com/?act=login', None,