Fix "invalid escape sequences" error on Python 3.6
[youtube-dl] / youtube_dl / extractor / spankwire.py
index 0a47441b184e82e9d53a9ef55ffad75d3997c465..44d8fa52f3071ca00971624db81ce4ad6b2141e3 100644 (file)
@@ -6,9 +6,9 @@ from .common import InfoExtractor
 from ..compat import (
     compat_urllib_parse_unquote,
     compat_urllib_parse_urlparse,
-    compat_urllib_request,
 )
 from ..utils import (
+    sanitized_Request,
     str_to_int,
     unified_strdate,
 )
@@ -16,7 +16,7 @@ from ..aes import aes_decrypt_text
 
 
 class SpankwireIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<videoid>[0-9]+)/?)'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
     _TESTS = [{
         # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
         'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
@@ -49,10 +49,9 @@ class SpankwireIE(InfoExtractor):
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('videoid')
-        url = 'http://www.' + mobj.group('url')
+        video_id = mobj.group('id')
 
-        req = compat_urllib_request.Request(url)
+        req = sanitized_Request('http://www.' + mobj.group('url'))
         req.add_header('Cookie', 'age_verified=1')
         webpage = self._download_webpage(req, video_id)
 
@@ -69,7 +68,7 @@ class SpankwireIE(InfoExtractor):
             r'by:\s*<a [^>]*>(.+?)</a>',
             webpage, 'uploader', fatal=False)
         uploader_id = self._html_search_regex(
-            r'by:\s*<a href="/user/viewProfile\?.*?UserId=(\d+).*?"',
+            r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
             webpage, 'uploader id', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
             r'</a> on (.+?) at \d+:\d+',
@@ -86,7 +85,7 @@ class SpankwireIE(InfoExtractor):
             r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
         heights = [int(video[0]) for video in videos]
         video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
-        if webpage.find('flashvars\.encrypted = "true"') != -1:
+        if webpage.find(r'flashvars\.encrypted = "true"') != -1:
             password = self._search_regex(
                 r'flashvars\.video_title = "([^"]+)',
                 webpage, 'password').replace('+', ' ')
@@ -97,20 +96,18 @@ class SpankwireIE(InfoExtractor):
         formats = []
         for height, video_url in zip(heights, video_urls):
             path = compat_urllib_parse_urlparse(video_url).path
-            _, quality = path.split('/')[4].split('_')[:2]
-            f = {
+            m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
+            if m:
+                tbr = int(m.group('tbr'))
+                height = int(m.group('height'))
+            else:
+                tbr = None
+            formats.append({
                 'url': video_url,
+                'format_id': '%dp' % height,
                 'height': height,
-            }
-            tbr = self._search_regex(r'^(\d+)[Kk]$', quality, 'tbr', default=None)
-            if tbr:
-                f.update({
-                    'tbr': int(tbr),
-                    'format_id': '%dp' % height,
-                })
-            else:
-                f['format_id'] = quality
-            formats.append(f)
+                'tbr': tbr,
+            })
         self._sort_formats(formats)
 
         age_limit = self._rta_search(webpage)