Switch codebase to use sanitized_Request instead of
[youtube-dl] / youtube_dl / extractor / pluralsight.py
index 16de0fd1c6ce73f28544699024c535d336cc88ee..aa7dbcb63ad50b196cd15b2d0c44772ae3d5149e 100644 (file)
@@ -8,13 +8,13 @@ from .common import InfoExtractor
 from ..compat import (
     compat_str,
     compat_urllib_parse,
-    compat_urllib_request,
     compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
     int_or_none,
     parse_duration,
+    sanitized_Request,
 )
 
 
@@ -73,7 +73,7 @@ class PluralsightIE(PluralsightBaseIE):
         if not post_url.startswith('http'):
             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
 
-        request = compat_urllib_request.Request(
+        request = sanitized_Request(
             post_url, compat_urllib_parse.urlencode(login_form).encode('utf-8'))
         request.add_header('Content-Type', 'application/x-www-form-urlencoded')
 
@@ -104,19 +104,29 @@ class PluralsightIE(PluralsightBaseIE):
 
         webpage = self._download_webpage(url, display_id)
 
-        collection = self._parse_json(
-            self._search_regex(
-                r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
-                webpage, 'modules'),
-            display_id)
+        modules = self._search_regex(
+            r'moduleCollection\s*:\s*new\s+ModuleCollection\((\[.+?\])\s*,\s*\$rootScope\)',
+            webpage, 'modules', default=None)
+
+        if modules:
+            collection = self._parse_json(modules, display_id)
+        else:
+            # Webpage may be served in different layout (see
+            # https://github.com/rg3/youtube-dl/issues/7607)
+            collection = self._parse_json(
+                self._search_regex(
+                    r'var\s+initialState\s*=\s*({.+?});\n', webpage, 'initial state'),
+                display_id)['course']['modules']
 
         module, clip = None, None
 
         for module_ in collection:
-            if module_.get('moduleName') == name:
+            if name in (module_.get('moduleName'), module_.get('name')):
                 module = module_
                 for clip_ in module_.get('clips', []):
                     clip_index = clip_.get('clipIndex')
+                    if clip_index is None:
+                        clip_index = clip_.get('index')
                     if clip_index is None:
                         continue
                     if compat_str(clip_index) == clip_id:
@@ -139,6 +149,9 @@ class PluralsightIE(PluralsightBaseIE):
             AllowedQuality('mp4', ('low', 'medium', 'high',)),
         )
 
+        # In order to minimize the number of calls to ViewClip API and reduce
+        # the probability of being throttled or banned by Pluralsight we will request
+        # only single format until formats listing was explicitly requested.
         if self._downloader.params.get('listformats', False):
             allowed_qualities = ALLOWED_QUALITIES
         else:
@@ -168,7 +181,7 @@ class PluralsightIE(PluralsightBaseIE):
                     'mt': ext,
                     'q': '%dx%d' % (f['width'], f['height']),
                 }
-                request = compat_urllib_request.Request(
+                request = sanitized_Request(
                     '%s/training/Player/ViewClip' % self._API_BASE,
                     json.dumps(clip_post).encode('utf-8'))
                 request.add_header('Content-Type', 'application/json;charset=utf-8')