[pluralsight] Improve login detection
[youtube-dl] / youtube_dl / extractor / pluralsight.py
index fd32836ccaf61b171f9dcafbbfce24c7764b18c1..7693282a59ababcf861695fed1ba06d7a7271fab 100644 (file)
@@ -1,6 +1,5 @@
 from __future__ import unicode_literals
 
-import re
 import json
 
 from .common import InfoExtractor
@@ -17,13 +16,18 @@ from ..utils import (
 )
 
 
-class PluralsightIE(InfoExtractor):
+class PluralsightBaseIE(InfoExtractor):
+    _API_BASE = 'http://app.pluralsight.com'
+
+
+class PluralsightIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight'
-    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/training/player\?author=(?P<author>[^&]+)&name=(?P<name>[^&]+)(?:&mode=live)?&clip=(?P<clip>\d+)&course=(?P<course>[^&]+)'
-    _LOGIN_URL = 'https://www.pluralsight.com/id/'
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?'
+    _LOGIN_URL = 'https://app.pluralsight.com/id/'
+
     _NETRC_MACHINE = 'pluralsight'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas',
         'md5': '4d458cf5cf4c593788672419a8dd4cf8',
         'info_dict': {
@@ -33,7 +37,14 @@ class PluralsightIE(InfoExtractor):
             'duration': 338,
         },
         'skip': 'Requires pluralsight account credentials',
-    }
+    }, {
+        'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live',
+        'only_matching': True,
+    }, {
+        # available without pluralsight account
+        'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started',
+        'only_matching': True,
+    }]
 
     def _real_initialize(self):
         self._login()
@@ -41,7 +52,7 @@ class PluralsightIE(InfoExtractor):
     def _login(self):
         (username, password) = self._get_login_info()
         if username is None:
-            self.raise_login_required('Pluralsight account is required')
+            return
 
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
@@ -73,12 +84,19 @@ class PluralsightIE(InfoExtractor):
         if error:
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
+        if all(p not in response for p in ('__INITIAL_STATE__', '"currentUser"')):
+            raise ExtractorError('Unable to log in')
+
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        author = mobj.group('author')
-        name = mobj.group('name')
-        clip_id = mobj.group('clip')
-        course = mobj.group('course')
+        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+
+        author = qs.get('author', [None])[0]
+        name = qs.get('name', [None])[0]
+        clip_id = qs.get('clip', [None])[0]
+        course = qs.get('course', [None])[0]
+
+        if any(not f for f in (author, name, clip_id, course,)):
+            raise ExtractorError('Invalid URL', expected=True)
 
         display_id = '%s-%s' % (name, clip_id)
 
@@ -132,7 +150,7 @@ class PluralsightIE(InfoExtractor):
                     'q': '%dx%d' % (f['width'], f['height']),
                 }
                 request = compat_urllib_request.Request(
-                    'http://www.pluralsight.com/training/Player/ViewClip',
+                    '%s/training/Player/ViewClip' % self._API_BASE,
                     json.dumps(clip_post).encode('utf-8'))
                 request.add_header('Content-Type', 'application/json;charset=utf-8')
                 format_id = '%s-%s' % (ext, quality)
@@ -163,10 +181,10 @@ class PluralsightIE(InfoExtractor):
         }
 
 
-class PluralsightCourseIE(InfoExtractor):
+class PluralsightCourseIE(PluralsightBaseIE):
     IE_NAME = 'pluralsight:course'
-    _VALID_URL = r'https?://(?:www\.)?pluralsight\.com/courses/(?P<id>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)'
+    _TESTS = [{
         # Free course from Pluralsight Starter Subscription for Microsoft TechNet
         # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz
         'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas',
@@ -176,7 +194,14 @@ class PluralsightCourseIE(InfoExtractor):
             'description': 'md5:61b37e60f21c4b2f91dc621a977d0986',
         },
         'playlist_count': 31,
-    }
+    }, {
+        # available without pluralsight account
+        'url': 'https://www.pluralsight.com/courses/angularjs-get-started',
+        'only_matching': True,
+    }, {
+        'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         course_id = self._match_id(url)
@@ -184,14 +209,14 @@ class PluralsightCourseIE(InfoExtractor):
         # TODO: PSM cookie
 
         course = self._download_json(
-            'http://www.pluralsight.com/data/course/%s' % course_id,
+            '%s/data/course/%s' % (self._API_BASE, course_id),
             course_id, 'Downloading course JSON')
 
         title = course['title']
         description = course.get('description') or course.get('shortDescription')
 
         course_data = self._download_json(
-            'http://www.pluralsight.com/data/course/content/%s' % course_id,
+            '%s/data/course/content/%s' % (self._API_BASE, course_id),
             course_id, 'Downloading course data JSON')
 
         entries = []
@@ -201,7 +226,7 @@ class PluralsightCourseIE(InfoExtractor):
                 if not player_parameters:
                     continue
                 entries.append(self.url_result(
-                    'http://www.pluralsight.com/training/player?%s' % player_parameters,
+                    '%s/training/player?%s' % (self._API_BASE, player_parameters),
                     'Pluralsight'))
 
         return self.playlist_result(entries, course_id, title, description)