[udemy] Improve course enrolling
[youtube-dl] / youtube_dl / extractor / udemy.py
index e19c1f762b4733099e4c837a068a31237829b723..a5634ece908b60fd862bce7b89d4cd3930276c0d 100644 (file)
@@ -5,6 +5,7 @@ from ..compat import (
     compat_HTTPError,
     compat_urllib_parse,
     compat_urllib_request,
+    compat_urlparse,
 )
 from ..utils import (
     ExtractorError,
@@ -35,19 +36,21 @@ class UdemyIE(InfoExtractor):
         'skip': 'Requires udemy account credentials',
     }]
 
-    def _enroll_course(self, webpage, course_id):
+    def _enroll_course(self, base_url, webpage, course_id):
         checkout_url = unescapeHTML(self._search_regex(
             r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/payment/checkout/.+?)\1',
             webpage, 'checkout url', group='url', default=None))
         if checkout_url:
             raise ExtractorError(
-                'Course %s is not free. You have to pay for it before you can download.'
+                'Course %s is not free. You have to pay for it before you can download. '
                 'Use this URL to confirm purchase: %s' % (course_id, checkout_url), expected=True)
 
         enroll_url = unescapeHTML(self._search_regex(
-            r'href=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1',
+            r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
             webpage, 'enroll url', group='url', default=None))
         if enroll_url:
+            if not enroll_url.startswith('http'):
+                enroll_url = compat_urlparse.urljoin(base_url, enroll_url)
             webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course')
             if '>You have enrolled in' in webpage:
                 self.to_screen('%s: Successfully enrolled in the course' % course_id)
@@ -144,14 +147,15 @@ class UdemyIE(InfoExtractor):
         webpage = self._download_webpage(url, lecture_id)
 
         course_id = self._search_regex(
-            r'data-course-id=["\'](\d+)', webpage, 'course id')
+            (r'data-course-id=["\'](\d+)', r'&quot;id&quot;\s*:\s*(\d+)'),
+            webpage, 'course id')
 
         try:
             lecture = self._download_lecture(course_id, lecture_id)
         except ExtractorError as e:
             # Error could possibly mean we are not enrolled in the course
             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self._enroll_course(webpage, course_id)
+                self._enroll_course(url, webpage, course_id)
                 lecture = self._download_lecture(course_id, lecture_id)
             else:
                 raise
@@ -243,7 +247,7 @@ class UdemyCourseIE(UdemyIE):
         course_id = response['id']
         course_title = response.get('title')
 
-        self._enroll_course(webpage, course_id)
+        self._enroll_course(url, webpage, course_id)
 
         response = self._download_json(
             'https://www.udemy.com/api-1.1/courses/%s/curriculum' % course_id,