[udemy] Improve course id extraction (closes #14938)
author: Sergey M․ <dstftw@gmail.com>
Sat, 9 Dec 2017 12:52:31 +0000 (19:52 +0700)
committer: Sergey M․ <dstftw@gmail.com>
Sat, 9 Dec 2017 13:02:49 +0000 (20:02 +0700)
youtube_dl/extractor/udemy.py

index c248ea72784cf58b43c74725315ac00aa6c61bf5..b66033923c6c9d3550352d06f4608e31048b19d6 100644 (file)
@@ -62,11 +62,11 @@ class UdemyIE(InfoExtractor):
     def _extract_course_info(self, webpage, video_id):
         course = self._parse_json(
             unescapeHTML(self._search_regex(
-                r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
+                r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+                webpage, 'course', default='{}')),
             video_id, fatal=False) or {}
         course_id = course.get('id') or self._search_regex(
-            (r'&quot;id&quot;\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
-            webpage, 'course id')
+            r'data-course-id=["\'](\d+)', webpage, 'course id')
         return course_id, course.get('title')
 
     def _enroll_course(self, base_url, webpage, course_id):