Unify coding cookie
[youtube-dl] / youtube_dl / extractor / safari.py
index 6ba91f202baadbfd72160cc739efde868a60d421..8b35fd244addc68bb99b345c949284bc5f170361 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
@@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor):
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE):
         # non-digits in course id
         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -100,13 +103,13 @@ class SafariIE(SafariBaseIE):
 
         webpage = self._download_webpage(url, video_id)
         reference_id = self._search_regex(
-            r'data-reference-id=(["\'])(?P<id>.+?)\1',
+            r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura reference id', group='id')
         partner_id = self._search_regex(
-            r'data-partner-id=(["\'])(?P<id>.+?)\1',
+            r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura widget id', group='id')
         ui_id = self._search_regex(
-            r'data-ui-id=(["\'])(?P<id>.+?)\1',
+            r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura uiconf id', group='id')
 
         query = {
@@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE):
 
 class SafariApiIE(SafariBaseIE):
     IE_NAME = 'safari:api'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
         'only_matching': True,
-    }
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)