Merge pull request #2272 from dstftw/master
author Jaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Wed, 29 Jan 2014 22:58:14 +0000 (14:58 -0800)
committer Jaime Marquínez Ferrándiz <jaimeMF@users.noreply.github.com>
Wed, 29 Jan 2014 22:58:14 +0000 (14:58 -0800)
Improve some regexes

youtube_dl/extractor/comedycentral.py
youtube_dl/extractor/generic.py

index 3333d433bc42f89c0dc8dc7ff264010ef5bda9a6..ed3986f313a149f0db4a69dc92762730297ced1a 100644 (file)
@@ -14,7 +14,7 @@ from ..utils import (
 
 
 class ComedyCentralIE(MTVServicesInfoExtractor):
-    _VALID_URL = r'''(?x)https?://(?:www.)?comedycentral.com/
+    _VALID_URL = r'''(?x)https?://(?:www\.)?comedycentral\.com/
         (video-clips|episodes|cc-studios|video-collections)
         /(?P<title>.*)'''
     _FEED_URL = 'http://comedycentral.com/feeds/mrss/'
@@ -86,7 +86,7 @@ class ComedyCentralShowsIE(InfoExtractor):
 
     @staticmethod
     def _transform_rtmp_url(rtmp_video_url):
-        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url)
+        m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp\.comedystor/.*)$', rtmp_video_url)
         if not m:
             raise ExtractorError('Cannot transform RTMP url')
         base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/'
index 48de379b723741ed826e19bc1d4db308cf7bed8e..e0d51996ded5f1604bebe5d0c03c1dbc815f31ce 100644 (file)
@@ -254,7 +254,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded (iframe) Vimeo player
         mobj = re.search(
-            r'<iframe[^>]+?src="((?:https?:)?//player.vimeo.com/video/.+?)"', webpage)
+            r'<iframe[^>]+?src="((?:https?:)?//player\.vimeo\.com/video/.+?)"', webpage)
         if mobj:
             player_url = unescapeHTML(mobj.group(1))
             surl = smuggle_url(player_url, {'Referer': url})
@@ -262,7 +262,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded (swf embed) Vimeo player
         mobj = re.search(
-            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo.com/moogaloop.swf.+?)"', webpage)
+            r'<embed[^>]+?src="(https?://(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
         if mobj:
             return self.url_result(mobj.group(1), 'Vimeo')
 
@@ -332,7 +332,7 @@ class GenericIE(InfoExtractor):
             return self.url_result(mobj.group(1), 'Aparat')
 
         # Look for MPORA videos
-        mobj = re.search(r'<iframe .*?src="(http://mpora\.com/videos/[^"]+)"', webpage)
+        mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
         if mobj is not None:
             return self.url_result(mobj.group(1), 'Mpora')
 
@@ -350,7 +350,7 @@ class GenericIE(InfoExtractor):
 
         # Look for embedded Huffington Post player
         mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live.huffingtonpost\.com/.+?)\1', webpage)
+            r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
         if mobj is not None:
             return self.url_result(mobj.group('url'), 'HuffPost')