X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=325c5ecd4c580018fd168f3812c3aafb885e5c57;hb=7da5556ac2985c246de0f0e4df982585577f44ba;hp=e0c50c402279d9b32c58e7168b3ce6f32f919120;hpb=46bfb422584b46462ab4859f400758e0e7e977ad;p=youtube-dl

diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index e0c50c402..325c5ecd4 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -124,8 +124,8 @@ class InfoExtractor(object):
                 errnote = u'Unable to download webpage'
             raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
 
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
-        """ Returns the data of the page as a string """
+    def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None):
+        """ Returns a tuple (page content as string, URL handle) """
         urlh = self._request_webpage(url_or_request, video_id, note, errnote)
         content_type = urlh.headers.get('Content-Type', '')
         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
@@ -142,7 +142,12 @@ class InfoExtractor(object):
             self.to_screen(u'Dumping request to ' + url)
             dump = base64.b64encode(webpage_bytes).decode('ascii')
             self._downloader.to_screen(dump)
-        return webpage_bytes.decode(encoding, 'replace')
+        content = webpage_bytes.decode(encoding, 'replace')
+        return (content, urlh)
+
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
+        """ Returns the data of the page as a string (first item of the tuple from _download_webpage_handle) """
+        return self._download_webpage_handle(url_or_request, video_id, note, errnote)[0]
 
     def to_screen(self, msg):
         """Print msg to screen, prefixing it with '[ie_name]'"""
@@ -206,7 +211,7 @@ class YoutubeIE(InfoExtractor):
                      ([0-9A-Za-z_-]+)                                         # here is it! the YouTube video ID
                      (?(1).+)?                                                # if we found the ID, everything can follow
                      $"""
-    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
+    _LANG_URL = r'https://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
     _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
     _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
     _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
@@ -422,7 +427,7 @@ class YoutubeIE(InfoExtractor):
 
         # Log in
         login_form_strs = {
-                u'continue': u'http://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
+                u'continue': u'https://www.youtube.com/signin?action_handle_signin=true&feature=sign_in_button&hl=en_US&nomobiletemp=1',
                 u'Email': username,
                 u'GALX': galx,
                 u'Passwd': password,
@@ -482,12 +487,12 @@ class YoutubeIE(InfoExtractor):
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
-            url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
+            url = 'https://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/')
         video_id = self._extract_id(url)
 
         # Get video webpage
         self.report_video_webpage_download(video_id)
-        url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
+        url = 'https://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
         request = compat_urllib_request.Request(url)
         try:
             video_webpage_bytes = compat_urllib_request.urlopen(request).read()
@@ -779,7 +784,6 @@ class MetacafeIE(InfoExtractor):
             'ext':      video_extension.decode('utf-8'),
         }]
 
-
 class DailymotionIE(InfoExtractor):
     """Information Extractor for Dailymotion"""
 
@@ -2266,16 +2270,14 @@ class ComedyCentralIE(InfoExtractor):
                 epTitle = mobj.group('episode')
 
         self.report_extraction(epTitle)
-        webpage = self._download_webpage(url, epTitle)
+        webpage,htmlHandle = self._download_webpage_handle(url, epTitle)
         if dlNewest:
             url = htmlHandle.geturl()
             mobj = re.match(self._VALID_URL, url, re.VERBOSE)
             if mobj is None:
-                self._downloader.report_error(u'Invalid redirected URL: ' + url)
-                return
+                raise ExtractorError(u'Invalid redirected URL: ' + url)
             if mobj.group('episode') == '':
-                self._downloader.report_error(u'Redirected URL is still not specific: ' + url)
-                return
+                raise ExtractorError(u'Redirected URL is still not specific: ' + url)
             epTitle = mobj.group('episode')
 
         mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage)
@@ -2287,8 +2289,7 @@ class ComedyCentralIE(InfoExtractor):
 
             altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage)
             if len(altMovieParams) == 0:
-                self._downloader.report_error(u'unable to find Flash URL in webpage ' + url)
-                return
+                raise ExtractorError(u'unable to find Flash URL in webpage ' + url)
             else:
                 mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
 
@@ -3486,7 +3487,7 @@ class WorldStarHipHopIE(InfoExtractor):
     IE_NAME = u'WorldStarHipHop'
 
     def _real_extract(self, url):
-        _src_url = r"""(http://(hw-videos|hw-post1).*(?:mp4|flv))"""
+        _src_url = r'so\.addVariable\("file","(.*?)"\)'
 
         m = re.match(self._VALID_URL, url)
         video_id = m.group('id')
@@ -3496,7 +3497,7 @@ class WorldStarHipHopIE(InfoExtractor):
         mobj = re.search(_src_url, webpage_src)
 
         if mobj is not None:
-            video_url = mobj.group()
+            video_url = mobj.group(1)
             if 'mp4' in video_url:
                 ext = 'mp4'
             else:
@@ -3854,6 +3855,7 @@ class TEDIE(InfoExtractor):
                         |
                         ((?P<type_talk>talks)) # We have a simple talk
                    )
+                   (/lang/(.*?))? # The url may contain the language
                    /(?P<name>\w+) # Here goes the name and then ".html"
                    '''
 
@@ -4183,6 +4185,37 @@ class BandcampIE(InfoExtractor):
 
         return [track_info]
 
+class RedTubeIE(InfoExtractor):
+    """Information Extractor for redtube (numeric video page URLs, mp4 source only)"""
+    _VALID_URL = r'(?:https?://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
+
+    def _real_extract(self, url):
+        """Return a one-element list with the id, url, ext and title; raise ExtractorError on failure."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        video_id = mobj.group('id')
+        video_extension = 'mp4'
+        webpage = self._download_webpage(url, video_id)
+        self.report_extraction(video_id)
+        # [^"]+ stops at the closing quote; a greedy (.+) could swallow later tags on the same line
+        mobj = re.search(r'<source src="([^"]+)" type="video/mp4">', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract media URL')
+        video_url = mobj.group(1)
+        # Non-greedy so the title ends at the first </h1> rather than the last one on the line
+        mobj = re.search(r'<h1 class="videoTitle slidePanelMovable">(.+?)</h1>', webpage)
+        if mobj is None:
+            raise ExtractorError(u'Unable to extract title')
+        video_title = mobj.group(1)
+
+        return [{
+            'id':       video_id,
+            'url':      video_url,
+            'ext':      video_extension,
+            'title':    video_title,
+        }]
+
 
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
@@ -4239,6 +4272,7 @@ def gen_extractors():
         ARDIE(),
         TumblrIE(),
         BandcampIE(),
+        RedTubeIE(),
         GenericIE()
     ]