Merge remote-tracking branch 'joelverhagen/master'

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 447695fda798b39c8c3fdac5bf45fdca36fde007..13b04ab5bcce4ee1e57e46afab0b198f1a477991 100644 (file)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -2291,7 +2291,6 @@ class ComedyCentralIE(InfoExtractor):
                         print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
  
  
-
         def _real_extract(self, url):
                 mobj = re.match(self._VALID_URL, url)
                 if mobj is None:
@@ -2332,10 +2331,19 @@ class ComedyCentralIE(InfoExtractor):
                         epTitle = mobj.group('episode')
  
                 mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
                 if len(mMovieParams) == 0:
-                       self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
-                       return
+                       # The Colbert Report embeds the information in a data-mgid
+                       # attribute without a URL prefix, so extract that alternate
+                       # reference and then add the URL prefix manually.
  
+                       altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+                       if len(altMovieParams) == 0:
+                               self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+                               return
+                       else:
+                               mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+               
                 playerUrl_raw = mMovieParams[0][0]
                 self.report_player_url(epTitle)
                 try:
@@ -2384,33 +2392,30 @@ class ComedyCentralIE(InfoExtractor):
                         if len(turls) == 0:
                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                                 continue
-
-                       # For now, just pick the highest bitrate
                         
-                       print turls
-
                         if self._downloader.params.get('listformats', None):
-                           self._print_formats([i[0] for i in turls])
-                           return
+                               self._print_formats([i[0] for i in turls])
+                               return
+
+                       # For now, just pick the highest bitrate
+                       format,video_url = turls[-1]
  
-                       format,video_url = turls[-1]
+                       # Get the format arg from the arg stream
                         req_format = self._downloader.params.get('format', None)
  
+                       # Select format if we can find one
                         for f,v in turls:
-                           if f == req_format:
-                             format, video_url = f, v
-                             break
-
-                       # Patch to download from alternative CDN, which does not 
-                        # break on current RTMPDump builds
-            
+                               if f == req_format:
+                                       format, video_url = f, v
+                                       break
  
+                       # Patch to download from alternative CDN, which does not
+                       # break on current RTMPDump builds
                         broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
                         better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
-            
+
                         if video_url.startswith(broken_cdn):
-                            video_url = video_url.replace(broken_cdn, better_cdn)
-                    
+                               video_url = video_url.replace(broken_cdn, better_cdn)
  
                         effTitle = showId + u'-' + epTitle
                         info = {