Merge remote-tracking branch 'joelverhagen/master'
[youtube-dl] / youtube_dl / InfoExtractors.py
index 35ba6cc5c9d59752621178f568473f49a7357156..13b04ab5bcce4ee1e57e46afab0b198f1a477991 100644 (file)
@@ -2253,6 +2253,25 @@ class ComedyCentralIE(InfoExtractor):
        _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
        IE_NAME = u'comedycentral'
 
+       _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
+
+       _video_extensions = {
+               '3500': 'mp4',
+               '2200': 'mp4',
+               '1700': 'mp4',
+               '1200': 'mp4',
+               '750': 'mp4',
+               '400': 'mp4',
+       }
+       _video_dimensions = {
+               '3500': '1280x720',
+               '2200': '960x540',
+               '1700': '768x432',
+               '1200': '640x360',
+               '750': '512x288',
+               '400': '384x216',
+       }
+
        def report_extraction(self, episode_id):
                self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
 
@@ -2265,6 +2284,13 @@ class ComedyCentralIE(InfoExtractor):
        def report_player_url(self, episode_id):
                self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
 
+
+       def _print_formats(self, formats):
+               print('Available formats:')
+               for x in formats:
+                       print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
+
+
        def _real_extract(self, url):
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@ -2305,10 +2331,19 @@ class ComedyCentralIE(InfoExtractor):
                        epTitle = mobj.group('episode')
 
                mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
                if len(mMovieParams) == 0:
-                       self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
-                       return
+                       # The Colbert Report embeds the reference in a data-mgid
+                       # attribute without a URL prefix, so extract the alternate
+                       # reference and then add the URL prefix manually.
 
+                       altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+                       if len(altMovieParams) == 0:
+                               self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+                               return
+                       else:
+                               mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+               
                playerUrl_raw = mMovieParams[0][0]
                self.report_player_url(epTitle)
                try:
@@ -2357,10 +2392,31 @@ class ComedyCentralIE(InfoExtractor):
                        if len(turls) == 0:
                                self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                                continue
+                       
+                       if self._downloader.params.get('listformats', None):
+                               self._print_formats([i[0] for i in turls])
+                               return
 
                        # For now, just pick the highest bitrate
                        format,video_url = turls[-1]
 
+                       # Get the format arg from the arg stream
+                       req_format = self._downloader.params.get('format', None)
+
+                       # Select format if we can find one
+                       for f,v in turls:
+                               if f == req_format:
+                                       format, video_url = f, v
+                                       break
+
+                       # Patch to download from alternative CDN, which does not
+                       # break on current RTMPDump builds
+                       broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
+                       better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
+
+                       if video_url.startswith(broken_cdn):
+                               video_url = video_url.replace(broken_cdn, better_cdn)
+
                        effTitle = showId + u'-' + epTitle
                        info = {
                                'id': shortMediaId,
@@ -2372,7 +2428,7 @@ class ComedyCentralIE(InfoExtractor):
                                'format': format,
                                'thumbnail': None,
                                'description': officialTitle,
-                               'player_url': playerUrl
+                               'player_url': None #playerUrl
                        }
 
                        results.append(info)