info_dict['upload_date'] is documented in --output, IEs MUST specify it
[youtube-dl] / youtube_dl / InfoExtractors.py
index d2d8d7b778107e0321bd7b684bc89ec19a88de0b..8ea03ea0105792069d7bee50e525f079bb958a5b 100644 (file)
@@ -29,33 +29,35 @@ class InfoExtractor(object):
        """Information Extractor class.
 
        Information extractors are the classes that, given a URL, extract
-       information from the video (or videos) the URL refers to. This
-       information includes the real video URL, the video title and simplified
-       title, author and others. The information is stored in a dictionary
-       which is then passed to the FileDownloader. The FileDownloader
-       processes this information possibly downloading the video to the file
-       system, among other possible outcomes. The dictionaries must include
-       the following fields:
-
-       id:             Video identifier.
-       url:            Final video URL.
-       uploader:       Nickname of the video uploader.
-       title:          Literal title.
-       ext:            Video filename extension.
-       format:         Video format.
-       player_url:     SWF Player URL (may be None).
-
-       The following fields are optional. Their primary purpose is to allow
-       youtube-dl to serve as the backend for a video search function, such
-       as the one in youtube2mp3.  They are only used when their respective
-       forced printing functions are called:
-
-       thumbnail:      Full URL to a video thumbnail image.
-       description:    One-line video description.
+       information about the video (or videos) the URL refers to. This
+       information includes the real video URL, the video title, author and
+       others. The information is stored in a dictionary which is then 
+       passed to the FileDownloader. The FileDownloader processes this
+       information possibly downloading the video to the file system, among
+       other possible outcomes.
+
+       The dictionaries must include the following fields:
+
+       id:             Video identifier.
+       url:            Final video URL.
+       uploader:       Nickname of the video uploader.
+       upload_date:    Video upload date (YYYYMMDD).
+       title:          Video title, unescaped.
+       ext:            Video filename extension.
+
+       The following fields are optional:
+
+       format:         The video format, defaults to ext (used for --get-format)
+       thumbnail:      Full URL to a video thumbnail image.
+       description:    One-line video description.
+       player_url:     SWF Player URL (used for rtmpdump).
 
        Subclasses of this one should re-define the _real_initialize() and
        _real_extract() methods and define a _VALID_URL regexp.
        Probably, they should also be added to the list of extractors.
+
+       _real_extract() must return a *list* of information dictionaries as
+       described above.
        """
 
        _ready = False
@@ -213,9 +215,9 @@ class YoutubeIE(InfoExtractor):
                return srt
 
        def _print_formats(self, formats):
-               print 'Available formats:'
+               print('Available formats:')
                for x in formats:
-                       print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
+                       print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')))
 
        def _real_initialize(self):
                if self._downloader is None:
@@ -238,7 +240,7 @@ class YoutubeIE(InfoExtractor):
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
-                               self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+                               self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
                                return
 
                # Set language
@@ -247,7 +249,7 @@ class YoutubeIE(InfoExtractor):
                        self.report_lang()
                        urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
+                       self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
                        return
 
                # No authentication to be performed
@@ -270,7 +272,7 @@ class YoutubeIE(InfoExtractor):
                                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+                       self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
                        return
 
                # Confirm age
@@ -283,7 +285,7 @@ class YoutubeIE(InfoExtractor):
                        self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
                        return
 
        def _real_extract(self, url):
@@ -305,7 +307,7 @@ class YoutubeIE(InfoExtractor):
                try:
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                # Attempt to extract SWF player URL
@@ -327,7 +329,7 @@ class YoutubeIE(InfoExtractor):
                                if 'token' in video_info:
                                        break
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
                                return
                if 'token' not in video_info:
                        if 'reason' in video_info:
@@ -390,7 +392,7 @@ class YoutubeIE(InfoExtractor):
                                try:
                                        srt_list = urllib2.urlopen(request).read()
                                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                                       raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                                       raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                                srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
                                srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
                                if not srt_lang_list:
@@ -407,7 +409,7 @@ class YoutubeIE(InfoExtractor):
                                try:
                                        srt_xml = urllib2.urlopen(request).read()
                                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                                       raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err))
+                                       raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
                                if not srt_xml:
                                        raise Trouble(u'WARNING: unable to download video subtitles')
                                video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
@@ -475,6 +477,9 @@ class YoutubeIE(InfoExtractor):
                        # Extension
                        video_extension = self._video_extensions.get(format_param, 'flv')
 
+                       video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'),
+                                                           self._video_dimensions.get(format_param, '???'))
+
                        results.append({
                                'id':           video_id.decode('utf-8'),
                                'url':          video_real_url.decode('utf-8'),
@@ -482,7 +487,7 @@ class YoutubeIE(InfoExtractor):
                                'upload_date':  upload_date,
                                'title':        video_title,
                                'ext':          video_extension.decode('utf-8'),
-                               'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
+                               'format':       video_format,
                                'thumbnail':    video_thumbnail.decode('utf-8'),
                                'description':  video_description,
                                'player_url':   player_url,
@@ -526,7 +531,7 @@ class MetacafeIE(InfoExtractor):
                        self.report_disclaimer()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err))
                        return
 
                # Confirm age
@@ -539,7 +544,7 @@ class MetacafeIE(InfoExtractor):
                        self.report_age_confirmation()
                        disclaimer = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
                        return
 
        def _real_extract(self, url):
@@ -563,7 +568,7 @@ class MetacafeIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract URL, uploader and title from webpage
@@ -616,8 +621,6 @@ class MetacafeIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -656,7 +659,7 @@ class DailymotionIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract URL, uploader and title from webpage
@@ -715,8 +718,6 @@ class DailymotionIE(InfoExtractor):
                        'upload_date':  video_upload_date,
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -754,7 +755,7 @@ class GoogleIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract URL, uploader, and title from webpage
@@ -793,7 +794,7 @@ class GoogleIE(InfoExtractor):
                        try:
                                webpage = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                                return
                        mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
                        if mobj is None:
@@ -810,8 +811,6 @@ class GoogleIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -849,7 +848,7 @@ class PhotobucketIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract URL, uploader, and title from webpage
@@ -877,8 +876,6 @@ class PhotobucketIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -919,7 +916,7 @@ class YahooIE(InfoExtractor):
                        try:
                                webpage = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                                return
 
                        mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
@@ -943,7 +940,7 @@ class YahooIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract uploader and title from webpage
@@ -1001,7 +998,7 @@ class YahooIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Extract media URL from playlist XML
@@ -1022,7 +1019,6 @@ class YahooIE(InfoExtractor):
                        'thumbnail':    video_thumbnail.decode('utf-8'),
                        'description':  video_description,
                        'thumbnail':    video_thumbnail,
-                       'player_url':   None,
                }]
 
 
@@ -1059,7 +1055,7 @@ class VimeoIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                # Now we begin extracting as much information as we can from what we
@@ -1136,7 +1132,6 @@ class VimeoIE(InfoExtractor):
                        'ext':          video_extension,
                        'thumbnail':    video_thumbnail,
                        'description':  video_description,
-                       'player_url':   None,
                }]
 
 
@@ -1225,7 +1220,7 @@ class GenericIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
                except ValueError, err:
                        # since this is the last-resort InfoExtractor, if
@@ -1282,8 +1277,6 @@ class GenericIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -1346,7 +1339,7 @@ class YoutubeSearchIE(InfoExtractor):
                        try:
                                data = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
                                return
                        api_response = json.loads(data)['data']
 
@@ -1423,7 +1416,7 @@ class GoogleSearchIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                return
 
                        # Extract video identifiers
@@ -1506,7 +1499,7 @@ class YahooSearchIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                return
 
                        # Extract video identifiers
@@ -1576,7 +1569,7 @@ class YoutubePlaylistIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                return
 
                        # Extract video identifiers
@@ -1633,7 +1626,7 @@ class YoutubeChannelIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                return
 
                        # Extract video identifiers
@@ -1696,7 +1689,7 @@ class YoutubeUserIE(InfoExtractor):
                        try:
                                page = urllib2.urlopen(request).read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                                return
 
                        # Extract video identifiers
@@ -1768,7 +1761,7 @@ class BlipTVUserIE(InfoExtractor):
                        mobj = re.search(r'data-users-id="([^"]+)"', page)
                        page_base = page_base % mobj.group(1)
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                        return
 
 
@@ -1856,7 +1849,7 @@ class DepositFilesIE(InfoExtractor):
                        self.report_download_webpage(file_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err))
                        return
 
                # Search for the real file URL
@@ -1888,8 +1881,6 @@ class DepositFilesIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        file_title,
                        'ext':          file_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 
@@ -1973,7 +1964,7 @@ class FacebookIE(InfoExtractor):
                                else:
                                        raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
                        except (IOError, netrc.NetrcParseError), err:
-                               self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+                               self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
                                return
 
                if useremail is None:
@@ -1993,7 +1984,7 @@ class FacebookIE(InfoExtractor):
                                self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                                return
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+                       self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
                        return
 
        def _real_extract(self, url):
@@ -2010,7 +2001,7 @@ class FacebookIE(InfoExtractor):
                        page = urllib2.urlopen(request)
                        video_webpage = page.read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                # Start extracting information
@@ -2095,7 +2086,6 @@ class FacebookIE(InfoExtractor):
                                'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
                                'thumbnail':    video_thumbnail.decode('utf-8'),
                                'description':  video_description.decode('utf-8'),
-                               'player_url':   None,
                        })
                return results
 
@@ -2144,13 +2134,13 @@ class BlipTVIE(InfoExtractor):
                                        'urlhandle': urlh
                                }
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
                        return
                if info is None: # Regular URL
                        try:
                                json_code = urlh.read()
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
+                               self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err))
                                return
 
                        try:
@@ -2218,7 +2208,7 @@ class MyVideoIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                self.report_extraction(video_id)
@@ -2243,8 +2233,6 @@ class MyVideoIE(InfoExtractor):
                        'upload_date':  u'NA',
                        'title':        video_title,
                        'ext':          u'flv',
-                       'format':       u'NA',
-                       'player_url':   None,
                }]
 
 class ComedyCentralIE(InfoExtractor):
@@ -2253,6 +2241,25 @@ class ComedyCentralIE(InfoExtractor):
        _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
        IE_NAME = u'comedycentral'
 
+       _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
+
+       _video_extensions = {
+               '3500': 'mp4',
+               '2200': 'mp4',
+               '1700': 'mp4',
+               '1200': 'mp4',
+               '750': 'mp4',
+               '400': 'mp4',
+       }
+       _video_dimensions = {
+               '3500': '1280x720',
+               '2200': '960x540',
+               '1700': '768x432',
+               '1200': '640x360',
+               '750': '512x288',
+               '400': '384x216',
+       }
+
        def report_extraction(self, episode_id):
                self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
 
@@ -2265,6 +2272,13 @@ class ComedyCentralIE(InfoExtractor):
        def report_player_url(self, episode_id):
                self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
 
+
+       def _print_formats(self, formats):
+               print('Available formats:')
+               for x in formats:
+                       print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???')))
+
+
        def _real_extract(self, url):
                mobj = re.match(self._VALID_URL, url)
                if mobj is None:
@@ -2305,10 +2319,19 @@ class ComedyCentralIE(InfoExtractor):
                        epTitle = mobj.group('episode')
 
                mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
+
                if len(mMovieParams) == 0:
-                       self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
-                       return
+                       # The Colbert Report embeds the information in a data-mgid
+                       # attribute without a URL prefix, so extract that alternate
+                       # reference and then add the URL prefix manually.
 
+                       altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html)
+                       if len(altMovieParams) == 0:
+                               self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
+                               return
+                       else:
+                               mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
+               
                playerUrl_raw = mMovieParams[0][0]
                self.report_player_url(epTitle)
                try:
@@ -2357,10 +2380,31 @@ class ComedyCentralIE(InfoExtractor):
                        if len(turls) == 0:
                                self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                                continue
+                       
+                       if self._downloader.params.get('listformats', None):
+                               self._print_formats([i[0] for i in turls])
+                               return
 
                        # For now, just pick the highest bitrate
                        format,video_url = turls[-1]
 
+                       # Get the format arg from the arg stream
+                       req_format = self._downloader.params.get('format', None)
+
+                       # Select format if we can find one
+                       for f,v in turls:
+                               if f == req_format:
+                                       format, video_url = f, v
+                                       break
+
+                       # Patch to download from alternative CDN, which does not
+                       # break on current RTMPDump builds
+                       broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"
+                       better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/"
+
+                       if video_url.startswith(broken_cdn):
+                               video_url = video_url.replace(broken_cdn, better_cdn)
+
                        effTitle = showId + u'-' + epTitle
                        info = {
                                'id': shortMediaId,
@@ -2372,7 +2416,7 @@ class ComedyCentralIE(InfoExtractor):
                                'format': format,
                                'thumbnail': None,
                                'description': officialTitle,
-                               'player_url': playerUrl
+                               'player_url': None #playerUrl
                        }
 
                        results.append(info)
@@ -2445,7 +2489,6 @@ class EscapistIE(InfoExtractor):
                        'upload_date': None,
                        'title': showName,
                        'ext': 'flv',
-                       'format': 'flv',
                        'thumbnail': imgUrl,
                        'description': description,
                        'player_url': playerUrl,
@@ -2480,7 +2523,7 @@ class CollegeHumorIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
@@ -2499,7 +2542,7 @@ class CollegeHumorIE(InfoExtractor):
                try:
                        metaXml = urllib2.urlopen(xmlUrl).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
                        return
 
                mdoc = xml.etree.ElementTree.fromstring(metaXml)
@@ -2510,7 +2553,6 @@ class CollegeHumorIE(InfoExtractor):
                        info['url'] = videoNode.findall('./file')[0].text
                        info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
                        info['ext'] = info['url'].rpartition('.')[2]
-                       info['format'] = info['ext']
                except IndexError:
                        self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
                        return
@@ -2545,7 +2587,7 @@ class XVideosIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                self.report_extraction(video_id)
@@ -2581,10 +2623,8 @@ class XVideosIE(InfoExtractor):
                        'upload_date': None,
                        'title': video_title,
                        'ext': 'flv',
-                       'format': 'flv',
                        'thumbnail': video_thumbnail,
                        'description': None,
-                       'player_url': None,
                }
 
                return [info]
@@ -2631,7 +2671,7 @@ class SoundcloudIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                self.report_extraction('%s/%s' % (uploader, slug_title))
@@ -2658,7 +2698,7 @@ class SoundcloudIE(InfoExtractor):
                mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
                if mobj:
                        description = mobj.group(1)
-               
+
                # upload date
                upload_date = None
                mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
@@ -2666,7 +2706,7 @@ class SoundcloudIE(InfoExtractor):
                        try:
                                upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
                        except Exception, e:
-                               self._downloader.to_stderr(str(e))
+                               self._downloader.to_stderr(compat_str(e))
 
                # for soundcloud, a request to a cross domain is required for cookies
                request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
@@ -2678,8 +2718,6 @@ class SoundcloudIE(InfoExtractor):
                        'upload_date':  upload_date,
                        'title':        title,
                        'ext':          u'mp3',
-                       'format':       u'NA',
-                       'player_url':   None,
                        'description': description.decode('utf-8')
                }]
 
@@ -2710,7 +2748,7 @@ class InfoQIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                self.report_extraction(url)
@@ -2746,11 +2784,9 @@ class InfoQIE(InfoExtractor):
                        'uploader': None,
                        'upload_date': None,
                        'title': video_title,
-                       'ext': extension,
-                       'format': extension, # Extension is always(?) mp4, but seems to be flv
+                       'ext': extension, # Extension is always(?) mp4, but seems to be flv
                        'thumbnail': None,
                        'description': video_description,
-                       'player_url': None,
                }
 
                return [info]
@@ -2796,15 +2832,15 @@ class MixcloudIE(InfoExtractor):
                return None
 
        def _print_formats(self, formats):
-               print 'Available formats:'
+               print('Available formats:')
                for fmt in formats.keys():
                        for b in formats[fmt]:
                                try:
                                        ext = formats[fmt][b][0]
-                                       print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
+                                       print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]))
                                except TypeError: # we have no bitrate info
                                        ext = formats[fmt][0]
-                                       print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
+                                       print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]))
                                        break
 
        def _real_extract(self, url):
@@ -2824,7 +2860,7 @@ class MixcloudIE(InfoExtractor):
                        self.report_download_json(file_url)
                        jsonData = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err))
                        return
 
                # parse JSON
@@ -2911,7 +2947,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                                self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
                                return
                        info['ext'] = info['url'].rpartition('.')[2]
-                       info['format'] = info['ext']
                        return [info]
                elif mobj.group('course'): # A course page
                        course = mobj.group('course')
@@ -3008,7 +3043,7 @@ class MTVIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
                        return
 
                mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
@@ -3041,7 +3076,7 @@ class MTVIE(InfoExtractor):
                try:
                        metadataXml = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err))
                        return
 
                mdoc = xml.etree.ElementTree.fromstring(metadataXml)
@@ -3128,7 +3163,7 @@ class YoukuIE(InfoExtractor):
                        self.report_download_webpage(video_id)
                        jsondata = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
 
                self.report_extraction(video_id)
@@ -3185,7 +3220,6 @@ class YoukuIE(InfoExtractor):
                                'uploader': None,
                                'title': video_title,
                                'ext': ext,
-                               'format': u'NA'
                        }
                        files_info.append(info)
 
@@ -3243,18 +3277,16 @@ class XNXXIE(InfoExtractor):
                        return
                video_thumbnail = result.group(1).decode('utf-8')
 
-               info = {'id': video_id,
-                               'url': video_url,
-                               'uploader': None,
-                               'upload_date': None,
-                               'title': video_title,
-                               'ext': 'flv',
-                               'format': 'flv',
-                               'thumbnail': video_thumbnail,
-                               'description': None,
-                               'player_url': None}
-
-               return [info]
+               return [{
+                       'id': video_id,
+                       'url': video_url,
+                       'uploader': None,
+                       'upload_date': None,
+                       'title': video_title,
+                       'ext': 'flv',
+                       'thumbnail': video_thumbnail,
+                       'description': None,
+               }]
 
 
 class GooglePlusIE(InfoExtractor):
@@ -3304,7 +3336,7 @@ class GooglePlusIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
                        return
 
                # Extract update date
@@ -3346,7 +3378,7 @@ class GooglePlusIE(InfoExtractor):
                try:
                        webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
                        return
                self.report_extract_vid_page(video_page)
 
@@ -3376,6 +3408,4 @@ class GooglePlusIE(InfoExtractor):
                        'upload_date':  upload_date.decode('utf-8'),
                        'title':        video_title.decode('utf-8'),
                        'ext':          video_extension.decode('utf-8'),
-                       'format':       u'NA',
-                       'player_url':   None,
                }]