X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=697c031c5119176874558aef23eadb163e45fcdd;hb=f0648fc18c91fde0c2db2e52c1cf78fe6bf3bbc4;hp=c5ab8907b34f1778c0e686fe5788f6e61783c10d;hpb=0b40544f290de329679aebf06e98056e707dd7e1;p=youtube-dl

diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index c5ab8907b..697c031c5 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -23,7 +23,7 @@ class InfoExtractor(object):
     Information extractors are the classes that, given a URL, extract
     information about the video (or videos) the URL refers to. This
     information includes the real video URL, the video title, author and
-    others. The information is stored in a dictionary which is then 
+    others. The information is stored in a dictionary which is then
     passed to the FileDownloader. The FileDownloader processes this
     information possibly downloading the video to the file system, among
     other possible outcomes.
@@ -32,7 +32,7 @@ class InfoExtractor(object):
 
     id:             Video identifier.
     url:            Final video URL.
-    uploader:       Nickname of the video uploader, unescaped.
+    uploader:       Full name of the video uploader, unescaped.
     upload_date:    Video upload date (YYYYMMDD).
     title:          Video title, unescaped.
     ext:            Video filename extension.
@@ -42,6 +42,7 @@ class InfoExtractor(object):
     format:         The video format, defaults to ext (used for --get-format)
     thumbnail:      Full URL to a video thumbnail image.
     description:    One-line video description.
+    uploader_id:    Nickname or id of the video uploader.
     player_url:     SWF Player URL (used for rtmpdump).
     subtitles:      The .srt file contents.
     urlhandle:      [internal] The urlHandle to be used to download the file,
@@ -159,7 +160,7 @@ class YoutubeIE(InfoExtractor):
         '44': '480x854',
         '45': '720x1280',
         '46': '1080x1920',
-    }   
+    }
     IE_NAME = u'youtube'
 
     def suitable(self, url):
@@ -219,6 +220,34 @@ class YoutubeIE(InfoExtractor):
             srt += caption + '\n\n'
         return srt
 
+    def _extract_subtitles(self, video_id):
+        self.report_video_subtitles_download(video_id)
+        request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+        try:
+            srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+        srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
+        srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
+        if not srt_lang_list:
+            return (u'WARNING: video has no closed captions', None)
+        if self._downloader.params.get('subtitleslang', False):
+            srt_lang = self._downloader.params.get('subtitleslang')
+        elif 'en' in srt_lang_list:
+            srt_lang = 'en'
+        else:
+            srt_lang = list(srt_lang_list.keys())[0]
+        if not srt_lang in srt_lang_list:
+            return (u'WARNING: no closed captions found in the specified language', None)
+        request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+        try:
+            srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+        if not srt_xml:
+            return (u'WARNING: unable to download video subtitles', None)
+        return (None, self._closed_captions_xml_to_srt(srt_xml))
+
     def _print_formats(self, formats):
         print('Available formats:')
         for x in formats:
@@ -272,7 +301,7 @@ class YoutubeIE(InfoExtractor):
         request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form))
         try:
             self.report_login()
-            login_results = compat_urllib_request.urlopen(request).read()
+            login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
             if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
                 return
@@ -288,7 +317,7 @@ class YoutubeIE(InfoExtractor):
         request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form))
         try:
             self.report_age_confirmation()
-            age_results = compat_urllib_request.urlopen(request).read()
+            age_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err))
             return
@@ -356,10 +385,18 @@ class YoutubeIE(InfoExtractor):
 
         # uploader
         if 'author' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+            self._downloader.trouble(u'ERROR: unable to extract uploader name')
             return
         video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
 
+        # uploader_id
+        video_uploader_id = None
+        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/user/([^"]+)">', video_webpage)
+        if mobj is not None:
+            video_uploader_id = mobj.group(1)
+        else:
+            self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+
         # title
         if 'title' not in video_info:
             self._downloader.trouble(u'ERROR: unable to extract video title')
@@ -395,35 +432,9 @@ class YoutubeIE(InfoExtractor):
         # closed captions
         video_subtitles = None
         if self._downloader.params.get('writesubtitles', False):
-            try:
-                self.report_video_subtitles_download(video_id)
-                request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
-                try:
-                    srt_list = compat_urllib_request.urlopen(request).read()
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
-                srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
-                srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
-                if not srt_lang_list:
-                    raise Trouble(u'WARNING: video has no closed captions')
-                if self._downloader.params.get('subtitleslang', False):
-                    srt_lang = self._downloader.params.get('subtitleslang')
-                elif 'en' in srt_lang_list:
-                    srt_lang = 'en'
-                else:
-                    srt_lang = srt_lang_list.keys()[0]
-                if not srt_lang in srt_lang_list:
-                    raise Trouble(u'WARNING: no closed captions found in the specified language')
-                request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
-                try:
-                    srt_xml = compat_urllib_request.urlopen(request).read()
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
-                if not srt_xml:
-                    raise Trouble(u'WARNING: unable to download video subtitles')
-                video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8'))
-            except Trouble as trouble:
-                self._downloader.trouble(trouble[0])
+            (srt_error, video_subtitles) = self._extract_subtitles(video_id)
+            if srt_error:
+                self._downloader.trouble(srt_error)
 
         if 'length_seconds' not in video_info:
             self._downloader.trouble(u'WARNING: unable to extract video duration')
@@ -443,7 +454,7 @@ class YoutubeIE(InfoExtractor):
         elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
             url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
             url_data = [compat_parse_qs(uds) for uds in url_data_strs]
-            url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
+            url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud]
             url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
 
             format_limit = self._downloader.params.get('format_limit', None)
@@ -493,6 +504,7 @@ class YoutubeIE(InfoExtractor):
                 'id':       video_id,
                 'url':      video_real_url,
                 'uploader': video_uploader,
+                'uploader_id': video_uploader_id,
                 'upload_date':  upload_date,
                 'title':    video_title,
                 'ext':      video_extension,
@@ -731,99 +743,6 @@ class DailymotionIE(InfoExtractor):
         }]
 
 
-class GoogleIE(InfoExtractor):
-    """Information extractor for video.google.com."""
-
-    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
-    IE_NAME = u'video.google'
-
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def report_download_webpage(self, video_id):
-        """Report webpage download."""
-        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
-
-    def report_extraction(self, video_id):
-        """Report information extraction."""
-        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
-
-    def _real_extract(self, url):
-        # Extract id from URL
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
-            return
-
-        video_id = mobj.group(1)
-
-        video_extension = 'mp4'
-
-        # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
-        try:
-            self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-            return
-
-        # Extract URL, uploader, and title from webpage
-        self.report_extraction(video_id)
-        mobj = re.search(r"download_url:'([^']+)'", webpage)
-        if mobj is None:
-            video_extension = 'flv'
-            mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract media URL')
-            return
-        mediaURL = compat_urllib_parse.unquote(mobj.group(1))
-        mediaURL = mediaURL.replace('\\x3d', '\x3d')
-        mediaURL = mediaURL.replace('\\x26', '\x26')
-
-        video_url = mediaURL
-
-        mobj = re.search(r'<title>(.*)</title>', webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract title')
-            return
-        video_title = mobj.group(1).decode('utf-8')
-
-        # Extract video description
-        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: unable to extract video description')
-            return
-        video_description = mobj.group(1).decode('utf-8')
-        if not video_description:
-            video_description = 'No description available.'
-
-        # Extract video thumbnail
-        if self._downloader.params.get('forcethumbnail', False):
-            request = compat_urllib_request.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
-            try:
-                webpage = compat_urllib_request.urlopen(request).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-                return
-            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
-            if mobj is None:
-                self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
-                return
-            video_thumbnail = mobj.group(1)
-        else:   # we need something to pass to process_info
-            video_thumbnail = ''
-
-        return [{
-            'id':       video_id.decode('utf-8'),
-            'url':      video_url.decode('utf-8'),
-            'uploader': None,
-            'upload_date':  None,
-            'title':    video_title,
-            'ext':      video_extension.decode('utf-8'),
-        }]
-
-
 class PhotobucketIE(InfoExtractor):
     """Information extractor for photobucket.com."""
 
@@ -892,6 +811,7 @@ class PhotobucketIE(InfoExtractor):
 class YahooIE(InfoExtractor):
     """Information extractor for video.yahoo.com."""
 
+    _WORKING = False
     # _VALID_URL matches all Yahoo! Video URLs
     # _VPAGE_URL matches only the extractable '/watch/' URLs
     _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
@@ -1080,26 +1000,27 @@ class VimeoIE(InfoExtractor):
         except:
             self._downloader.trouble(u'ERROR: unable to extract info section')
             return
-        
+
         # Extract title
         video_title = config["video"]["title"]
 
-        # Extract uploader
+        # Extract uploader and uploader_id
         video_uploader = config["video"]["owner"]["name"]
+        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
 
         # Extract video thumbnail
         video_thumbnail = config["video"]["thumbnail"]
 
         # Extract video description
-        video_description = get_element_by_id("description", webpage)
+        video_description = get_element_by_attribute("itemprop", "description", webpage)
         if video_description: video_description = clean_html(video_description)
         else: video_description = ''
 
         # Extract upload date
         video_upload_date = None
-        mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
+        mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
         if mobj is not None:
-            video_upload_date = mobj.group(1)
+            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
 
         # Vimeo specific: extract request signature and timestamp
         sig = config['request']['signature']
@@ -1137,6 +1058,7 @@ class VimeoIE(InfoExtractor):
             'id':       video_id,
             'url':      video_url,
             'uploader': video_uploader,
+            'uploader_id': video_uploader_id,
             'upload_date':  video_upload_date,
             'title':    video_title,
             'ext':      video_extension,
@@ -1263,7 +1185,7 @@ class ArteTvIE(InfoExtractor):
             'url':          compat_urllib_parse.unquote(info.get('url')),
             'uploader':     u'arte.tv',
             'upload_date':  info.get('date'),
-            'title':        info.get('title'),
+            'title':        info.get('title').decode('utf-8'),
             'ext':          u'mp4',
             'format':       u'NA',
             'player_url':   None,
@@ -1303,7 +1225,7 @@ class GenericIE(InfoExtractor):
     def report_following_redirect(self, new_url):
         """Report information extraction."""
         self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
-        
+
     def _test_redirect(self, url):
         """Check if it is a redirect, like url shorteners, in case restart chain."""
         class HeadRequest(compat_urllib_request.Request):
@@ -1312,38 +1234,38 @@ class GenericIE(InfoExtractor):
 
         class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
             """
-            Subclass the HTTPRedirectHandler to make it use our 
+            Subclass the HTTPRedirectHandler to make it use our
             HeadRequest also on the redirected URL
             """
-            def redirect_request(self, req, fp, code, msg, headers, newurl): 
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
                 if code in (301, 302, 303, 307):
-                    newurl = newurl.replace(' ', '%20') 
+                    newurl = newurl.replace(' ', '%20')
                     newheaders = dict((k,v) for k,v in req.headers.items()
                                       if k.lower() not in ("content-length", "content-type"))
-                    return HeadRequest(newurl, 
+                    return HeadRequest(newurl,
                                        headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(), 
-                                       unverifiable=True) 
-                else: 
-                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) 
+                                       origin_req_host=req.get_origin_req_host(),
+                                       unverifiable=True)
+                else:
+                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
 
         class HTTPMethodFallback(compat_urllib_request.BaseHandler):
             """
             Fallback to GET if HEAD is not allowed (405 HTTP error)
             """
-            def http_error_405(self, req, fp, code, msg, headers): 
+            def http_error_405(self, req, fp, code, msg, headers):
                 fp.read()
                 fp.close()
 
                 newheaders = dict((k,v) for k,v in req.headers.items()
                                   if k.lower() not in ("content-length", "content-type"))
-                return self.parent.open(compat_urllib_request.Request(req.get_full_url(), 
-                                                 headers=newheaders, 
-                                                 origin_req_host=req.get_origin_req_host(), 
+                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
+                                                 headers=newheaders,
+                                                 origin_req_host=req.get_origin_req_host(),
                                                  unverifiable=True))
 
         # Build our opener
-        opener = compat_urllib_request.OpenerDirector() 
+        opener = compat_urllib_request.OpenerDirector()
         for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                         HTTPMethodFallback, HEADRedirectHandler,
                         compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
@@ -1588,6 +1510,8 @@ class GoogleSearchIE(InfoExtractor):
 
 class YahooSearchIE(InfoExtractor):
     """Information Extractor for Yahoo! Video search queries."""
+
+    _WORKING = False
     _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
     _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
     _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
@@ -1715,7 +1639,7 @@ class YoutubePlaylistIE(InfoExtractor):
             url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
             request = compat_urllib_request.Request(url)
             try:
-                page = compat_urllib_request.urlopen(request).read().decode('utf8')
+                page = compat_urllib_request.urlopen(request).read().decode('utf-8')
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                 return
@@ -1844,7 +1768,7 @@ class YoutubeUserIE(InfoExtractor):
             request = compat_urllib_request.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
 
             try:
-                page = compat_urllib_request.urlopen(request).read()
+                page = compat_urllib_request.urlopen(request).read().decode('utf-8')
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                 return
@@ -2203,7 +2127,7 @@ class FacebookIE(InfoExtractor):
         video_description = video_info.get('description', 'No description available.')
 
         url_map = video_info['video_urls']
-        if len(url_map.keys()) > 0:
+        if len(list(url_map.keys())) > 0:
             # Decide which formats to download
             req_format = self._downloader.params.get('format', None)
             format_limit = self._downloader.params.get('format_limit', None)
@@ -2346,7 +2270,7 @@ class MyVideoIE(InfoExtractor):
 
     def __init__(self, downloader=None):
         InfoExtractor.__init__(self, downloader)
-    
+
     def report_download_webpage(self, video_id):
         """Report webpage download."""
         self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
@@ -2367,7 +2291,7 @@ class MyVideoIE(InfoExtractor):
         request = compat_urllib_request.Request('http://www.myvideo.de/watch/%s' % video_id)
         try:
             self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -2400,10 +2324,10 @@ class ComedyCentralIE(InfoExtractor):
     """Information extractor for The Daily Show and Colbert Report """
 
     # urls can be abbreviations like :thedailyshow or :colbert
-    # urls for episodes like: 
+    # urls for episodes like:
     # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
     #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
-    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524    
+    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
     _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                       |(https?://)?(www\.)?
                           (?P<showname>thedailyshow|colbertnation)\.com/
@@ -2411,7 +2335,7 @@ class ComedyCentralIE(InfoExtractor):
                           (?P<clip>
                               (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                               |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
-                     $"""                        
+                     $"""
     IE_NAME = u'comedycentral'
 
     _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2515,7 +2439,7 @@ class ComedyCentralIE(InfoExtractor):
                 return
             else:
                 mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
-        
+
         playerUrl_raw = mMovieParams[0][0]
         self.report_player_url(epTitle)
         try:
@@ -2564,7 +2488,7 @@ class ComedyCentralIE(InfoExtractor):
             if len(turls) == 0:
                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                 continue
-            
+
             if self._downloader.params.get('listformats', None):
                 self._print_formats([i[0] for i in turls])
                 return
@@ -2604,7 +2528,7 @@ class ComedyCentralIE(InfoExtractor):
             }
 
             results.append(info)
-            
+
         return results
 
 
@@ -2649,7 +2573,9 @@ class EscapistIE(InfoExtractor):
 
         self.report_config_download(showName)
         try:
-            configJSON = compat_urllib_request.urlopen(configUrl).read()
+            configJSON = compat_urllib_request.urlopen(configUrl)
+            m = re.match(r'text/html; charset="?([^"]+)"?', configJSON.headers['Content-Type'])
+            configJSON = configJSON.read().decode(m.group(1) if m else 'utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to download configuration: ' + compat_str(err))
             return
@@ -2969,6 +2895,8 @@ class InfoQIE(InfoExtractor):
 
 class MixcloudIE(InfoExtractor):
     """Information extractor for www.mixcloud.com"""
+
+    _WORKING = False # New API, but it seems good http://www.mixcloud.com/developers/documentation/
     _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
     IE_NAME = u'mixcloud'
 
@@ -3059,7 +2987,7 @@ class MixcloudIE(InfoExtractor):
                 if file_url is not None:
                     break # got it!
         else:
-            if req_format not in formats.keys():
+            if req_format not in list(formats.keys()):
                 self._downloader.trouble(u'ERROR: format is not available')
                 return
 
@@ -3164,7 +3092,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
                 assert entry['type'] == 'reference'
                 results += self.extract(entry['url'])
             return results
-            
+
         else: # Root page
             info = {
                 'id': 'Stanford OpenClassroom',
@@ -3238,7 +3166,7 @@ class MTVIE(InfoExtractor):
             self._downloader.trouble(u'ERROR: unable to extract performer')
             return
         performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
-        video_title = performer + ' - ' + song_name 
+        video_title = performer + ' - ' + song_name
 
         mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
         if mobj is None:
@@ -3358,7 +3286,7 @@ class YoukuIE(InfoExtractor):
             seed = config['data'][0]['seed']
 
             format = self._downloader.params.get('format', None)
-            supported_format = config['data'][0]['streamfileids'].keys()
+            supported_format = list(config['data'][0]['streamfileids'].keys())
 
             if format is None or format == 'best':
                 if 'hd2' in supported_format:
@@ -3471,7 +3399,7 @@ class XNXXIE(InfoExtractor):
 class GooglePlusIE(InfoExtractor):
     """Information extractor for plus.google.com."""
 
-    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:\w+/)*?(\d+)/posts/(\w+)'
+    _VALID_URL = r'(?:https://)?plus\.google\.com/(?:[^/]+/)*?posts/(\w+)'
     IE_NAME = u'plus.google'
 
     def __init__(self, downloader=None):
@@ -3479,7 +3407,7 @@ class GooglePlusIE(InfoExtractor):
 
     def report_extract_entry(self, url):
         """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Downloading entry: %s' % url)
 
     def report_date(self, upload_date):
         """Report downloading extry"""
@@ -3487,15 +3415,15 @@ class GooglePlusIE(InfoExtractor):
 
     def report_uploader(self, uploader):
         """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Uploader: %s' % uploader)
 
     def report_title(self, video_title):
         """Report downloading extry"""
-        self._downloader.to_screen(u'[plus.google] Title: %s' % video_title.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Title: %s' % video_title)
 
     def report_extract_vid_page(self, video_page):
         """Report information extraction."""
-        self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page.decode('utf-8'))
+        self._downloader.to_screen(u'[plus.google] Extracting video page: %s' % video_page)
 
     def _real_extract(self, url):
         # Extract id from URL
@@ -3505,7 +3433,7 @@ class GooglePlusIE(InfoExtractor):
             return
 
         post_url = mobj.group(0)
-        video_id = mobj.group(2)
+        video_id = mobj.group(1)
 
         video_extension = 'flv'
 
@@ -3513,7 +3441,7 @@ class GooglePlusIE(InfoExtractor):
         self.report_extract_entry(post_url)
         request = compat_urllib_request.Request(post_url)
         try:
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err))
             return
@@ -3555,7 +3483,7 @@ class GooglePlusIE(InfoExtractor):
         video_page = mobj.group(1)
         request = compat_urllib_request.Request(video_page)
         try:
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage = compat_urllib_request.urlopen(request).read().decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -3577,16 +3505,19 @@ class GooglePlusIE(InfoExtractor):
         # Only get the url. The resolution part in the tuple has no use anymore
         video_url = video_url[-1]
         # Treat escaped \u0026 style hex
-        video_url = unicode(video_url, "unicode_escape")
+        try:
+            video_url = video_url.decode("unicode_escape")
+        except AttributeError: # Python 3
+            video_url = bytes(video_url, 'ascii').decode('unicode-escape')
 
 
         return [{
-            'id':       video_id.decode('utf-8'),
+            'id':       video_id,
             'url':      video_url,
-            'uploader': uploader.decode('utf-8'),
-            'upload_date':  upload_date.decode('utf-8'),
-            'title':    video_title.decode('utf-8'),
-            'ext':      video_extension.decode('utf-8'),
+            'uploader': uploader,
+            'upload_date':  upload_date,
+            'title':    video_title,
+            'ext':      video_extension,
         }]
 
 class NBAIE(InfoExtractor):
@@ -3637,43 +3568,34 @@ class NBAIE(InfoExtractor):
 
 class JustinTVIE(InfoExtractor):
     """Information extractor for justin.tv and twitch.tv"""
-    
-#    _VALID_URL = r"""^(?:http(?:s?)://)?www\.(?:justin|twitch)\.tv/
-#        ([^/]+)(?:/b/([^/]+))?/?(?:#.*)?$"""
-    _VALID_URL = r'^http://www.twitch.tv/(.*)$'
+    # TODO: One broadcast may be split into multiple videos. The key
+    # 'broadcast_id' is the same for all parts, and 'broadcast_part'
+    # starts at 1 and increases. Can we treat all parts as one video?
+
+    _VALID_URL = r"""(?x)^(?:http://)?(?:www\.)?(?:twitch|justin)\.tv/
+        ([^/]+)(?:/b/([^/]+))?/?(?:\#.*)?$"""
+    _JUSTIN_PAGE_LIMIT = 100
     IE_NAME = u'justin.tv'
 
     def report_extraction(self, file_id):
         """Report information extraction."""
         self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
 
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
-        
-        api = 'http://api.justin.tv'
-        video_id = mobj.group(mobj.lastindex)
-        if mobj.lastindex == 1:
-            api += '/channel/archives/%s.json?limit=100'
-        else:
-            api += '/clip/show/%s.json'
-        api = api % (video_id,)
-        
-        self.report_extraction(video_id)
-        # TODO: multiple pages
-        # TODO: One broadcast may be split into multiple videos. The key
-        # 'broadcast_id' is the same for all parts, and 'broadcast_part'
-        # starts at 1 and increases. Can we treat all parts as one video?
+    def report_download_page(self, channel, offset):
+        """Report attempt to download a single page of videos."""
+        self._downloader.to_screen(u'[%s] %s: Downloading video information from %d to %d' %
+                (self.IE_NAME, channel, offset, offset + self._JUSTIN_PAGE_LIMIT))
+
+    # Return count of items, list of *valid* items
+    def _parse_page(self, url):
         try:
-            urlh = compat_urllib_request.urlopen(api)
+            urlh = compat_urllib_request.urlopen(url)
             webpage_bytes = urlh.read()
             webpage = webpage_bytes.decode('utf-8', 'ignore')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to download video info JSON: %s' % compat_str(err))
             return
-        
+
         response = json.loads(webpage)
         info = []
         for clip in response:
@@ -3685,8 +3607,89 @@ class JustinTVIE(InfoExtractor):
                     'id': clip['id'],
                     'url': video_url,
                     'title': clip['title'],
-                    'uploader': clip['user_id'] or clip['channel_id'],
+                    'uploader': clip.get('user_id', clip.get('channel_id')),
                     'upload_date': video_date,
                     'ext': video_extension,
                 })
+        return (len(response), info)
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        api = 'http://api.justin.tv'
+        video_id = mobj.group(mobj.lastindex)
+        paged = False
+        if mobj.lastindex == 1:
+            paged = True
+            api += '/channel/archives/%s.json'
+        else:
+            api += '/clip/show/%s.json'
+        api = api % (video_id,)
+
+        self.report_extraction(video_id)
+
+        info = []
+        offset = 0
+        limit = self._JUSTIN_PAGE_LIMIT
+        while True:
+            if paged:
+                self.report_download_page(video_id, offset)
+            page_url = api + ('?offset=%d&limit=%d' % (offset, limit))
+            page_count, page_info = self._parse_page(page_url)
+            info.extend(page_info)
+            if not paged or page_count != limit:
+                break
+            offset += limit
         return info
+
+class FunnyOrDieIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    IE_NAME = u'FunnyOrDie'
+
+    def report_extraction(self, video_id):
+        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('id')
+        self.report_extraction(video_id)
+        try:
+            urlh = compat_urllib_request.urlopen(url)
+            webpage_bytes = urlh.read()
+            webpage = webpage_bytes.decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+            return
+
+        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
+        if not m:
+            self._downloader.trouble(u'ERROR: unable to find video information')
+        video_url = unescapeHTML(m.group('url'))
+        print(video_url)
+
+        m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
+        if not m:
+            self._downloader.trouble(u'Cannot find video title')
+        title = unescapeHTML(m.group('title'))
+
+        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
+        if m:
+            desc = unescapeHTML(m.group('desc'))
+        else:
+            desc = None
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': desc,
+        }
+        return [info]