SoundcloudSetIE

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 51b263383d9b7dd1f6adc5c6d095c0409442a8de..87a926068dfae169bd380c62b3742d1ef093f254 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -15,6 +15,7 @@ import email.utils
  import xml.etree.ElementTree
  import random
  import math
+import operator
  
  from .utils import *
  
@@ -47,7 +48,7 @@ class InfoExtractor(object):
      uploader_id:    Nickname or id of the video uploader.
      location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
-    subtitles:      The .srt file contents.
+    subtitles:      The subtitle file contents.
      urlhandle:      [internal] The urlHandle to be used to download the file,
                      like returned by urllib.request.urlopen
  
@@ -73,13 +74,15 @@ class InfoExtractor(object):
          self._ready = False
          self.set_downloader(downloader)
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url) is not None
+        return re.match(cls._VALID_URL, url) is not None
  
-    def working(self):
+    @classmethod
+    def working(cls):
          """Getter method for _WORKING."""
-        return self._WORKING
+        return cls._WORKING
  
      def initialize(self):
          """Initializes an instance (authentication, etc)."""
@@ -123,8 +126,14 @@ class InfoExtractor(object):
      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the data of the page as a string """
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+        content_type = urlh.headers.get('Content-Type', '')
+        m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+        if m:
+            encoding = m.group(1)
+        else:
+            encoding = 'utf-8'
          webpage_bytes = urlh.read()
-        return webpage_bytes.decode('utf-8', 'replace')
+        return webpage_bytes.decode(encoding, 'replace')
  
  
  class YoutubeIE(InfoExtractor):
@@ -136,7 +145,6 @@ class YoutubeIE(InfoExtractor):
                           (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/|
                              tube\.majestyc\.net/)                             # the various hostnames, with wildcard subdomains
                           (?:.*?\#/)?                                          # handle anchor (#/) redirect urls
-                         (?!view_play_list|my_playlists|artist|playlist)      # ignore playlist URLs
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
@@ -188,9 +196,11 @@ class YoutubeIE(InfoExtractor):
      }
      IE_NAME = u'youtube'
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        if YoutubePlaylistIE.suitable(url): return False
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_lang(self):
          """Report attempt to set language."""
@@ -214,7 +224,16 @@ class YoutubeIE(InfoExtractor):
  
      def report_video_subtitles_download(self, video_id):
          """Report attempt to download video info webpage."""
-        self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles' % video_id)
+        self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id)
+
+    def report_video_subtitles_request(self, video_id, sub_lang, format):
+        """Report attempt to download the subtitles of a video in one language and format."""
+        self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
+
+    def report_video_subtitles_available(self, video_id, sub_lang_list):
+        """Print the comma-separated language codes of the subtitles found for the video."""
+        languages = ",".join(sub_lang_list.keys())
+        self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, languages))
  
      def report_information_extraction(self, video_id):
          """Report attempt to extract video information."""
@@ -228,40 +247,63 @@ class YoutubeIE(InfoExtractor):
          """Indicate the download will use the RTMP protocol."""
          self._downloader.to_screen(u'[youtube] RTMP download detected')
  
-
-    def _extract_subtitles(self, video_id):
+    def _get_available_subtitles(self, video_id):
          self.report_video_subtitles_download(video_id)
          request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
          try:
-            srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
-        srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
-        srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
-        if not srt_lang_list:
-            return (u'WARNING: video has no closed captions', None)
-        if self._downloader.params.get('subtitleslang', False):
-            srt_lang = self._downloader.params.get('subtitleslang')
-        elif 'en' in srt_lang_list:
-            srt_lang = 'en'
-        else:
-            srt_lang = list(srt_lang_list.keys())[0]
-        if not srt_lang in srt_lang_list:
-            return (u'WARNING: no closed captions found in the specified language "%s"' % srt_lang, None)
+        sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
+        sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
+        if not sub_lang_list:
+            return (u'WARNING: video doesn\'t have subtitles', None)
+        return sub_lang_list
+
+    def _list_available_subtitles(self, video_id):  # prints the subtitle languages offered for video_id
+        sub_lang_list = self._get_available_subtitles(video_id)  # NOTE(review): an (error, None) tuple on failure
+        self.report_video_subtitles_available(video_id, sub_lang_list)  # calls .keys(), so the failure tuple raises here
+
+    def _request_subtitle(self, sub_lang, sub_name, video_id, format):
+        self.report_video_subtitles_request(video_id, sub_lang, format)
          params = compat_urllib_parse.urlencode({
-            'lang': srt_lang,
-            'name': srt_lang_list[srt_lang].encode('utf-8'),
+            'lang': sub_lang,
+            'name': sub_name,
              'v': video_id,
-            'fmt': 'srt',
+            'fmt': format,
          })
          url = 'http://www.youtube.com/api/timedtext?' + params
          try:
-            srt = compat_urllib_request.urlopen(url).read().decode('utf-8')
+            sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
-        if not srt:
+        if not sub:
              return (u'WARNING: Did not fetch video subtitles', None)
-        return (None, srt)
+        return (None, sub_lang, sub)
+
+    def _extract_subtitle(self, video_id):
+        """Return a one-element list holding the (error, language, contents) tuple of the chosen subtitle."""
+        sub_lang_list = self._get_available_subtitles(video_id)
+        if isinstance(sub_lang_list, tuple):  # listing failed: (error_message, None) instead of a dict
+            return [(sub_lang_list[0], None, None)]
+        sub_format = self._downloader.params.get('subtitlesformat')
+        # An explicit 'subtitleslang' wins, then English, then the first language on offer.
+        sub_lang = self._downloader.params.get('subtitleslang') or (
+            'en' if 'en' in sub_lang_list else list(sub_lang_list.keys())[0])
+        if sub_lang not in sub_lang_list:
+            return [(u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None, None)]
+        subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
+        # _request_subtitle returns only two fields on failure; pad so callers can always unpack three.
+        return [tuple(subtitle) + (None,) * (3 - len(subtitle))]
+
+    def _extract_all_subtitles(self, video_id):
+        """Return a list of (error, language, contents) tuples, one per subtitle language on offer."""
+        sub_lang_list = self._get_available_subtitles(video_id)
+        if isinstance(sub_lang_list, tuple):  # listing failed: (error_message, None) instead of a dict
+            return [(sub_lang_list[0], None, None)]
+        sub_format = self._downloader.params.get('subtitlesformat')
+        results = [self._request_subtitle(lang, name.encode('utf-8'), video_id, sub_format) for lang, name in sub_lang_list.items()]
+        return [tuple(r) + (None,) * (3 - len(r)) for r in results]  # pad 2-field failures to three fields
  
      def _print_formats(self, formats):
          print('Available formats:')
@@ -289,7 +331,7 @@ class YoutubeIE(InfoExtractor):
                  else:
                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
              except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                  return
  
          # Set language
@@ -298,7 +340,7 @@ class YoutubeIE(InfoExtractor):
              self.report_lang()
              compat_urllib_request.urlopen(request).read()
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
              return
  
          # No authentication to be performed
@@ -309,7 +351,7 @@ class YoutubeIE(InfoExtractor):
          try:
              login_page = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to fetch login page: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to fetch login page: %s' % compat_str(err))
              return
  
          galx = None
@@ -353,10 +395,10 @@ class YoutubeIE(InfoExtractor):
              self.report_login()
              login_results = compat_urllib_request.urlopen(request).read().decode('utf-8')
              if re.search(r'(?i)<form[^>]* id="gaia_loginform"', login_results) is not None:
-                self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
+                self._downloader.report_warning(u'unable to log in: bad username or password')
                  return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
          # Confirm age
@@ -482,16 +524,26 @@ class YoutubeIE(InfoExtractor):
          else:
              video_description = ''
  
-        # closed captions
+        # subtitles
          video_subtitles = None
-        if self._downloader.params.get('subtitleslang', False):
-            self._downloader.params['writesubtitles'] = True
-        if self._downloader.params.get('onlysubtitles', False):
-            self._downloader.params['writesubtitles'] = True
+
          if self._downloader.params.get('writesubtitles', False):
-            (srt_error, video_subtitles) = self._extract_subtitles(video_id)
-            if srt_error:
-                self._downloader.trouble(srt_error)
+            video_subtitles = self._extract_subtitle(video_id)
+            if video_subtitles:
+                (sub_error, sub_lang, sub) = video_subtitles[0]
+                if sub_error:
+                    self._downloader.trouble(sub_error)
+
+        if self._downloader.params.get('allsubtitles', False):
+            video_subtitles = self._extract_all_subtitles(video_id)
+            for video_subtitle in video_subtitles:
+                (sub_error, sub_lang, sub) = video_subtitle
+                if sub_error:
+                    self._downloader.trouble(sub_error)
+
+        if self._downloader.params.get('listsubtitles', False):
+            sub_lang_list = self._list_available_subtitles(video_id)
+            return
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
@@ -1266,7 +1318,8 @@ class GenericIE(InfoExtractor):
  
      def report_download_webpage(self, video_id):
          """Report webpage download."""
-        self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
+        if not self._downloader.params.get('test', False):
+            self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
          self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
  
      def report_extraction(self, video_id):
@@ -1336,13 +1389,8 @@ class GenericIE(InfoExtractor):
          if self._test_redirect(url): return
  
          video_id = url.split('/')[-1]
-        request = compat_urllib_request.Request(url)
          try:
-            self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-            return
+            webpage = self._download_webpage(url, video_id)
          except ValueError as err:
              # since this is the last-resort InfoExtractor, if
              # this error is thrown, it'll be thrown here
@@ -1441,7 +1489,7 @@ class YoutubeSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_youtube_results:
-                    self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
+                    self._downloader.report_warning(u'ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                      n = self._max_youtube_results
                  self._download_n_results(query, n)
                  return
@@ -1461,12 +1509,16 @@ class YoutubeSearchIE(InfoExtractor):
              result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
              request = compat_urllib_request.Request(result_url)
              try:
-                data = compat_urllib_request.urlopen(request).read()
+                data = compat_urllib_request.urlopen(request).read().decode('utf-8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err))
                  return
              api_response = json.loads(data)['data']
  
+            if not 'items' in api_response:
+                self._downloader.trouble(u'[youtube] No video results')
+                return
+
              new_ids = list(video['id'] for video in api_response['items'])
              video_ids += new_ids
  
@@ -1519,7 +1571,7 @@ class GoogleSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_google_results:
-                    self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
+                    self._downloader.report_warning(u'gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                      n = self._max_google_results
                  self._download_n_results(query, n)
                  return
@@ -1603,7 +1655,7 @@ class YahooSearchIE(InfoExtractor):
                      self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
                      return
                  elif n > self._max_yahoo_results:
-                    self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
+                    self._downloader.report_warning(u'yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
                      n = self._max_yahoo_results
                  self._download_n_results(query, n)
                  return
@@ -1651,80 +1703,94 @@ class YahooSearchIE(InfoExtractor):
  class YoutubePlaylistIE(InfoExtractor):
      """Information Extractor for YouTube playlists."""
  
-    _VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]{10,})(?:/.*?/([0-9A-Za-z_-]+))?.*'
-    _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
-    _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&amp;([^&"]+&amp;)*list=.*?%s'
-    _MORE_PAGES_INDICATOR = u"Next \N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}"
+    _VALID_URL = r"""(?:
+                        (?:https?://)?
+                        (?:\w+\.)?
+                        youtube\.com/
+                        (?:
+                           (?:course|view_play_list|my_playlists|artist|playlist|watch)
+                           \? (?:.*?&)*? (?:p|a|list)=
+                        |  user/.*?/user/
+                        |  p/
+                        |  user/.*?#[pg]/c/
+                        )
+                        ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
+                        .*
+                     |
+                        ((?:PL|EC|UU)[0-9A-Za-z-_]{10,})
+                     )"""
+    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json'
+    _MAX_RESULTS = 50
      IE_NAME = u'youtube:playlist'
  
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
  
+    @classmethod
+    def suitable(cls, url):
+        """Receives a URL and returns True if suitable for this IE."""
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
+
      def report_download_page(self, playlist_id, pagenum):
          """Report attempt to download playlist page with given number."""
          self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
  
      def _real_extract(self, url):
          # Extract playlist id
-        mobj = re.match(self._VALID_URL, url)
+        mobj = re.match(self._VALID_URL, url, re.VERBOSE)
          if mobj is None:
              self._downloader.trouble(u'ERROR: invalid url: %s' % url)
              return
  
-        # Single video case
-        if mobj.group(3) is not None:
-            self._downloader.download([mobj.group(3)])
-            return
-
-        # Download playlist pages
-        # prefix is 'p' as default for playlists but there are other types that need extra care
-        playlist_prefix = mobj.group(1)
-        if playlist_prefix == 'a':
-            playlist_access = 'artist'
-        else:
-            playlist_prefix = 'p'
-            playlist_access = 'view_play_list'
-        playlist_id = mobj.group(2)
-        video_ids = []
-        pagenum = 1
+        # Download playlist videos from API
+        playlist_id = mobj.group(1) or mobj.group(2)
+        page_num = 1
+        videos = []
  
          while True:
-            self.report_download_page(playlist_id, pagenum)
-            url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
-            request = compat_urllib_request.Request(url)
+            self.report_download_page(playlist_id, page_num)
+
+            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, self._MAX_RESULTS * (page_num - 1) + 1)
              try:
-                page = compat_urllib_request.urlopen(request).read().decode('utf-8')
+                page = compat_urllib_request.urlopen(url).read().decode('utf8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
                  return
  
-            # Extract video identifiers
-            ids_in_page = []
-            for mobj in re.finditer(self._VIDEO_INDICATOR_TEMPLATE % playlist_id, page):
-                if mobj.group(1) not in ids_in_page:
-                    ids_in_page.append(mobj.group(1))
-            video_ids.extend(ids_in_page)
+            try:
+                response = json.loads(page)
+            except ValueError as err:
+                self._downloader.trouble(u'ERROR: Invalid JSON in API response: ' + compat_str(err))
+                return
  
-            if self._MORE_PAGES_INDICATOR not in page:
+            if not 'feed' in response or not 'entry' in response['feed']:
+                self._downloader.trouble(u'ERROR: Got a malformed response from YouTube API')
+                return
+            videos += [ (entry['yt$position']['$t'], entry['content']['src'])
+                        for entry in response['feed']['entry']
+                        if 'content' in entry ]
+
+            if len(response['feed']['entry']) < self._MAX_RESULTS:
                  break
-            pagenum = pagenum + 1
+            page_num += 1
  
-        total = len(video_ids)
+        videos = [v[1] for v in sorted(videos)]
+        total = len(videos)
  
          playliststart = self._downloader.params.get('playliststart', 1) - 1
          playlistend = self._downloader.params.get('playlistend', -1)
          if playlistend == -1:
-            video_ids = video_ids[playliststart:]
+            videos = videos[playliststart:]
          else:
-            video_ids = video_ids[playliststart:playlistend]
+            videos = videos[playliststart:playlistend]
  
-        if len(video_ids) == total:
+        if len(videos) == total:
              self._downloader.to_screen(u'[youtube] PL %s: Found %i videos' % (playlist_id, total))
          else:
-            self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(video_ids)))
+            self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos)))
  
-        for id in video_ids:
-            self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
+        for video in videos:
+            self._downloader.download([video])
          return
  
  
@@ -1910,9 +1976,8 @@ class BlipTVUserIE(InfoExtractor):
  
          while True:
              self.report_download_page(username, pagenum)
-
-            request = compat_urllib_request.Request( page_base + "&page=" + str(pagenum) )
-
+            url = page_base + "&page=" + str(pagenum)
+            request = compat_urllib_request.Request( url )
              try:
                  page = compat_urllib_request.urlopen(request).read().decode('utf-8')
              except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -2048,7 +2113,7 @@ class FacebookIE(InfoExtractor):
                  else:
                      raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
              except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err))
+                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
                  return
  
          if useremail is None:
@@ -2065,10 +2130,10 @@ class FacebookIE(InfoExtractor):
              self.report_login()
              login_results = compat_urllib_request.urlopen(request).read()
              if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
-                self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
+                self._downloader.report_warning(u'unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
                  return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err))
+            self._downloader.report_warning(u'unable to log in: %s' % compat_str(err))
              return
  
      def _real_extract(self, url):
@@ -2133,6 +2198,17 @@ class BlipTVIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
  
+        urlp = compat_urllib_parse_urlparse(url)
+        if urlp.path.startswith('/play/'):
+            request = compat_urllib_request.Request(url)
+            response = compat_urllib_request.urlopen(request)
+            redirecturl = response.geturl()
+            rurlp = compat_urllib_parse_urlparse(redirecturl)
+            file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2]
+            url = 'http://blip.tv/a/a-' + file_id
+            return self._real_extract(url)
+
+
          if '?' in url:
              cchar = '&'
          else:
@@ -2288,9 +2364,10 @@ class ComedyCentralIE(InfoExtractor):
          '400': '384x216',
      }
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_extraction(self, episode_id):
          self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
@@ -2513,7 +2590,7 @@ class EscapistIE(InfoExtractor):
              'uploader': showName,
              'upload_date': None,
              'title': showName,
-            'ext': 'flv',
+            'ext': 'mp4',
              'thumbnail': imgUrl,
              'description': description,
              'player_url': playerUrl,
@@ -2725,6 +2802,87 @@ class SoundcloudIE(InfoExtractor):
              'description': info['description'],
          }]
  
+class SoundcloudSetIE(InfoExtractor):
+    """Information extractor for soundcloud.com sets
+       The set URL is resolved through api.soundcloud.com/resolve.json,
+       whose answer lists the tracks of the set together with their
+       metadata. For each track the stream definitions are then
+       requested from api.sndcdn.com and the 'http_mp3_128_url'
+       entry is used as the media URL.
+     """
+
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)'
+    _CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'  # client_id sent with both API requests
+    IE_NAME = u'soundcloud'
+
+    def __init__(self, downloader=None):
+        InfoExtractor.__init__(self, downloader)
+
+    def report_resolve(self, video_id):
+        """Report that the set URL is being resolved through the API."""
+        self._downloader.to_screen(u'[%s] %s: Resolving id' % (self.IE_NAME, video_id))
+
+    def report_extraction(self, video_id):
+        """Report that the stream definitions are being retrieved."""
+        self._downloader.to_screen(u'[%s] %s: Retrieving stream' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        """Return one info dict per track of the set (None if the set cannot be resolved)."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        # uploader and set slug are both taken from the URL
+        uploader, slug_title = mobj.group(1), mobj.group(2)
+        set_path = '%s/sets/%s' % (uploader, slug_title)
+        self.report_resolve(set_path)
+
+        url = 'http://soundcloud.com/' + set_path
+        resolv_url = 'http://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + self._CLIENT_ID
+        request = compat_urllib_request.Request(resolv_url)
+        try:
+            info_json = compat_urllib_request.urlopen(request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
+            return
+
+        info = json.loads(info_json)
+        if 'errors' in info:
+            for err in info['errors']:
+                self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err['error_message']))
+            return
+
+        videos = []
+        for track in info['tracks']:
+            video_id = track['id']
+            self.report_extraction(set_path)
+
+            streams_url = 'https://api.sndcdn.com/i1/tracks/' + str(video_id) + '/streams?client_id=' + self._CLIENT_ID
+            request = compat_urllib_request.Request(streams_url)
+            try:
+                stream_json = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+                # A single unavailable track must not discard the tracks already collected.
+                self._downloader.trouble(u'ERROR: unable to download stream definitions: %s' % compat_str(err))
+                continue
+
+            mediaURL = json.loads(stream_json).get('http_mp3_128_url')
+            if mediaURL is None:
+                self._downloader.trouble(u'ERROR: no MP3 stream found for track %s' % video_id)
+                continue
+
+            videos.append({
+                'id':       video_id,
+                'url':      mediaURL,
+                'uploader': track['user']['username'],
+                'upload_date':  track['created_at'],
+                'title':    track['title'],
+                'ext':      u'mp3',
+                'description': track['description'],
+            })
+        return videos
+
  
  class InfoQIE(InfoExtractor):
      """Information extractor for infoq.com"""
@@ -3547,65 +3705,17 @@ class FunnyOrDieIE(InfoExtractor):
          }
          return [info]
  
-class TweetReelIE(InfoExtractor):
-    _VALID_URL = r'^(?:https?://)?(?:www\.)?tweetreel\.com/[?](?P<id>[0-9a-z]+)$'
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
-
-        video_id = mobj.group('id')
-        webpage = self._download_webpage(url, video_id)
-
-        m = re.search(r'<div id="left" status_id="([0-9]+)">', webpage)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find status ID')
-        status_id = m.group(1)
-
-        m = re.search(r'<div class="tweet_text">(.*?)</div>', webpage, flags=re.DOTALL)
-        if not m:
-            self._downloader.trouble(u'WARNING: Cannot find description')
-        desc = unescapeHTML(re.sub('<a.*?</a>', '', m.group(1))).strip()
-
-        m = re.search(r'<div class="tweet_info">.*?from <a target="_blank" href="https?://twitter.com/(?P<uploader_id>.+?)">(?P<uploader>.+?)</a>', webpage, flags=re.DOTALL)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find uploader')
-        uploader = unescapeHTML(m.group('uploader'))
-        uploader_id = unescapeHTML(m.group('uploader_id'))
-
-        m = re.search(r'<span unixtime="([0-9]+)"', webpage)
-        if not m:
-            self._downloader.trouble(u'ERROR: Cannot find upload date')
-        upload_date = datetime.datetime.fromtimestamp(int(m.group(1))).strftime('%Y%m%d')
-
-        title = desc
-        video_url = 'http://files.tweetreel.com/video/' + status_id + '.mov'
-
-        info = {
-            'id': video_id,
-            'url': video_url,
-            'ext': 'mov',
-            'title': title,
-            'description': desc,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'internal_id': status_id,
-            'upload_date': upload_date
-        }
-        return [info]
-        
  class SteamIE(InfoExtractor):
-    _VALID_URL = r"""http://store.steampowered.com/ 
+    _VALID_URL = r"""http://store.steampowered.com/
                  (?P<urltype>video|app)/ #If the page is only for videos or for a game
                  (?P<gameID>\d+)/?
                  (?P<videoID>\d*)(?P<extra>\??) #For urltype == video we sometimes get the videoID
                  """
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -3616,18 +3726,22 @@ class SteamIE(InfoExtractor):
          mweb = re.finditer(urlRE, webpage)
          namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
          titles = re.finditer(namesRE, webpage)
+        thumbsRE = r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">'
+        thumbs = re.finditer(thumbsRE, webpage)
          videos = []
-        for vid,vtitle in zip(mweb,titles):
+        for vid,vtitle,thumb in zip(mweb,titles,thumbs):
              video_id = vid.group('videoID')
              title = vtitle.group('videoName')
              video_url = vid.group('videoURL')
+            video_thumb = thumb.group('thumbnail')
              if not video_url:
                  self._downloader.trouble(u'ERROR: Cannot find video url for %s' % video_id)
              info = {
                  'id':video_id,
                  'url':video_url,
                  'ext': 'flv',
-                'title': unescapeHTML(title)
+                'title': unescapeHTML(title),
+                'thumbnail': video_thumb
                    }
              videos.append(info)
          return videos
@@ -3693,7 +3807,7 @@ class RBMARadioIE(InfoExtractor):
  class YouPornIE(InfoExtractor):
      """Information extractor for youporn.com."""
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
-   
+
      def _print_formats(self, formats):
          """Print all available formats"""
          print(u'Available formats:')
@@ -3729,7 +3843,7 @@ class YouPornIE(InfoExtractor):
          # Get the video date
          result = re.search(r'Date:</label>(?P<date>.*) </li>', webpage)
          if result is None:
-            self._downloader.to_stderr(u'WARNING: unable to extract video date')
+            self._downloader.report_warning(u'unable to extract video date')
              upload_date = None
          else:
              upload_date = result.group('date').strip()
@@ -3737,7 +3851,7 @@ class YouPornIE(InfoExtractor):
          # Get the video uploader
          result = re.search(r'Submitted:</label>(?P<uploader>.*)</li>', webpage)
          if result is None:
-            self._downloader.to_stderr(u'WARNING: unable to extract uploader')
+            self._downloader.report_warning(u'unable to extract uploader')
              video_uploader = None
          else:
              video_uploader = result.group('uploader').strip()
@@ -3755,8 +3869,8 @@ class YouPornIE(InfoExtractor):
          links = re.findall(LINK_RE, download_list_html)
          if(len(links) == 0):
              raise ExtractorError(u'ERROR: no known formats available for video')
-        
-        self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))   
+
+        self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))
  
          formats = []
          for link in links:
@@ -3807,7 +3921,7 @@ class YouPornIE(InfoExtractor):
                  return
              return [format]
  
-        
+
  
  class PornotubeIE(InfoExtractor):
      """Information extractor for pornotube.com."""
@@ -3879,7 +3993,7 @@ class YouJizzIE(InfoExtractor):
  
          embed_page_url = result.group(0).strip()
          video_id = result.group('videoid')
-    
+
          webpage = self._download_webpage(embed_page_url, video_id)
  
          # Get the video URL
@@ -3953,11 +4067,11 @@ class KeekIE(InfoExtractor):
          webpage = self._download_webpage(url, video_id)
          m = re.search(r'<meta property="og:title" content="(?P<title>.+)"', webpage)
          title = unescapeHTML(m.group('title'))
-        m = re.search(r'<div class="bio-names-and-report">[\s\n]+<h4>(?P<uploader>\w+)</h4>', webpage)
-        uploader = unescapeHTML(m.group('uploader'))
+        m = re.search(r'<div class="user-name-and-bio">[\S\s]+?<h2>(?P<uploader>.+?)</h2>', webpage)
+        uploader = clean_html(m.group('uploader'))
          info = {
-                'id':video_id,
-                'url':video_url,
+                'id': video_id,
+                'url': video_url,
                  'ext': 'mp4',
                  'title': title,
                  'thumbnail': thumbnail,
@@ -3975,9 +4089,10 @@ class TEDIE(InfoExtractor):
                     /(?P<name>\w+) # Here goes the name and then ".html"
                     '''
  
-    def suitable(self, url):
+    @classmethod
+    def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        return re.match(self._VALID_URL, url, re.VERBOSE) is not None
+        return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def _real_extract(self, url):
          m=re.match(self._VALID_URL, url, re.VERBOSE)
@@ -4000,31 +4115,30 @@ class TEDIE(InfoExtractor):
                       ([.\s]*?)data-playlist_item_id="(\d+)"
                       ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)"
                       '''
-        video_name_RE=r'<p\ class="talk-title"><a href="/talks/(.+).html">(?P<fullname>.+?)</a></p>'
+        video_name_RE=r'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>'
          webpage=self._download_webpage(url, playlist_id, 'Downloading playlist webpage')
          m_videos=re.finditer(video_RE,webpage,re.VERBOSE)
          m_names=re.finditer(video_name_RE,webpage)
          info=[]
          for m_video, m_name in zip(m_videos,m_names):
-            video_dic={
-                       'id': m_video.group('video_id'),
-                       'url': self._talk_video_link(m_video.group('mediaSlug')),
-                       'ext': 'mp4',
-                       'title': m_name.group('fullname')
-                       }
-            info.append(video_dic)
+            video_id=m_video.group('video_id')
+            talk_url='http://www.ted.com%s' % m_name.group('talk_url')
+            info.append(self._talk_info(talk_url,video_id))
          return info
+
      def _talk_info(self, url, video_id=0):
          """Return the video for the talk in the url"""
          m=re.match(self._VALID_URL, url,re.VERBOSE)
          videoName=m.group('name')
          webpage=self._download_webpage(url, video_id, 'Downloading \"%s\" page' % videoName)
          # If the url includes the language we get the title translated
-        title_RE=r'<h1><span id="altHeadline" >(?P<title>[\s\w:/\.\?=\+-\\\']*)</span></h1>'
+        title_RE=r'<h1><span id="altHeadline" >(?P<title>.*)</span></h1>'
          title=re.search(title_RE, webpage).group('title')
          info_RE=r'''<script\ type="text/javascript">var\ talkDetails\ =(.*?)
                          "id":(?P<videoID>[\d]+).*?
                          "mediaSlug":"(?P<mediaSlug>[\w\d]+?)"'''
+        thumb_RE=r'</span>[\s.]*</div>[\s.]*<img src="(?P<thumbnail>.*?)"'
+        thumb_match=re.search(thumb_RE,webpage)
          info_match=re.search(info_RE,webpage,re.VERBOSE)
          video_id=info_match.group('videoID')
          mediaSlug=info_match.group('mediaSlug')
@@ -4033,13 +4147,14 @@ class TEDIE(InfoExtractor):
                  'id': video_id,
                  'url': video_url,
                  'ext': 'mp4',
-                'title': title
+                'title': title,
+                'thumbnail': thumb_match.group('thumbnail')
                  }
          return info
  
  class MySpassIE(InfoExtractor):
      _VALID_URL = r'http://www.myspass.de/.*'
-    
+
      def _real_extract(self, url):
          META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
  
@@ -4049,12 +4164,12 @@ class MySpassIE(InfoExtractor):
          url_parent_path, video_id = os.path.split(url_path)
          if not video_id:
              _, video_id = os.path.split(url_parent_path)
-        
+
          # get metadata
          metadata_url = META_DATA_URL_TEMPLATE % video_id
          metadata_text = self._download_webpage(metadata_url, video_id)
          metadata = xml.etree.ElementTree.fromstring(metadata_text.encode('utf-8'))
-        
+
          # extract values from metadata
          url_flv_el = metadata.find('url_flv')
          if url_flv_el is None:
@@ -4093,6 +4208,40 @@ class MySpassIE(InfoExtractor):
          }
          return [info]
  
+class SpiegelIE(InfoExtractor):
+    """Information extractor for spiegel.de video pages."""
+    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?$'
+
+    def _real_extract(self, url):
+        """Return a one-item list holding the info dict for the video at url."""
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('videoID')
+
+        webpage = self._download_webpage(url, video_id)
+        m = re.search(r'<div class="spVideoTitle">(.*?)</div>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find title')
+        video_title = unescapeHTML(m.group(1))
+
+        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
+        xml_code = self._download_webpage(xml_url, video_id,
+                    note=u'Downloading XML', errnote=u'Failed to download XML')
+        # Feed the parser UTF-8 bytes, as MySpassIE does (unicode text presumably breaks on Python 2).
+        idoc = xml.etree.ElementTree.fromstring(xml_code.encode('utf-8'))
+        last_type = idoc[-1]  # last child of the XML root
+        filename = last_type.findall('./filename')[0].text
+        duration = float(last_type.findall('./duration')[0].text)
+
+        video_url = 'http://video2.spiegel.de/flash/' + filename
+        return [{
+            'id': video_id,
+            'url': video_url,
+            'ext': filename.rpartition('.')[2],
+            'title': video_title,
+            'duration': duration,
+        }]
+
+
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
      The order does matter; the first extractor matched is the one handling the URL.
@@ -4119,6 +4268,7 @@ def gen_extractors():
          EscapistIE(),
          CollegeHumorIE(),
          XVideosIE(),
+        SoundcloudSetIE(),
          SoundcloudIE(),
          InfoQIE(),
          MixcloudIE(),
@@ -4134,7 +4284,6 @@ def gen_extractors():
          NBAIE(),
          JustinTVIE(),
          FunnyOrDieIE(),
-        TweetReelIE(),
          SteamIE(),
          UstreamIE(),
          RBMARadioIE(),
@@ -4142,6 +4291,7 @@ def gen_extractors():
          KeekIE(),
          TEDIE(),
          MySpassIE(),
+        SpiegelIE(),
          GenericIE()
      ]