Support direct vimeo links (Closes #666)

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 136e4183ec8b89524a37cdeb617cc28a106a6fca..ac3ecea9252d56f9bc751be13cd6ad3ea5324753 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -5,6 +5,7 @@ from __future__ import absolute_import
  
  import base64
  import datetime
+import itertools
  import netrc
  import os
  import re
@@ -35,15 +36,16 @@ class InfoExtractor(object):
      url:            Final video URL.
      title:          Video title, unescaped.
      ext:            Video filename extension.
-    uploader:       Full name of the video uploader.
-    upload_date:    Video upload date (YYYYMMDD).
  
      The following fields are optional:
  
      format:         The video format, defaults to ext (used for --get-format)
      thumbnail:      Full URL to a video thumbnail image.
      description:    One-line video description.
+    uploader:       Full name of the video uploader.
+    upload_date:    Video upload date (YYYYMMDD).
      uploader_id:    Nickname or id of the video uploader.
+    location:       Physical location of the video.
      player_url:     SWF Player URL (used for rtmpdump).
      subtitles:      The .srt file contents.
      urlhandle:      [internal] The urlHandle to be used to download the file,
@@ -106,19 +108,24 @@ class InfoExtractor(object):
      def IE_NAME(self):
          return type(self).__name__[:-2]
  
-    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
+    def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
+        """ Returns the response handle """
          if note is None:
              note = u'Downloading video webpage'
          self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
          try:
-            urlh = compat_urllib_request.urlopen(url_or_request)
-            webpage_bytes = urlh.read()
-            return webpage_bytes.decode('utf-8', 'replace')
+            return compat_urllib_request.urlopen(url_or_request)
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              if errnote is None:
                  errnote = u'Unable to download webpage'
              raise ExtractorError(u'%s: %s' % (errnote, compat_str(err)), sys.exc_info()[2])
  
+    def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
+        """ Returns the data of the page as a string """
+        urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+        webpage_bytes = urlh.read()
+        return webpage_bytes.decode('utf-8', 'replace')
+
  
  class YoutubeIE(InfoExtractor):
      """Information extractor for youtube.com."""
@@ -257,13 +264,18 @@ class YoutubeIE(InfoExtractor):
              srt_lang = list(srt_lang_list.keys())[0]
          if not srt_lang in srt_lang_list:
              return (u'WARNING: no closed captions found in the specified language', None)
-        request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
+        params = compat_urllib_parse.urlencode({
+            'lang': srt_lang,
+            'name': srt_lang_list[srt_lang].encode('utf-8'),
+            'v': video_id,
+        })
+        url = 'http://www.youtube.com/api/timedtext?' + params
          try:
-            srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
+            srt_xml = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
          if not srt_xml:
-            return (u'WARNING: unable to download video subtitles', None)
+            return (u'WARNING: Did not fetch video subtitles', None)
          return (None, self._closed_captions_xml_to_srt(srt_xml))
  
      def _print_formats(self, formats):
@@ -966,7 +978,7 @@ class VimeoIE(InfoExtractor):
      """Information extractor for vimeo.com."""
  
      # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'
      IE_NAME = u'vimeo'
  
      def __init__(self, downloader=None):
@@ -987,7 +999,11 @@ class VimeoIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
              return
  
-        video_id = mobj.group(1)
+        video_id = mobj.group('id')
+        if not mobj.group('proto'):
+            url = 'https://' + url
+        if mobj.group('direct_link'):
+            url = 'https://vimeo.com/' + video_id
  
          # Retrieve video webpage to extract further information
          request = compat_urllib_request.Request(url, None, std_headers)
@@ -1974,62 +1990,14 @@ class DepositFilesIE(InfoExtractor):
  class FacebookIE(InfoExtractor):
      """Information Extractor for Facebook"""
  
-    _WORKING = False
      _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
      _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
      _NETRC_MACHINE = 'facebook'
-    _available_formats = ['video', 'highqual', 'lowqual']
-    _video_extensions = {
-        'video': 'mp4',
-        'highqual': 'mp4',
-        'lowqual': 'mp4',
-    }
      IE_NAME = u'facebook'
  
-    def __init__(self, downloader=None):
-        InfoExtractor.__init__(self, downloader)
-
-    def _reporter(self, message):
-        """Add header and report message."""
-        self._downloader.to_screen(u'[facebook] %s' % message)
-
      def report_login(self):
          """Report attempt to log in."""
-        self._reporter(u'Logging in')
-
-    def report_video_webpage_download(self, video_id):
-        """Report attempt to download video webpage."""
-        self._reporter(u'%s: Downloading video webpage' % video_id)
-
-    def report_information_extraction(self, video_id):
-        """Report attempt to extract video information."""
-        self._reporter(u'%s: Extracting video information' % video_id)
-
-    def _parse_page(self, video_webpage):
-        """Extract video information from page"""
-        # General data
-        data = {'title': r'\("video_title", "(.*?)"\)',
-            'description': r'<div class="datawrap">(.*?)</div>',
-            'owner': r'\("video_owner_name", "(.*?)"\)',
-            'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
-            }
-        video_info = {}
-        for piece in data.keys():
-            mobj = re.search(data[piece], video_webpage)
-            if mobj is not None:
-                video_info[piece] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-
-        # Video urls
-        video_urls = {}
-        for fmt in self._available_formats:
-            mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
-            if mobj is not None:
-                # URL is in a Javascript segment inside an escaped Unicode format within
-                # the generally utf-8 page
-                video_urls[fmt] = compat_urllib_parse.unquote_plus(mobj.group(1).decode("unicode_escape"))
-        video_info['video_urls'] = video_urls
-
-        return video_info
+        self._downloader.to_screen(u'[%s] Logging in' % self.IE_NAME)
  
      def _real_initialize(self):
          if self._downloader is None:
@@ -2082,100 +2050,35 @@ class FacebookIE(InfoExtractor):
              return
          video_id = mobj.group('ID')
  
-        # Get video webpage
-        self.report_video_webpage_download(video_id)
-        request = compat_urllib_request.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
-        try:
-            page = compat_urllib_request.urlopen(request)
-            video_webpage = page.read()
-        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
-            return
-
-        # Start extracting information
-        self.report_information_extraction(video_id)
-
-        # Extract information
-        video_info = self._parse_page(video_webpage)
-
-        # uploader
-        if 'owner' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
-            return
-        video_uploader = video_info['owner']
-
-        # title
-        if 'title' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract video title')
-            return
-        video_title = video_info['title']
-        video_title = video_title.decode('utf-8')
-
-        # thumbnail image
-        if 'thumbnail' not in video_info:
-            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
-            video_thumbnail = ''
-        else:
-            video_thumbnail = video_info['thumbnail']
-
-        # upload date
-        upload_date = None
-        if 'upload_date' in video_info:
-            upload_time = video_info['upload_date']
-            timetuple = email.utils.parsedate_tz(upload_time)
-            if timetuple is not None:
-                try:
-                    upload_date = time.strftime('%Y%m%d', timetuple[0:9])
-                except:
-                    pass
-
-        # description
-        video_description = video_info.get('description', 'No description available.')
-
-        url_map = video_info['video_urls']
-        if url_map:
-            # Decide which formats to download
-            req_format = self._downloader.params.get('format', None)
-            format_limit = self._downloader.params.get('format_limit', None)
+        url = 'https://www.facebook.com/video/video.php?v=%s' % video_id
+        webpage = self._download_webpage(url, video_id)
  
-            if format_limit is not None and format_limit in self._available_formats:
-                format_list = self._available_formats[self._available_formats.index(format_limit):]
-            else:
-                format_list = self._available_formats
-            existing_formats = [x for x in format_list if x in url_map]
-            if len(existing_formats) == 0:
-                self._downloader.trouble(u'ERROR: no known formats available for video')
-                return
-            if req_format is None:
-                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
-            elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
-            elif req_format == '-1':
-                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
-            else:
-                # Specific format
-                if req_format not in url_map:
-                    self._downloader.trouble(u'ERROR: requested format not available')
-                    return
-                video_url_list = [(req_format, url_map[req_format])] # Specific format
+        BEFORE = '[["allowFullScreen","true"],["allowScriptAccess","always"],["salign","tl"],["scale","noscale"],["wmode","opaque"]].forEach(function(param) {swf.addParam(param[0], param[1]);});\n'
+        AFTER = '.forEach(function(variable) {swf.addVariable(variable[0], variable[1]);});'
+        m = re.search(re.escape(BEFORE) + '(.*?)' + re.escape(AFTER), webpage)
+        if not m:
+            raise ExtractorError(u'Cannot parse data')
+        data = dict(json.loads(m.group(1)))
+        params_raw = compat_urllib_parse.unquote(data['params'])
+        params = json.loads(params_raw)
+        video_url = params['hd_src']
+        video_duration = int(params['video_duration'])
+
+        m = re.search('<h2 class="uiHeaderTitle">([^<]+)</h2>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find title in webpage')
+        video_title = unescapeHTML(m.group(1))
  
-        results = []
-        for format_param, video_real_url in video_url_list:
-            # Extension
-            video_extension = self._video_extensions.get(format_param, 'mp4')
+        info = {
+            'id': video_id,
+            'title': video_title,
+            'url': video_url,
+            'ext': 'mp4',
+            'duration': video_duration,
+            'thumbnail': params['thumbnail_src'],
+        }
+        return [info]
  
-            results.append({
-                'id':       video_id.decode('utf-8'),
-                'url':      video_real_url.decode('utf-8'),
-                'uploader': video_uploader.decode('utf-8'),
-                'upload_date':  upload_date,
-                'title':    video_title,
-                'ext':      video_extension.decode('utf-8'),
-                'format':   (format_param is None and u'NA' or format_param.decode('utf-8')),
-                'thumbnail':    video_thumbnail.decode('utf-8'),
-                'description':  video_description.decode('utf-8'),
-            })
-        return results
  
  class BlipTVIE(InfoExtractor):
      """Information extractor for blip.tv"""
@@ -2204,6 +2107,7 @@ class BlipTVIE(InfoExtractor):
              cchar = '?'
          json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
          request = compat_urllib_request.Request(json_url)
+        request.add_header('User-Agent', 'iTunes/10.6.1')
          self.report_extraction(mobj.group(1))
          info = None
          try:
@@ -2224,8 +2128,7 @@ class BlipTVIE(InfoExtractor):
                      'urlhandle': urlh
                  }
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
-            return
+            raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err))
          if info is None: # Regular URL
              try:
                  json_code_bytes = urlh.read()
@@ -2258,13 +2161,13 @@ class BlipTVIE(InfoExtractor):
                      'format': data['media']['mimeType'],
                      'thumbnail': data['thumbnailUrl'],
                      'description': data['description'],
-                    'player_url': data['embedUrl']
+                    'player_url': data['embedUrl'],
+                    'user_agent': 'iTunes/10.6.1',
                  }
              except (ValueError,KeyError) as err:
                  self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
                  return
  
-        std_headers['User-Agent'] = 'iTunes/10.6.1'
          return [info]
  
  
@@ -2977,8 +2880,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
-            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
-            return
+            raise ExtractorError(u'Invalid URL: %s' % url)
  
          if mobj.group('course') and mobj.group('video'): # A specific video
              course = mobj.group('course')
@@ -3015,12 +2917,9 @@ class StanfordOpenClassroomIE(InfoExtractor):
                  'upload_date': None,
              }
  
-            self.report_download_webpage(info['id'])
-            try:
-                coursepage = compat_urllib_request.urlopen(url).read()
-            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                self._downloader.trouble(u'ERROR: unable to download course info page: ' + compat_str(err))
-                return
+            coursepage = self._download_webpage(url, info['id'],
+                                        note='Downloading course info page',
+                                        errnote='Unable to download course info page')
  
              m = re.search('<h1>([^<]+)</h1>', coursepage)
              if m:
@@ -3044,7 +2943,6 @@ class StanfordOpenClassroomIE(InfoExtractor):
                  assert entry['type'] == 'reference'
                  results += self.extract(entry['url'])
              return results
-
          else: # Root page
              info = {
                  'id': 'Stanford OpenClassroom',
@@ -3272,7 +3170,7 @@ class YoukuIE(InfoExtractor):
  class XNXXIE(InfoExtractor):
      """Information extractor for xnxx.com"""
  
-    _VALID_URL = r'^http://video\.xnxx\.com/video([0-9]+)/(.*)'
+    _VALID_URL = r'^(?:https?://)?video\.xnxx\.com/video([0-9]+)/(.*)'
      IE_NAME = u'xnxx'
      VIDEO_URL_RE = r'flv_url=(.*?)&amp;'
      VIDEO_TITLE_RE = r'<title>(.*?)\s+-\s+XNXX.COM'
@@ -3524,17 +3422,25 @@ class JustinTVIE(InfoExtractor):
              return
  
          response = json.loads(webpage)
+        if type(response) != list:
+            error_text = response.get('error', 'unknown error')
+            self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text)
+            return
          info = []
          for clip in response:
              video_url = clip['video_file_url']
              if video_url:
                  video_extension = os.path.splitext(video_url)[1][1:]
-                video_date = re.sub('-', '', clip['created_on'][:10])
+                video_date = re.sub('-', '', clip['start_time'][:10])
+                video_uploader_id = clip.get('user_id', clip.get('channel_id'))
+                video_id = clip['id']
+                video_title = clip.get('title', video_id)
                  info.append({
-                    'id': clip['id'],
+                    'id': video_id,
                      'url': video_url,
-                    'title': clip['title'],
-                    'uploader': clip.get('user_id', clip.get('channel_id')),
+                    'title': video_title,
+                    'uploader': clip.get('channel_name', video_uploader_id),
+                    'uploader_id': video_uploader_id,
                      'upload_date': video_date,
                      'ext': video_extension,
                  })
@@ -3553,7 +3459,7 @@ class JustinTVIE(InfoExtractor):
              paged = True
              api += '/channel/archives/%s.json'
          else:
-            api += '/clip/show/%s.json'
+            api += '/broadcast/by_archive/%s.json'
          api = api % (video_id,)
  
          self.report_extraction(video_id)
@@ -3693,11 +3599,11 @@ class SteamIE(InfoExtractor):
                    }
              videos.append(info)
          return videos
-        
+
  class UstreamIE(InfoExtractor):
-    _VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)'
+    _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
      IE_NAME = u'ustream'
-    
+
      def _real_extract(self, url):
          m = re.match(self._VALID_URL, url)
          video_id = m.group('videoID')
@@ -3716,6 +3622,292 @@ class UstreamIE(InfoExtractor):
                    }
          return [info]
  
+class RBMARadioIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
+
+    def _real_extract(self, url):
+        m = re.match(self._VALID_URL, url)
+        video_id = m.group('videoID')
+
+        webpage = self._download_webpage(url, video_id)
+        m = re.search(r'<script>window.gon = {.*?};gon\.show=(.+?);</script>', webpage)
+        if not m:
+            raise ExtractorError(u'Cannot find metadata')
+        json_data = m.group(1)
+
+        try:
+            data = json.loads(json_data)
+        except ValueError as e:
+            raise ExtractorError(u'Invalid JSON: ' + str(e))
+
+        video_url = data['akamai_url'] + '&cbr=256'
+        url_parts = compat_urllib_parse_urlparse(video_url)
+        video_ext = url_parts.path.rpartition('.')[2]
+        info = {
+                'id': video_id,
+                'url': video_url,
+                'ext': video_ext,
+                'title': data['title'],
+                'description': data.get('teaser_text'),
+                'location': data.get('country_of_origin'),
+                'uploader': data.get('host', {}).get('name'),
+                'uploader_id': data.get('host', {}).get('slug'),
+                'thumbnail': data.get('image', {}).get('large_url_2x'),
+                'duration': data.get('duration'),
+        }
+        return [info]
+
+
+class YouPornIE(InfoExtractor):
+    """Information extractor for youporn.com."""
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youporn\.com/watch/(?P<videoid>[0-9]+)/(?P<title>[^/]+)'
+   
+    def _print_formats(self, formats):
+        """Print all available formats"""
+        print(u'Available formats:')
+        print(u'ext\t\tformat')
+        print(u'---------------------------------')
+        for format in formats:
+            print(u'%s\t\t%s'  % (format['ext'], format['format']))
+
+    def _specific(self, req_format, formats):
+        for x in formats:
+            if(x["format"]==req_format):
+                return x
+        return None
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('videoid')
+
+        req = compat_urllib_request.Request(url)
+        req.add_header('Cookie', 'age_verified=1')
+        webpage = self._download_webpage(req, video_id)
+
+        # Get the video title
+        result = re.search(r'videoTitleArea">(?P<title>.*)</h1>', webpage)
+        if result is None:
+            raise ExtractorError(u'ERROR: unable to extract video title')
+        video_title = result.group('title').strip()
+
+        # Get the video date
+        result = re.search(r'Date:</b>(?P<date>.*)</li>', webpage)
+        if result is None:
+            self._downloader.to_stderr(u'WARNING: unable to extract video date')
+            upload_date = None
+        else:
+            upload_date = result.group('date').strip()
+
+        # Get the video uploader
+        result = re.search(r'Submitted:</b>(?P<uploader>.*)</li>', webpage)
+        if result is None:
+            self._downloader.to_stderr(u'ERROR: unable to extract uploader')
+            video_uploader = None
+        else:
+            video_uploader = result.group('uploader').strip()
+            video_uploader = clean_html( video_uploader )
+
+        # Get all of the formats available
+        DOWNLOAD_LIST_RE = r'(?s)<ul class="downloadList">(?P<download_list>.*?)</ul>'
+        result = re.search(DOWNLOAD_LIST_RE, webpage)
+        if result is None:
+            raise ExtractorError(u'Unable to extract download list')
+        download_list_html = result.group('download_list').strip()
+
+        # Get all of the links from the page
+        LINK_RE = r'(?s)<a href="(?P<url>[^"]+)">'
+        links = re.findall(LINK_RE, download_list_html)
+        if(len(links) == 0):
+            raise ExtractorError(u'ERROR: no known formats available for video')
+        
+        self._downloader.to_screen(u'[youporn] Links found: %d' % len(links))   
+
+        formats = []
+        for link in links:
+
+            # A link looks like this:
+            # http://cdn1.download.youporn.phncdn.com/201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4?nvb=20121113051249&nva=20121114051249&ir=1200&sr=1200&hash=014b882080310e95fb6a0
+            # A path looks like this:
+            # /201210/31/8004515/480p_370k_8004515/YouPorn%20-%20Nubile%20Films%20The%20Pillow%20Fight.mp4
+            video_url = unescapeHTML( link )
+            path = compat_urllib_parse_urlparse( video_url ).path
+            extension = os.path.splitext( path )[1][1:]
+            format = path.split('/')[4].split('_')[:2]
+            size = format[0]
+            bitrate = format[1]
+            format = "-".join( format )
+            title = u'%s-%s-%s' % (video_title, size, bitrate)
+
+            formats.append({
+                'id': video_id,
+                'url': video_url,
+                'uploader': video_uploader,
+                'upload_date': upload_date,
+                'title': title,
+                'ext': extension,
+                'format': format,
+                'thumbnail': None,
+                'description': None,
+                'player_url': None
+            })
+
+        if self._downloader.params.get('listformats', None):
+            self._print_formats(formats)
+            return
+
+        req_format = self._downloader.params.get('format', None)
+        self._downloader.to_screen(u'[youporn] Format: %s' % req_format)
+
+        if req_format is None or req_format == 'best':
+            return [formats[0]]
+        elif req_format == 'worst':
+            return [formats[-1]]
+        elif req_format in ('-1', 'all'):
+            return formats
+        else:
+            format = self._specific( req_format, formats )
+            if result is None:
+                self._downloader.trouble(u'ERROR: requested format not available')
+                return
+            return [format]
+
+        
+
+class PornotubeIE(InfoExtractor):
+    """Information extractor for pornotube.com.
+
+    The video id and title are taken from the URL itself; the stream URL and
+    the upload date are scraped from the video page.
+    """
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?pornotube\.com(/c/(?P<channel>[0-9]+))?(/m/(?P<videoid>[0-9]+))(/(?P<title>.+))$'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for url.
+
+        On an invalid URL, or a page missing the stream URL or the upload
+        date, the problem is reported via self._downloader.trouble() and,
+        if that call returns, None is returned.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('videoid')
+        video_title = mobj.group('title')
+
+        # Get webpage content
+        webpage = self._download_webpage(url, video_id)
+
+        # Get the video URL (dots in the host name are escaped so that they
+        # only match a literal '.')
+        VIDEO_URL_RE = r'url: "(?P<url>http://video[0-9]\.pornotube\.com/.+\.flv)",'
+        result = re.search(VIDEO_URL_RE, webpage)
+        if result is None:
+            self._downloader.trouble(u'ERROR: unable to extract video url')
+            return
+        video_url = compat_urllib_parse.unquote(result.group('url'))
+
+        # Get the upload date
+        # NOTE(review): the page value is slash-separated digits, whereas
+        # InfoExtractor documents upload_date as YYYYMMDD. The field order
+        # used by the site is not visible here, so the value is passed
+        # through unchanged -- confirm and normalise.
+        VIDEO_UPLOADED_RE = r'<div class="video_added_by">Added (?P<date>[0-9\/]+) by'
+        result = re.search(VIDEO_UPLOADED_RE, webpage)
+        if result is None:
+            self._downloader.trouble(u'ERROR: unable to extract upload date')
+            return
+        upload_date = result.group('date')
+
+        info = {'id': video_id,
+                'url': video_url,
+                'uploader': None,
+                'upload_date': upload_date,
+                'title': video_title,
+                'ext': 'flv',
+                'format': 'flv'}
+
+        return [info]
+
+class YouJizzIE(InfoExtractor):
+    """Information extractor for youjizz.com.
+
+    Extraction takes two page loads: the video page supplies the title and a
+    link to the embed page, and the embed page supplies the stream URL.
+    """
+    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/(?P<videoid>[^.]+).html$'
+
+    def _real_extract(self, url):
+        """Return a one-element list holding the info dict for url.
+
+        Raises ExtractorError when the title, the embed page link or the
+        stream URL cannot be found. An URL that does not match _VALID_URL is
+        reported through self._downloader.trouble() instead.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        if not url_match:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        # First page load: the public video page, keyed by the slug in the URL
+        page = self._download_webpage(url, url_match.group('videoid'))
+
+        title_match = re.search(r'<title>(?P<title>.*)</title>', page)
+        if not title_match:
+            raise ExtractorError(u'ERROR: unable to extract video title')
+        video_title = title_match.group('title').strip()
+
+        embed_match = re.search(r'https?://www.youjizz.com/videos/embed/(?P<videoid>[0-9]+)', page)
+        if not embed_match:
+            raise ExtractorError(u'ERROR: unable to extract embed page')
+        embed_page_url = embed_match.group().strip()
+        # From here on the numeric id of the embed page is the video id
+        video_id = embed_match.group('videoid')
+
+        # Second page load: the embed page, which carries the player setup
+        embed_page = self._download_webpage(embed_page_url, video_id)
+
+        source_match = re.search(r'so.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);', embed_page)
+        if not source_match:
+            raise ExtractorError(u'ERROR: unable to extract video url')
+
+        return [{
+            'id': video_id,
+            'url': source_match.group('source'),
+            'title': video_title,
+            'ext': 'flv',
+            'format': 'flv',
+            'player_url': embed_page_url,
+        }]
+
+class EightTracksIE(InfoExtractor):
+    """Information extractor for 8tracks.com mixes.
+
+    The mix description is read from the JSON embedded in the mix page; the
+    tracks are then fetched one at a time from the play/next endpoints until
+    the response flags the last track.
+    """
+    IE_NAME = '8tracks'
+    _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+
+    def _real_extract(self, url):
+        """Return a list with one info dict per track of the mix at url.
+
+        Raises ExtractorError when url does not match _VALID_URL or when the
+        mix description cannot be located in the page.
+        """
+        url_match = re.match(self._VALID_URL, url)
+        if url_match is None:
+            raise ExtractorError(u'Invalid URL: %s' % url)
+        playlist_id = url_match.group('id')
+
+        page = self._download_webpage(url, playlist_id)
+
+        mix_match = re.search(r"new TRAX.Mix\((.*?)\);\n*\s*TRAX.initSearchAutocomplete\('#search'\);", page, flags=re.DOTALL)
+        if not mix_match:
+            raise ExtractorError(u'Cannot find trax information')
+        data = json.loads(mix_match.group(1))
+
+        # Random token sent as the set id in every play/next request
+        session = str(random.randint(0, 1000000000))
+        mix_id = data['id']
+        track_count = data['tracks_count']
+
+        tracks = []
+        request_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+        for track_no in itertools.count(1):
+            progress = u'Downloading song information %s/%s' % (str(track_no), track_count)
+            api_json = self._download_webpage(
+                request_url, playlist_id,
+                note=progress,
+                errnote=u'Failed to download song information')
+            set_data = json.loads(api_json)[u'set']
+            track_data = set_data['track']
+            tracks.append({
+                'id': track_data['id'],
+                'url': track_data['track_file_stream_url'],
+                'title': track_data['performer'] + u' - ' + track_data['name'],
+                'raw_title': track_data['name'],
+                'uploader_id': data['user']['login'],
+                'ext': 'm4a',
+            })
+            if set_data['at_last_track']:
+                break
+            # The follow-up request names the track that was just received
+            request_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
+        return tracks
  
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
  
  def gen_extractors():
      """ Return a list of an instance of every supported extractor.
@@ -3750,6 +3942,9 @@ def gen_extractors():
          MTVIE(),
          YoukuIE(),
          XNXXIE(),
          MTVIE(),
          YoukuIE(),
          XNXXIE(),
+        YouJizzIE(),
+        PornotubeIE(),
+        YouPornIE(),
          GooglePlusIE(),
          ArteTvIE(),
          NBAIE(),
          GooglePlusIE(),
          ArteTvIE(),
          NBAIE(),
@@ -3758,6 +3953,8 @@ def gen_extractors():
          TweetReelIE(),
          SteamIE(),
          UstreamIE(),
          TweetReelIE(),
          SteamIE(),
          UstreamIE(),
+        RBMARadioIE(),
+        EightTracksIE(),
          GenericIE()
      ]
  
          GenericIE()
      ]