Merge pull request #601 from paullik/no-post-overwrites

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index b53071c43373b31492fb79d0a479b47af59f8578..697c031c5119176874558aef23eadb163e45fcdd 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -23,7 +23,7 @@ class InfoExtractor(object):
      Information extractors are the classes that, given a URL, extract
      information about the video (or videos) the URL refers to. This
      information includes the real video URL, the video title, author and
      Information extractors are the classes that, given a URL, extract
      information about the video (or videos) the URL refers to. This
      information includes the real video URL, the video title, author and
-    others. The information is stored in a dictionary which is then 
+    others. The information is stored in a dictionary which is then
      passed to the FileDownloader. The FileDownloader processes this
      information possibly downloading the video to the file system, among
      other possible outcomes.
      passed to the FileDownloader. The FileDownloader processes this
      information possibly downloading the video to the file system, among
      other possible outcomes.
@@ -32,7 +32,7 @@ class InfoExtractor(object):
  
      id:             Video identifier.
      url:            Final video URL.
  
      id:             Video identifier.
      url:            Final video URL.
-    uploader:       Nickname of the video uploader, unescaped.
+    uploader:       Full name of the video uploader, unescaped.
      upload_date:    Video upload date (YYYYMMDD).
      title:          Video title, unescaped.
      ext:            Video filename extension.
      upload_date:    Video upload date (YYYYMMDD).
      title:          Video title, unescaped.
      ext:            Video filename extension.
@@ -42,6 +42,7 @@ class InfoExtractor(object):
      format:         The video format, defaults to ext (used for --get-format)
      thumbnail:      Full URL to a video thumbnail image.
      description:    One-line video description.
      format:         The video format, defaults to ext (used for --get-format)
      thumbnail:      Full URL to a video thumbnail image.
      description:    One-line video description.
+    uploader_id:    Nickname or id of the video uploader.
      player_url:     SWF Player URL (used for rtmpdump).
      subtitles:      The .srt file contents.
      urlhandle:      [internal] The urlHandle to be used to download the file,
      player_url:     SWF Player URL (used for rtmpdump).
      subtitles:      The .srt file contents.
      urlhandle:      [internal] The urlHandle to be used to download the file,
@@ -159,7 +160,7 @@ class YoutubeIE(InfoExtractor):
          '44': '480x854',
          '45': '720x1280',
          '46': '1080x1920',
          '44': '480x854',
          '45': '720x1280',
          '46': '1080x1920',
-    }   
+    }
      IE_NAME = u'youtube'
  
      def suitable(self, url):
      IE_NAME = u'youtube'
  
      def suitable(self, url):
@@ -219,6 +220,34 @@ class YoutubeIE(InfoExtractor):
              srt += caption + '\n\n'
          return srt
  
              srt += caption + '\n\n'
          return srt
  
+    def _extract_subtitles(self, video_id):
+        """Download the closed captions of a video and convert them to SRT.
+
+        Returns a 2-tuple (warning, subtitles). On success warning is None
+        and subtitles is whatever _closed_captions_xml_to_srt() produced.
+        On every handled failure warning is a u'WARNING: ...' message and
+        subtitles is None; the caller decides how to report it.
+        """
+        self.report_video_subtitles_download(video_id)
+
+        # Step 1: ask for the list of caption tracks available for the video.
+        list_request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
+        try:
+            track_listing = compat_urllib_request.urlopen(list_request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+
+        # Map language code -> track name, as both are needed to fetch a track.
+        found_tracks = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', track_listing)
+        track_names = dict((lang_code, track_name) for (track_name, lang_code) in found_tracks)
+        if not track_names:
+            return (u'WARNING: video has no closed captions', None)
+
+        # Step 2: pick a language: the user's choice, else English, else
+        # the first one listed in the mapping.
+        requested_lang = self._downloader.params.get('subtitleslang', False)
+        if requested_lang:
+            chosen_lang = requested_lang
+        elif 'en' in track_names:
+            chosen_lang = 'en'
+        else:
+            chosen_lang = next(iter(track_names))
+        if chosen_lang not in track_names:
+            return (u'WARNING: no closed captions found in the specified language', None)
+
+        # Step 3: download the chosen track and convert it.
+        track_request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (chosen_lang, track_names[chosen_lang], video_id))
+        try:
+            captions_xml = compat_urllib_request.urlopen(track_request).read().decode('utf-8')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+        if not captions_xml:
+            return (u'WARNING: unable to download video subtitles', None)
+
+        return (None, self._closed_captions_xml_to_srt(captions_xml))
+
      def _print_formats(self, formats):
          print('Available formats:')
          for x in formats:
      def _print_formats(self, formats):
          print('Available formats:')
          for x in formats:
@@ -356,10 +385,18 @@ class YoutubeIE(InfoExtractor):
  
          # uploader
          if 'author' not in video_info:
  
          # uploader
          if 'author' not in video_info:
-            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+            self._downloader.trouble(u'ERROR: unable to extract uploader name')
              return
          video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
  
              return
          video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0])
  
+        # uploader_id
+        video_uploader_id = None
+        mobj = re.search(r'<link itemprop="url" href="http://www.youtube.com/user/([^"]+)">', video_webpage)
+        if mobj is not None:
+            video_uploader_id = mobj.group(1)
+        else:
+            self._downloader.trouble(u'WARNING: unable to extract uploader nickname')
+
          # title
          if 'title' not in video_info:
              self._downloader.trouble(u'ERROR: unable to extract video title')
          # title
          if 'title' not in video_info:
              self._downloader.trouble(u'ERROR: unable to extract video title')
@@ -395,35 +432,9 @@ class YoutubeIE(InfoExtractor):
          # closed captions
          video_subtitles = None
          if self._downloader.params.get('writesubtitles', False):
          # closed captions
          video_subtitles = None
          if self._downloader.params.get('writesubtitles', False):
-            try:
-                self.report_video_subtitles_download(video_id)
-                request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id)
-                try:
-                    srt_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
-                srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list)
-                srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list)
-                if not srt_lang_list:
-                    raise Trouble(u'WARNING: video has no closed captions')
-                if self._downloader.params.get('subtitleslang', False):
-                    srt_lang = self._downloader.params.get('subtitleslang')
-                elif 'en' in srt_lang_list:
-                    srt_lang = 'en'
-                else:
-                    srt_lang = srt_lang_list.keys()[0]
-                if not srt_lang in srt_lang_list:
-                    raise Trouble(u'WARNING: no closed captions found in the specified language')
-                request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id))
-                try:
-                    srt_xml = compat_urllib_request.urlopen(request).read().decode('utf-8')
-                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err))
-                if not srt_xml:
-                    raise Trouble(u'WARNING: unable to download video subtitles')
-                video_subtitles = self._closed_captions_xml_to_srt(srt_xml)
-            except Trouble as trouble:
-                self._downloader.trouble(str(trouble))
+            (srt_error, video_subtitles) = self._extract_subtitles(video_id)
+            if srt_error:
+                self._downloader.trouble(srt_error)
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
  
          if 'length_seconds' not in video_info:
              self._downloader.trouble(u'WARNING: unable to extract video duration')
@@ -443,7 +454,7 @@ class YoutubeIE(InfoExtractor):
          elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
              url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
              url_data = [compat_parse_qs(uds) for uds in url_data_strs]
          elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
              url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
              url_data = [compat_parse_qs(uds) for uds in url_data_strs]
-            url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
+            url_data = [ud for ud in url_data if 'itag' in ud and 'url' in ud]
              url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
  
              format_limit = self._downloader.params.get('format_limit', None)
              url_map = dict((ud['itag'][0], ud['url'][0] + '&signature=' + ud['sig'][0]) for ud in url_data)
  
              format_limit = self._downloader.params.get('format_limit', None)
@@ -493,6 +504,7 @@ class YoutubeIE(InfoExtractor):
                  'id':       video_id,
                  'url':      video_real_url,
                  'uploader': video_uploader,
                  'id':       video_id,
                  'url':      video_real_url,
                  'uploader': video_uploader,
+                'uploader_id': video_uploader_id,
                  'upload_date':  upload_date,
                  'title':    video_title,
                  'ext':      video_extension,
                  'upload_date':  upload_date,
                  'title':    video_title,
                  'ext':      video_extension,
@@ -988,26 +1000,27 @@ class VimeoIE(InfoExtractor):
          except:
              self._downloader.trouble(u'ERROR: unable to extract info section')
              return
          except:
              self._downloader.trouble(u'ERROR: unable to extract info section')
              return
-        
+
          # Extract title
          video_title = config["video"]["title"]
  
          # Extract title
          video_title = config["video"]["title"]
  
-        # Extract uploader
+        # Extract uploader and uploader_id
          video_uploader = config["video"]["owner"]["name"]
          video_uploader = config["video"]["owner"]["name"]
+        video_uploader_id = config["video"]["owner"]["url"].split('/')[-1]
  
          # Extract video thumbnail
          video_thumbnail = config["video"]["thumbnail"]
  
          # Extract video description
  
          # Extract video thumbnail
          video_thumbnail = config["video"]["thumbnail"]
  
          # Extract video description
-        video_description = get_element_by_id("description", webpage)
+        video_description = get_element_by_attribute("itemprop", "description", webpage)
          if video_description: video_description = clean_html(video_description)
          else: video_description = ''
  
          # Extract upload date
          video_upload_date = None
          if video_description: video_description = clean_html(video_description)
          else: video_description = ''
  
          # Extract upload date
          video_upload_date = None
-        mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
+        mobj = re.search(r'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage)
          if mobj is not None:
          if mobj is not None:
-            video_upload_date = mobj.group(1)
+            video_upload_date = mobj.group(1) + mobj.group(2) + mobj.group(3)
  
          # Vimeo specific: extract request signature and timestamp
          sig = config['request']['signature']
  
          # Vimeo specific: extract request signature and timestamp
          sig = config['request']['signature']
@@ -1045,6 +1058,7 @@ class VimeoIE(InfoExtractor):
              'id':       video_id,
              'url':      video_url,
              'uploader': video_uploader,
              'id':       video_id,
              'url':      video_url,
              'uploader': video_uploader,
+            'uploader_id': video_uploader_id,
              'upload_date':  video_upload_date,
              'title':    video_title,
              'ext':      video_extension,
              'upload_date':  video_upload_date,
              'title':    video_title,
              'ext':      video_extension,
@@ -1211,7 +1225,7 @@ class GenericIE(InfoExtractor):
      def report_following_redirect(self, new_url):
          """Report information extraction."""
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
      def report_following_redirect(self, new_url):
          """Report information extraction."""
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
-        
+
      def _test_redirect(self, url):
          """Check if it is a redirect, like url shorteners, in case restart chain."""
          class HeadRequest(compat_urllib_request.Request):
      def _test_redirect(self, url):
          """Check if it is a redirect, like url shorteners, in case restart chain."""
          class HeadRequest(compat_urllib_request.Request):
@@ -1220,38 +1234,38 @@ class GenericIE(InfoExtractor):
  
          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
              """
  
          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
              """
-            Subclass the HTTPRedirectHandler to make it use our 
+            Subclass the HTTPRedirectHandler to make it use our
              HeadRequest also on the redirected URL
              """
              HeadRequest also on the redirected URL
              """
-            def redirect_request(self, req, fp, code, msg, headers, newurl): 
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
                  if code in (301, 302, 303, 307):
                  if code in (301, 302, 303, 307):
-                    newurl = newurl.replace(' ', '%20') 
+                    newurl = newurl.replace(' ', '%20')
                      newheaders = dict((k,v) for k,v in req.headers.items()
                                        if k.lower() not in ("content-length", "content-type"))
                      newheaders = dict((k,v) for k,v in req.headers.items()
                                        if k.lower() not in ("content-length", "content-type"))
-                    return HeadRequest(newurl, 
+                    return HeadRequest(newurl,
                                         headers=newheaders,
                                         headers=newheaders,
-                                       origin_req_host=req.get_origin_req_host(), 
-                                       unverifiable=True) 
-                else: 
-                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp) 
+                                       origin_req_host=req.get_origin_req_host(),
+                                       unverifiable=True)
+                else:
+                    raise compat_urllib_error.HTTPError(req.get_full_url(), code, msg, headers, fp)
  
          class HTTPMethodFallback(compat_urllib_request.BaseHandler):
              """
              Fallback to GET if HEAD is not allowed (405 HTTP error)
              """
  
          class HTTPMethodFallback(compat_urllib_request.BaseHandler):
              """
              Fallback to GET if HEAD is not allowed (405 HTTP error)
              """
-            def http_error_405(self, req, fp, code, msg, headers): 
+            def http_error_405(self, req, fp, code, msg, headers):
                  fp.read()
                  fp.close()
  
                  newheaders = dict((k,v) for k,v in req.headers.items()
                                    if k.lower() not in ("content-length", "content-type"))
                  fp.read()
                  fp.close()
  
                  newheaders = dict((k,v) for k,v in req.headers.items()
                                    if k.lower() not in ("content-length", "content-type"))
-                return self.parent.open(compat_urllib_request.Request(req.get_full_url(), 
-                                                 headers=newheaders, 
-                                                 origin_req_host=req.get_origin_req_host(), 
+                return self.parent.open(compat_urllib_request.Request(req.get_full_url(),
+                                                 headers=newheaders,
+                                                 origin_req_host=req.get_origin_req_host(),
                                                   unverifiable=True))
  
          # Build our opener
                                                   unverifiable=True))
  
          # Build our opener
-        opener = compat_urllib_request.OpenerDirector() 
+        opener = compat_urllib_request.OpenerDirector()
          for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                          HTTPMethodFallback, HEADRedirectHandler,
                          compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
          for handler in [compat_urllib_request.HTTPHandler, compat_urllib_request.HTTPDefaultErrorHandler,
                          HTTPMethodFallback, HEADRedirectHandler,
                          compat_urllib_error.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
@@ -2113,7 +2127,7 @@ class FacebookIE(InfoExtractor):
          video_description = video_info.get('description', 'No description available.')
  
          url_map = video_info['video_urls']
          video_description = video_info.get('description', 'No description available.')
  
          url_map = video_info['video_urls']
-        if len(url_map.keys()) > 0:
+        if len(list(url_map.keys())) > 0:
              # Decide which formats to download
              req_format = self._downloader.params.get('format', None)
              format_limit = self._downloader.params.get('format_limit', None)
              # Decide which formats to download
              req_format = self._downloader.params.get('format', None)
              format_limit = self._downloader.params.get('format_limit', None)
@@ -2256,7 +2270,7 @@ class MyVideoIE(InfoExtractor):
  
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
  
      def __init__(self, downloader=None):
          InfoExtractor.__init__(self, downloader)
-    
+
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
      def report_download_webpage(self, video_id):
          """Report webpage download."""
          self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
@@ -2310,10 +2324,10 @@ class ComedyCentralIE(InfoExtractor):
      """Information extractor for The Daily Show and Colbert Report """
  
      # urls can be abbreviations like :thedailyshow or :colbert
      """Information extractor for The Daily Show and Colbert Report """
  
      # urls can be abbreviations like :thedailyshow or :colbert
-    # urls for episodes like: 
+    # urls for episodes like:
      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
      # or urls for clips like: http://www.thedailyshow.com/watch/mon-december-10-2012/any-given-gun-day
      #                     or: http://www.colbertnation.com/the-colbert-report-videos/421667/november-29-2012/moon-shattering-news
-    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524    
+    #                     or: http://www.colbertnation.com/the-colbert-report-collections/422008/festival-of-lights/79524
      _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                        |(https?://)?(www\.)?
                            (?P<showname>thedailyshow|colbertnation)\.com/
      _VALID_URL = r"""^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport)
                        |(https?://)?(www\.)?
                            (?P<showname>thedailyshow|colbertnation)\.com/
@@ -2321,7 +2335,7 @@ class ComedyCentralIE(InfoExtractor):
                            (?P<clip>
                                (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
                            (?P<clip>
                                (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?))
                                |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*)))))
-                     $"""                        
+                     $"""
      IE_NAME = u'comedycentral'
  
      _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
      IE_NAME = u'comedycentral'
  
      _available_formats = ['3500', '2200', '1700', '1200', '750', '400']
@@ -2425,7 +2439,7 @@ class ComedyCentralIE(InfoExtractor):
                  return
              else:
                  mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
                  return
              else:
                  mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])]
-        
+
          playerUrl_raw = mMovieParams[0][0]
          self.report_player_url(epTitle)
          try:
          playerUrl_raw = mMovieParams[0][0]
          self.report_player_url(epTitle)
          try:
@@ -2474,7 +2488,7 @@ class ComedyCentralIE(InfoExtractor):
              if len(turls) == 0:
                  self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                  continue
              if len(turls) == 0:
                  self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
                  continue
-            
+
              if self._downloader.params.get('listformats', None):
                  self._print_formats([i[0] for i in turls])
                  return
              if self._downloader.params.get('listformats', None):
                  self._print_formats([i[0] for i in turls])
                  return
@@ -2514,7 +2528,7 @@ class ComedyCentralIE(InfoExtractor):
              }
  
              results.append(info)
              }
  
              results.append(info)
-            
+
          return results
  
  
          return results
  
  
@@ -2973,7 +2987,7 @@ class MixcloudIE(InfoExtractor):
                  if file_url is not None:
                      break # got it!
          else:
                  if file_url is not None:
                      break # got it!
          else:
-            if req_format not in formats.keys():
+            if req_format not in list(formats.keys()):
                  self._downloader.trouble(u'ERROR: format is not available')
                  return
  
                  self._downloader.trouble(u'ERROR: format is not available')
                  return
  
@@ -3078,7 +3092,7 @@ class StanfordOpenClassroomIE(InfoExtractor):
                  assert entry['type'] == 'reference'
                  results += self.extract(entry['url'])
              return results
                  assert entry['type'] == 'reference'
                  results += self.extract(entry['url'])
              return results
-            
+
          else: # Root page
              info = {
                  'id': 'Stanford OpenClassroom',
          else: # Root page
              info = {
                  'id': 'Stanford OpenClassroom',
@@ -3152,7 +3166,7 @@ class MTVIE(InfoExtractor):
              self._downloader.trouble(u'ERROR: unable to extract performer')
              return
          performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
              self._downloader.trouble(u'ERROR: unable to extract performer')
              return
          performer = unescapeHTML(mobj.group(1).decode('iso-8859-1'))
-        video_title = performer + ' - ' + song_name 
+        video_title = performer + ' - ' + song_name
  
          mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
          if mobj is None:
  
          mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
          if mobj is None:
@@ -3272,7 +3286,7 @@ class YoukuIE(InfoExtractor):
              seed = config['data'][0]['seed']
  
              format = self._downloader.params.get('format', None)
              seed = config['data'][0]['seed']
  
              format = self._downloader.params.get('format', None)
-            supported_format = config['data'][0]['streamfileids'].keys()
+            supported_format = list(config['data'][0]['streamfileids'].keys())
  
              if format is None or format == 'best':
                  if 'hd2' in supported_format:
  
              if format is None or format == 'best':
                  if 'hd2' in supported_format:
@@ -3581,7 +3595,7 @@ class JustinTVIE(InfoExtractor):
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to download video info JSON: %s' % compat_str(err))
              return
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
              self._downloader.trouble(u'ERROR: unable to download video info JSON: %s' % compat_str(err))
              return
-        
+
          response = json.loads(webpage)
          info = []
          for clip in response:
          response = json.loads(webpage)
          info = []
          for clip in response:
@@ -3604,7 +3618,7 @@ class JustinTVIE(InfoExtractor):
          if mobj is None:
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
          if mobj is None:
              self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
              return
-        
+
          api = 'http://api.justin.tv'
          video_id = mobj.group(mobj.lastindex)
          paged = False
          api = 'http://api.justin.tv'
          video_id = mobj.group(mobj.lastindex)
          paged = False
@@ -3614,9 +3628,9 @@ class JustinTVIE(InfoExtractor):
          else:
              api += '/clip/show/%s.json'
          api = api % (video_id,)
          else:
              api += '/clip/show/%s.json'
          api = api % (video_id,)
-        
+
          self.report_extraction(video_id)
          self.report_extraction(video_id)
-        
+
          info = []
          offset = 0
          limit = self._JUSTIN_PAGE_LIMIT
          info = []
          offset = 0
          limit = self._JUSTIN_PAGE_LIMIT
@@ -3630,3 +3644,52 @@ class JustinTVIE(InfoExtractor):
                  break
              offset += limit
          return info
                  break
              offset += limit
          return info
+
+class FunnyOrDieIE(InfoExtractor):
+    """Information extractor for funnyordie.com video pages."""
+
+    _VALID_URL = r'^(?:https?://)?(?:www\.)?funnyordie\.com/videos/(?P<id>[0-9a-f]+)/.*$'
+    IE_NAME = u'FunnyOrDie'
+
+    def report_extraction(self, video_id):
+        """Report information extraction."""
+        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        """Extract the video URL, title and description from a video page.
+
+        Returns a one-element list holding the info dictionary ('id', 'url',
+        'ext', 'title', 'description'), or None after reporting a problem
+        through the downloader when the URL is invalid, the page cannot be
+        downloaded, or the video URL or title cannot be found in it.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group('id')
+        self.report_extraction(video_id)
+        try:
+            urlh = compat_urllib_request.urlopen(url)
+            webpage_bytes = urlh.read()
+            webpage = webpage_bytes.decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err))
+            return
+
+        # The pattern captures the src of the second <source> tag inside <video>.
+        m = re.search(r'<video[^>]*>\s*<source[^>]*>\s*<source src="(?P<url>[^"]+)"', webpage, re.DOTALL)
+        if not m:
+            self._downloader.trouble(u'ERROR: unable to find video information')
+            # Stop here like the other error paths do; carrying on would call
+            # m.group() on None. NOTE(review): presumably trouble() can return
+            # instead of raising (e.g. when errors are ignored) -- confirm.
+            return
+        video_url = unescapeHTML(m.group('url'))
+
+        m = re.search(r"class='player_page_h1'>\s+<a.*?>(?P<title>.*?)</a>", webpage)
+        if not m:
+            self._downloader.trouble(u'Cannot find video title')
+            return
+        title = unescapeHTML(m.group('title'))
+
+        # The description is optional; a missing one is not an error.
+        m = re.search(r'<meta property="og:description" content="(?P<desc>.*?)"', webpage)
+        if m:
+            desc = unescapeHTML(m.group('desc'))
+        else:
+            desc = None
+
+        info = {
+            'id': video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'description': desc,
+        }
+        return [info]