Merge branch 'master' into extract_info_rewrite
author Jaime Marquínez Ferrándiz <jaimemf93@gmail.com>
Thu, 28 Mar 2013 12:02:04 +0000 (13:02 +0100)
committer Jaime Marquínez Ferrándiz <jaimemf93@gmail.com>
Thu, 28 Mar 2013 12:20:33 +0000 (13:20 +0100)
1  2 
youtube_dl/FileDownloader.py
youtube_dl/InfoExtractors.py

Simple merge
index e714fa6b078a87f0520c661d9a73db71bf78df6b,835428f3232afb8a6aaeca5c72b64bf1c3cd11b7..dd4a776e4a50adf7e71ab4580bbbc2fcac81e65d
@@@ -126,26 -126,14 +126,32 @@@ class InfoExtractor(object)
      def _download_webpage(self, url_or_request, video_id, note=None, errnote=None):
          """ Returns the data of the page as a string """
          urlh = self._request_webpage(url_or_request, video_id, note, errnote)
+         content_type = urlh.headers.get('Content-Type', '')
+         m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+         if m:
+             encoding = m.group(1)
+         else:
+             encoding = 'utf-8'
          webpage_bytes = urlh.read()
-         return webpage_bytes.decode('utf-8', 'replace')
+         return webpage_bytes.decode(encoding, 'replace')
 +        
 +    #Methods for following #608
 +    #They set the correct value of the '_type' key
 +    def video_result(self, video_info):
 +        """Returns a video"""
 +        video_info['_type'] = 'video'
 +        return video_info
 +    def url_result(self, url, ie=None):
 +        """Returns a url that points to a page that should be processed"""
 +        #TODO: ie should be the class used for getting the info
 +        video_info = {'_type': 'url',
 +                      'url': url}
 +        return video_info
 +    def playlist_result(self, entries):
 +        """Returns a playlist"""
 +        video_info = {'_type': 'playlist',
 +                      'entries': entries}
 +        return video_info
  
  
  class YoutubeIE(InfoExtractor):
@@@ -1362,20 -1382,15 +1400,15 @@@ class GenericIE(InfoExtractor)
              return False
  
          self.report_following_redirect(new_url)
 -        self._downloader.download([new_url])
 -        return True
 +        return new_url
  
      def _real_extract(self, url):
 -        if self._test_redirect(url): return
 +        new_url = self._test_redirect(url)
 +        if new_url: return [self.url_result(new_url)]
  
          video_id = url.split('/')[-1]
-         request = compat_urllib_request.Request(url)
          try:
-             self.report_download_webpage(video_id)
-             webpage = compat_urllib_request.urlopen(request).read()
-         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
-             return
+             webpage = self._download_webpage(url, video_id)
          except ValueError as err:
              # since this is the last-resort InfoExtractor, if
              # this error is thrown, it'll be thrown here