Merge branch 'master' into extract_info_rewrite

[youtube-dl] / youtube_dl / InfoExtractors.py
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py

index 8caace3af4fe55618537de001841bb6fbe2aa350..81eaddc7260333e70a01be3ea354edfc7513f339 100755 (executable)
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -134,6 +134,28 @@ class InfoExtractor(object):
              encoding = 'utf-8'
          webpage_bytes = urlh.read()
          return webpage_bytes.decode(encoding, 'replace')
+        
+    # Helper methods related to #608.
+    # Each one returns a result dict with the correct value of the '_type' key.
+    def video_result(self, video_info):
+        """Set video_info['_type'] to 'video' and return the same dict."""
+        video_info['_type'] = 'video'
+        return video_info
+    def url_result(self, url, ie=None):
+        """Return a dict of type 'url' pointing to a page that should be processed."""
+        # TODO: ie is currently unused; it should be the class used for getting the info
+        video_info = {'_type': 'url',
+                      'url': url}
+        return video_info
+    def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+        """Return a dict of type 'playlist' with the entries and optional 'id'/'title'."""
+        video_info = {'_type': 'playlist',
+                      'entries': entries}
+        if playlist_id:
+            video_info['id'] = playlist_id
+        if playlist_title:
+            video_info['title'] = playlist_title
+        return video_info
  
  
  class YoutubeIE(InfoExtractor):
@@ -253,11 +275,11 @@ class YoutubeIE(InfoExtractor):
          try:
              sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+            return (u'unable to download video subtitles: %s' % compat_str(err), None)
          sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
          sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
          if not sub_lang_list:
-            return (u'WARNING: video doesn\'t have subtitles', None)
+            return (u'video doesn\'t have subtitles', None)
          return sub_lang_list
  
      def _list_available_subtitles(self, video_id):
@@ -280,9 +302,9 @@ class YoutubeIE(InfoExtractor):
          try:
              sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
          except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-            return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None, None)
+            return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
          if not sub:
-            return (u'WARNING: Did not fetch video subtitles', None, None)
+            return (u'Did not fetch video subtitles', None, None)
          return (None, sub_lang, sub)
  
      def _extract_subtitle(self, video_id):
@@ -301,7 +323,7 @@ class YoutubeIE(InfoExtractor):
          else:
              sub_lang = list(sub_lang_list.keys())[0]
          if not sub_lang in sub_lang_list:
-            return [(u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None, None)]
+            return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
  
          subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
          return [subtitle]
@@ -309,6 +331,8 @@ class YoutubeIE(InfoExtractor):
      def _extract_all_subtitles(self, video_id):
          sub_lang_list = self._get_available_subtitles(video_id)
          sub_format = self._downloader.params.get('subtitlesformat')
+        if  isinstance(sub_lang_list,tuple): # A tuple means an error occurred and the available subtitles were not retrieved
+            return [(sub_lang_list[0], None, None)]
          subtitles = []
          for sub_lang in sub_lang_list:
              subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
@@ -542,14 +566,14 @@ class YoutubeIE(InfoExtractor):
              if video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitles[0]
                  if sub_error:
-                    self._downloader.trouble(sub_error)
+                    self._downloader.report_error(sub_error)
  
          if self._downloader.params.get('allsubtitles', False):
              video_subtitles = self._extract_all_subtitles(video_id)
              for video_subtitle in video_subtitles:
                  (sub_error, sub_lang, sub) = video_subtitle
                  if sub_error:
-                    self._downloader.trouble(sub_error)
+                    self._downloader.report_error(sub_error)
  
          if self._downloader.params.get('listsubtitles', False):
              sub_lang_list = self._list_available_subtitles(video_id)
@@ -1341,7 +1365,7 @@ class GenericIE(InfoExtractor):
          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)
  
      def _test_redirect(self, url):
-        """Check if it is a redirect, like url shorteners, in case restart chain."""
+        """Check if it is a redirect, like url shorteners; if so, return the new url."""
          class HeadRequest(compat_urllib_request.Request):
              def get_method(self):
                  return "HEAD"
@@ -1392,11 +1416,11 @@ class GenericIE(InfoExtractor):
              return False
  
          self.report_following_redirect(new_url)
-        self._downloader.download([new_url])
-        return True
+        return new_url
  
      def _real_extract(self, url):
-        if self._test_redirect(url): return
+        new_url = self._test_redirect(url)
+        if new_url: return [self.url_result(new_url)]
  
          video_id = url.split('/')[-1]
          try:
@@ -1720,9 +1744,7 @@ class YoutubePlaylistIE(InfoExtractor):
                          (?:
                             (?:course|view_play_list|my_playlists|artist|playlist|watch)
                             \? (?:.*?&)*? (?:p|a|list)=
-                        |  user/.*?/user/
                          |  p/
-                        |  user/.*?#[pg]/c/
                          )
                          ((?:PL|EC|UU)?[0-9A-Za-z-_]{10,})
                          .*
@@ -1799,9 +1821,8 @@ class YoutubePlaylistIE(InfoExtractor):
          else:
              self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos)))
  
-        for video in videos:
-            self._downloader.download([video])
-        return
+        url_results = [self.url_result(url) for url in videos]
+        return [self.playlist_result(url_results, playlist_id)]
  
  
  class YoutubeChannelIE(InfoExtractor):
@@ -1851,9 +1872,9 @@ class YoutubeChannelIE(InfoExtractor):
  
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
-        for id in video_ids:
-            self._downloader.download(['http://www.youtube.com/watch?v=%s' % id])
-        return
+        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
+        url_entries = [self.url_result(url) for url in urls]
+        return [self.playlist_result(url_entries, channel_id)]
  
  
  class YoutubeUserIE(InfoExtractor):
@@ -1935,8 +1956,9 @@ class YoutubeUserIE(InfoExtractor):
          self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
                  (username, all_ids_count, len(video_ids)))
  
-        for video_id in video_ids:
-            self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id])
+        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
+        url_results = [self.url_result(url) for url in urls]
+        return [self.playlist_result(url_results, playlist_title = username)]
  
  
  class BlipTVUserIE(InfoExtractor):
@@ -2026,8 +2048,9 @@ class BlipTVUserIE(InfoExtractor):
          self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" %
                  (self.IE_NAME, username, all_ids_count, len(video_ids)))
  
-        for video_id in video_ids:
-            self._downloader.download([u'http://blip.tv/'+video_id])
+        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids]
+        url_entries = [self.url_result(url) for url in urls]
+        return [self.playlist_result(url_entries, playlist_title = username)]
  
  
  class DepositFilesIE(InfoExtractor):
@@ -3806,7 +3829,7 @@ class WorldStarHipHopIE(InfoExtractor):
          _title = r"""<title>(.*)</title>"""
  
          mobj = re.search(_title, webpage_src)
-        
+
          if mobj is not None:
              title = mobj.group(1)
          else:
@@ -3824,7 +3847,7 @@ class WorldStarHipHopIE(InfoExtractor):
              if mobj is not None:
                  title = mobj.group(1)
              thumbnail = None
-        
+
          results = [{
                      'id': video_id,
                      'url' : video_url,