YoutubeIE: use the new method in the base IE for getting the login info

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 109c8a93fa9ca7cb4caeca87462e1385a802bb69..0de3bb0b333c6957f15f290bdb6d0f8566a545c3 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -4,6 +4,7 @@ import json
  import netrc
  import re
  import socket
+import itertools
  
  from .common import InfoExtractor, SearchInfoExtractor
  from ..utils import (
@@ -19,12 +20,12 @@ from ..utils import (
      ExtractorError,
      unescapeHTML,
      unified_strdate,
+    orderedSet,
  )
  
  
  class YoutubeIE(InfoExtractor):
-    """Information extractor for youtube.com."""
-
+    IE_DESC = u'YouTube.com'
      _VALID_URL = r"""^
                       (
                           (?:https?://)?                                       # http(s):// (optional)
@@ -34,7 +35,7 @@ class YoutubeIE(InfoExtractor):
                           (?:                                                  # the various things that can precede the ID:
                               (?:(?:v|embed|e)/)                               # v/ or embed/ or e/
                               |(?:                                             # or the v= param in all its forms
-                                 (?:watch(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+                                 (?:watch|movie(?:_popup)?(?:\.php)?)?              # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
                                   (?:\?|\#!?)                                  # the params delimiter ? or # or #!
                                   (?:.*?&)?                                    # any other preceding param (like /?s=tuff&v=xxxx)
                                   v=
@@ -123,17 +124,13 @@ class YoutubeIE(InfoExtractor):
      @classmethod
      def suitable(cls, url):
          """Receives a URL and returns True if suitable for this IE."""
-        if YoutubePlaylistIE.suitable(url): return False
+        if YoutubePlaylistIE.suitable(url) or YoutubeSubscriptionsIE.suitable(url): return False
          return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
  
      def report_lang(self):
          """Report attempt to set language."""
          self.to_screen(u'Setting language')
  
-    def report_login(self):
-        """Report attempt to log in."""
-        self.to_screen(u'Logging in')
-
      def report_video_webpage_download(self, video_id):
          """Report attempt to download video webpage."""
          self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -295,26 +292,6 @@ class YoutubeIE(InfoExtractor):
          if self._downloader is None:
              return
  
-        username = None
-        password = None
-        downloader_params = self._downloader.params
-
-        # Attempt to use provided username and password or .netrc data
-        if downloader_params.get('username', None) is not None:
-            username = downloader_params['username']
-            password = downloader_params['password']
-        elif downloader_params.get('usenetrc', False):
-            try:
-                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
-                if info is not None:
-                    username = info[0]
-                    password = info[2]
-                else:
-                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
-            except (IOError, netrc.NetrcParseError) as err:
-                self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
-                return
-
          # Set language
          request = compat_urllib_request.Request(self._LANG_URL)
          try:
@@ -324,6 +301,8 @@ class YoutubeIE(InfoExtractor):
              self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
              return
  
+        (username, password) = self._get_login_info()
+
          # No authentication to be performed
          if username is None:
              return
@@ -442,7 +421,7 @@ class YoutubeIE(InfoExtractor):
                  break
          if 'token' not in video_info:
              if 'reason' in video_info:
-                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0])
+                raise ExtractorError(u'YouTube said: %s' % video_info['reason'][0], expected=True)
              else:
                  raise ExtractorError(u'"token" parameter not in video info for unknown reason')
  
@@ -472,7 +451,12 @@ class YoutubeIE(InfoExtractor):
          video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
  
          # thumbnail image
-        if 'thumbnail_url' not in video_info:
+        # We try first to get a high quality image:
+        m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+                            video_webpage, re.DOTALL)
+        if m_thumb is not None:
+            video_thumbnail = m_thumb.group(1)
+        elif 'thumbnail_url' not in video_info:
              self._downloader.report_warning(u'unable to extract video thumbnail')
              video_thumbnail = ''
          else:   # don't panic if we can't find it
@@ -586,7 +570,7 @@ class YoutubeIE(InfoExtractor):
              if req_format is None or req_format == 'best':
                  video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
              elif req_format == 'worst':
-                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
+                video_url_list = [(existing_formats[-1], url_map[existing_formats[-1]])] # worst quality
              elif req_format in ('-1', 'all'):
                  video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
              else:
@@ -629,8 +613,7 @@ class YoutubeIE(InfoExtractor):
          return results
  
  class YoutubePlaylistIE(InfoExtractor):
-    """Information Extractor for YouTube playlists."""
-
+    IE_DESC = u'YouTube.com playlists'
      _VALID_URL = r"""(?:
                          (?:https?://)?
                          (?:\w+\.)?
@@ -692,13 +675,12 @@ class YoutubePlaylistIE(InfoExtractor):
  
          videos = [v[1] for v in sorted(videos)]
  
-        url_results = [self.url_result(url, 'Youtube') for url in videos]
+        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
          return [self.playlist_result(url_results, playlist_id, playlist_title)]
  
  
  class YoutubeChannelIE(InfoExtractor):
-    """Information Extractor for YouTube channels."""
-
+    IE_DESC = u'YouTube.com channels'
      _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)"
      _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en'
      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
@@ -751,13 +733,12 @@ class YoutubeChannelIE(InfoExtractor):
          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
  
          urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
-        url_entries = [self.url_result(url, 'Youtube') for url in urls]
+        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
          return [self.playlist_result(url_entries, channel_id)]
  
  
  class YoutubeUserIE(InfoExtractor):
-    """Information Extractor for YouTube users."""
-
+    IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
      _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
      _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
      _GDATA_PAGE_SIZE = 50
@@ -809,11 +790,11 @@ class YoutubeUserIE(InfoExtractor):
              pagenum += 1
  
          urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
-        url_results = [self.url_result(url, 'Youtube') for url in urls]
+        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
          return [self.playlist_result(url_results, playlist_title = username)]
  
  class YoutubeSearchIE(SearchInfoExtractor):
-    """Information Extractor for YouTube search queries."""
+    IE_DESC = u'YouTube.com searches'
      _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
      _MAX_RESULTS = 1000
      IE_NAME = u'youtube:search'
@@ -853,3 +834,49 @@ class YoutubeSearchIE(SearchInfoExtractor):
              video_ids = video_ids[:n]
          videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
          return self.playlist_result(videos, query)
+
+
+class YoutubeShowIE(InfoExtractor):
+    IE_DESC = u'YouTube.com (multi-season) shows'
+    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'
+    IE_NAME = u'youtube:show'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group(1)
+        webpage = self._download_webpage(url, show_name, u'Downloading show webpage')
+        # There's one playlist for each season of the show
+        m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
+        self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
+        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+
+
+class YoutubeSubscriptionsIE(YoutubeIE):
+    """It's a subclass of YoutubeIE because we need to login"""
+    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+    IE_NAME = u'youtube:subscriptions'
+    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
+    _PAGING_STEP = 30
+
+    # Overwrite YoutubeIE properties we don't want
+    _TESTS = []
+    @classmethod
+    def suitable(cls, url):
+        return re.match(cls._VALID_URL, url) is not None
+
+    def _real_extract(self, url):
+        feed_entries = []
+        # The step argument is available only in 2.7 or higher
+        for i in itertools.count(0):
+            paging = i*self._PAGING_STEP
+            info = self._download_webpage(self._FEED_TEMPLATE % paging, 'feed',
+                                          u'Downloading page %s' % i)
+            info = json.loads(info)
+            feed_html = info['feed_html']
+            m_ids = re.finditer(r'"/watch\?v=(.*?)"', feed_html)
+            ids = orderedSet(m.group(1) for m in m_ids)
+            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+            if info['paging'] is None:
+                break
+        return self.playlist_result(feed_entries, playlist_title='Youtube Subscriptions')