[clipfish] Fix imports (#1842)

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 4c43d57394e430b59f67fcc1852152a020acdcc5..765b4a9bf769926350350e83d680d35c1e4d2775 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -11,7 +11,6 @@ import socket
  import string
  import struct
  import traceback
-import xml.etree.ElementTree
  import zlib
  
  from .common import InfoExtractor, SearchInfoExtractor
@@ -29,6 +28,7 @@ from ..utils import (
      clean_html,
      get_cachedir,
      get_element_by_id,
+    get_element_by_attribute,
      ExtractorError,
      unescapeHTML,
      unified_strdate,
@@ -1144,8 +1144,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                  'asrs': 1,
              })
              list_url = caption_url + '&' + list_params
-            list_page = self._download_webpage(list_url, video_id)
-            caption_list = xml.etree.ElementTree.fromstring(list_page.encode('utf-8'))
+            caption_list = self._download_xml(list_url, video_id)
              original_lang_node = caption_list.find('track')
              if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
                  self._downloader.report_warning(u'Video doesn\'t have automatic captions')
@@ -1539,6 +1538,24 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
      def _real_initialize(self):
          self._login()
  
+    def _ids_to_results(self, ids):
+        return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+                       for vid_id in ids]
+
+    def _extract_mix(self, playlist_id):
+        # The mixes are generated from a single video;
+        # the id of the playlist is just 'RD' + video_id
+        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
+        webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
+            get_element_by_attribute('class', 'title ', webpage))
+        title = clean_html(title_span)
+        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
+        ids = orderedSet(re.findall(video_re, webpage))
+        url_results = self._ids_to_results(ids)
+
+        return self.playlist_result(url_results, playlist_id, title)
+
      def _real_extract(self, url):
          # Extract playlist id
          mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1556,6 +1573,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
              else:
                  self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
  
+        if len(playlist_id) == 13:  # 'RD' + 11 characters for the video id
+            # Mixes require a custom extraction process
+            return self._extract_mix(playlist_id)
+
          # Extract the video ids from the playlist pages
          ids = []
  
@@ -1573,8 +1594,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
  
          playlist_title = self._og_search_title(page)
  
-        url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
-                       for vid_id in ids]
+        url_results = self._ids_to_results(ids)
          return self.playlist_result(url_results, playlist_id, playlist_title)
  
  
@@ -1771,7 +1791,6 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
      Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
      """
      _LOGIN_REQUIRED = True
-    _PAGING_STEP = 30
      # use action_load_personal_feed instead of action_load_system_feed
      _PERSONAL_FEED = False
  
@@ -1791,9 +1810,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
  
      def _real_extract(self, url):
          feed_entries = []
-        # The step argument is available only in 2.7 or higher
-        for i in itertools.count(0):
-            paging = i*self._PAGING_STEP
+        paging = 0
+        for i in itertools.count(1):
              info = self._download_webpage(self._FEED_TEMPLATE % paging,
                                            u'%s feed' % self._FEED_NAME,
                                            u'Downloading page %s' % i)
@@ -1806,6 +1824,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
                  for video_id in ids)
              if info['paging'] is None:
                  break
+            paging = info['paging']
          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
  
  class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
@@ -1825,7 +1844,6 @@ class YoutubeWatchLaterIE(YoutubeFeedsInfoExtractor):
      _VALID_URL = r'https?://www\.youtube\.com/feed/watch_later|:ytwatchlater'
      _FEED_NAME = 'watch_later'
      _PLAYLIST_TITLE = u'Youtube Watch Later'
-    _PAGING_STEP = 100
      _PERSONAL_FEED = True
  
  class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
@@ -1835,13 +1853,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
      _PERSONAL_FEED = True
      _PLAYLIST_TITLE = u'Youtube Watch History'
  
-    def _real_extract(self, url):
-        webpage = self._download_webpage('https://www.youtube.com/feed/history', u'History')
-        data_paging = self._search_regex(r'data-paging="(\d+)"', webpage, u'data-paging')
-        # The step is actually a ridiculously big number (like 1374343569725646)
-        self._PAGING_STEP = int(data_paging)
-        return super(YoutubeHistoryIE, self)._real_extract(url)
-
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = u'youtube:favorites'
      IE_DESC = u'YouTube.com favourite videos, "ytfav" keyword (requires authentication)'