import netrc
import re
import socket
+import itertools
from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
ExtractorError,
unescapeHTML,
unified_strdate,
+ orderedSet,
)
@classmethod
def suitable(cls, url):
    """Receives a URL and returns True if suitable for this IE."""
    # URLs claimed by the playlist or subscriptions extractors are rejected
    # here so that those extractors handle them instead.
    for other_ie in (YoutubePlaylistIE, YoutubeSubscriptionsIE):
        if other_ie.suitable(url):
            return False
    # _VALID_URL of this class is matched in verbose mode.
    matched = re.match(cls._VALID_URL, url, re.VERBOSE)
    return matched is not None
def report_lang(self):
    """Announce on screen that the language is about to be set."""
    message = u'Setting language'
    self.to_screen(message)
- def report_login(self):
- """Report attempt to log in."""
- self.to_screen(u'Logging in')
-
def report_video_webpage_download(self, video_id):
    """Announce on screen that the webpage of *video_id* is being downloaded."""
    message = u'%s: Downloading video webpage' % video_id
    self.to_screen(message)
if self._downloader is None:
return
- username = None
- password = None
- downloader_params = self._downloader.params
-
- # Attempt to use provided username and password or .netrc data
- if downloader_params.get('username', None) is not None:
- username = downloader_params['username']
- password = downloader_params['password']
- elif downloader_params.get('usenetrc', False):
- try:
- info = netrc.netrc().authenticators(self._NETRC_MACHINE)
- if info is not None:
- username = info[0]
- password = info[2]
- else:
- raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
- except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(u'parsing .netrc: %s' % compat_str(err))
- return
-
# Set language
request = compat_urllib_request.Request(self._LANG_URL)
try:
self._downloader.report_warning(u'unable to set language: %s' % compat_str(err))
return
+ (username, password) = self._get_login_info()
+
# No authentication to be performed
if username is None:
return
video_title = compat_urllib_parse.unquote_plus(video_info['title'][0])
# thumbnail image
- if 'thumbnail_url' not in video_info:
+ # We try first to get a high quality image:
+ m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+ video_webpage, re.DOTALL)
+ if m_thumb is not None:
+ video_thumbnail = m_thumb.group(1)
+ elif 'thumbnail_url' not in video_info:
self._downloader.report_warning(u'unable to extract video thumbnail')
video_thumbnail = ''
else: # don't panic if we can't find it
videos = [v[1] for v in sorted(videos)]
- url_results = [self.url_result(url, 'Youtube') for url in videos]
+ url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
return [self.playlist_result(url_results, playlist_id, playlist_title)]
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
- url_entries = [self.url_result(url, 'Youtube') for url in urls]
+ url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
return [self.playlist_result(url_entries, channel_id)]
pagenum += 1
urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
- url_results = [self.url_result(url, 'Youtube') for url in urls]
+ url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
return [self.playlist_result(url_results, playlist_title = username)]
class YoutubeSearchIE(SearchInfoExtractor):
m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage))
self.to_screen(u'%s: Found %s seasons' % (show_name, len(m_seasons)))
return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons]
+
+
class YoutubeSubscriptionsIE(YoutubeIE):
    """Extractor for the YouTube subscriptions feed.

    It is a subclass of YoutubeIE because we need to login before the feed
    can be requested.
    """
    IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
    _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
    IE_NAME = u'youtube:subscriptions'
    _FEED_TEMPLATE = 'http://www.youtube.com/feed_ajax?action_load_system_feed=1&feed_name=subscriptions&paging=%s'
    # Amount by which the "paging" offset grows from one request to the next.
    _PAGING_STEP = 30

    # Overwrite YoutubeIE properties we don't want
    _TESTS = []

    @classmethod
    def suitable(cls, url):
        """Return True when *url* matches this extractor's _VALID_URL."""
        matched = re.match(cls._VALID_URL, url)
        return matched is not None

    def _real_initialize(self):
        """Require login credentials, then run the parent initialization.

        Raises:
            ExtractorError: if no username is available.
        """
        username, _password = self._get_login_info()
        if username is None:
            raise ExtractorError(u'No login info available, needed for downloading the Youtube subscriptions.', expected=True)
        super(YoutubeSubscriptionsIE, self)._real_initialize()

    def _real_extract(self, url):
        """Walk the paged subscriptions feed and return it as one playlist.

        Every page is fetched from _FEED_TEMPLATE, the video ids found in its
        'feed_html' are turned into 'Youtube' url results, and paging stops
        once the response carries no 'paging' value.
        """
        entries = []
        # itertools.count() only accepts a step argument from Python 2.7 on,
        # so the paging offset is obtained by multiplying the page index.
        for page_num in itertools.count(0):
            offset = page_num * self._PAGING_STEP
            raw_page = self._download_webpage(self._FEED_TEMPLATE % offset, 'feed',
                                              u'Downloading page %s' % page_num)
            page = json.loads(raw_page)
            html = page['feed_html']
            id_matches = re.finditer(r'"/watch\?v=(.*?)"', html)
            # orderedSet is fed the ids in the order they appear in the page.
            video_ids = orderedSet(match.group(1) for match in id_matches)
            entries.extend(self.url_result(video_id, 'Youtube') for video_id in video_ids)
            if page['paging'] is None:
                # No further page is announced: the feed is exhausted.
                break
        return self.playlist_result(entries, playlist_title='Youtube Subscriptions')