import json
import os.path
import re
-import string
import struct
import traceback
import zlib
break
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id, 'Downloading page #%s' % page_num)
+ 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
playlist_title = self._html_search_regex(
- r'<h1 class="pl-header-title">\s*(.*?)\s*</h1>', page, u'title')
+ r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>',
+ page, u'title')
url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)
class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches'
- _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
+ _API_URL = u'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
_MAX_RESULTS = 1000
IE_NAME = u'youtube:search'
_SEARCH_KEY = 'ytsearch'
video_ids = []
pagenum = 0
limit = n
+ PAGE_SIZE = 50
- while (50 * pagenum) < limit:
- result_url = self._API_URL % (compat_urllib_parse.quote_plus(query), (50*pagenum)+1)
+ while (PAGE_SIZE * pagenum) < limit:
+ result_url = self._API_URL % (
+ compat_urllib_parse.quote_plus(query.encode('utf-8')),
+ (PAGE_SIZE * pagenum) + 1)
data_json = self._download_webpage(
result_url, video_id=u'query "%s"' % query,
note=u'Downloading page %s' % (pagenum + 1),
feed_entries = []
paging = 0
for i in itertools.count(1):
- info = self._download_webpage(self._FEED_TEMPLATE % paging,
+ info = self._download_json(self._FEED_TEMPLATE % paging,
u'%s feed' % self._FEED_NAME,
u'Downloading page %s' % i)
- info = json.loads(info)
- feed_html = info['feed_html']
+ feed_html = info.get('feed_html') or info.get('content_html')
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids)
feed_entries.extend(
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword(requires authentication)'
+ IE_DESC = u'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
_VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
_FEED_NAME = 'subscriptions'
_PLAYLIST_TITLE = u'Youtube Subscriptions'