X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=ef08bf8cbafc7a6e1da08d21f0da0f5f2538aeb6;hb=07af16b92ef52ac29ecd7f1defdca89295fa611c;hp=aaa4023b478e561027c98aa90d295971507ca385;hpb=7b16239a490a0d8784375895b620598bfccf0ede;p=youtube-dl
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index aaa4023b4..ef08bf8cb 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -77,7 +77,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _set_language(self):
self._set_cookie(
- '.youtube.com', 'PREF', 'f1=50000000&hl=en',
+ '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
# YouTube sets the expire time to about two months
expire_time=time.time() + 2 * 30 * 24 * 3600)
@@ -303,7 +303,7 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
'Downloading page #%s%s'
% (page_num, ' (retry #%d)' % count if count else ''),
transform_source=uppercase_escape,
@@ -2230,6 +2230,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if mobj is not None:
video_uploader_id = mobj.group('uploader_id')
video_uploader_url = mobj.group('uploader_url')
+ else:
+ owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
+ if owner_profile_url:
+ video_uploader_id = self._search_regex(
+ r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
+ default=None)
+ video_uploader_url = owner_profile_url
channel_id = (
str_or_none(video_details.get('channelId'))
@@ -2349,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
m_cat_container = self._search_regex(
r'(?s)
]*>\s*Category\s*
\s*',
video_webpage, 'categories', default=None)
+ category = None
if m_cat_container:
category = self._html_search_regex(
r'(?s)(.*?)', m_cat_container, 'category',
default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
+ if not category:
+ category = try_get(
+ microformat, lambda x: x['category'], compat_str)
+ video_categories = None if category is None else [category]
video_tags = [
unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+ if not video_tags:
+ video_tags = try_get(video_details, lambda x: x['keywords'], list)
def _extract_count(count_name):
return str_to_int(self._search_regex(
@@ -2765,7 +2776,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
ids = []
last_id = playlist_id[-11:]
for n in itertools.count(1):
- url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
webpage = self._download_webpage(
url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
new_ids = orderedSet(re.findall(
@@ -3105,7 +3116,7 @@ class YoutubeLiveIE(YoutubeBaseInfoExtractor):
class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P[^/]+)/playlists'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P[^/]+)/playlists'
IE_NAME = 'youtube:playlists'
_TESTS = [{
@@ -3131,6 +3142,9 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
'title': 'Chem Player',
},
'skip': 'Blocked',
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
}]
@@ -3275,7 +3289,7 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
break
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+ 'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
transform_source=uppercase_escape,
headers=self._YOUTUBE_CLIENT_HEADERS)