projects
/
youtube-dl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
[youtube] Add fallback metadata extraction from videoDetails (closes #18052)
[youtube-dl]
/
youtube_dl
/
extractor
/
youtube.py
diff --git
a/youtube_dl/extractor/youtube.py
b/youtube_dl/extractor/youtube.py
index 78203ef84a9efe7fd3046bf025cf871e1d26aead..abadfa5455f95a9270f3a933fd5222d6f9c5bb4a 100644
(file)
--- a/
youtube_dl/extractor/youtube.py
+++ b/
youtube_dl/extractor/youtube.py
@@
-41,6
+41,7
@@
from ..utils import (
remove_quotes,
remove_start,
smuggle_url,
remove_quotes,
remove_start,
smuggle_url,
+ str_or_none,
str_to_int,
try_get,
unescapeHTML,
str_to_int,
try_get,
unescapeHTML,
@@
-501,6
+502,7
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
'start_time': 1,
'like_count': int,
'dislike_count': int,
'start_time': 1,
@@
-583,6
+585,7
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
'categories': ['Science & Technology'],
'tags': ['youtube-dl'],
'duration': 10,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
},
'like_count': int,
'dislike_count': int,
},
@@
-1538,6
+1541,8
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
def extract_view_count(v_info):
return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+ player_response = {}
+
# Get video info
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
# Get video info
embed_webpage = None
if re.search(r'player-age-gate-content">', video_webpage) is not None:
@@
-1580,6
+1585,12
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
sts = ytplayer_config.get('sts')
if args.get('livestream') == '1' or args.get('live_playback') == 1:
is_live = True
sts = ytplayer_config.get('sts')
+ if not player_response:
+ pl_response = str_or_none(args.get('player_response'))
+ if pl_response:
+ pl_response = self._parse_json(pl_response, video_id, fatal=False)
+ if isinstance(pl_response, dict):
+ player_response = pl_response
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
# We also try looking in get_video_info since it may contain different dashmpd
# URL that points to a DASH manifest with possibly different itag set (some itags
@@
-1608,6
+1619,10
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info_webpage:
continue
get_video_info = compat_parse_qs(video_info_webpage)
if not video_info_webpage:
continue
get_video_info = compat_parse_qs(video_info_webpage)
+ if not player_response:
+ pl_response = get_video_info.get('player_response', [None])[0]
+ if isinstance(pl_response, dict):
+ player_response = pl_response
add_dash_mpd(get_video_info)
if view_count is None:
view_count = extract_view_count(get_video_info)
add_dash_mpd(get_video_info)
if view_count is None:
view_count = extract_view_count(get_video_info)
@@
-1653,9
+1668,14
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
'"token" parameter not in video info for unknown reason',
video_id=video_id)
'"token" parameter not in video info for unknown reason',
video_id=video_id)
+ video_details = try_get(
+ player_response, lambda x: x['videoDetails'], dict) or {}
+
# title
if 'title' in video_info:
video_title = video_info['title'][0]
# title
if 'title' in video_info:
video_title = video_info['title'][0]
+ elif 'title' in player_response:
+ video_title = video_details['title']
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
else:
self._downloader.report_warning('Unable to extract video title')
video_title = '_'
@@
-1718,6
+1738,8
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
if view_count is None:
view_count = extract_view_count(video_info)
if view_count is None:
view_count = extract_view_count(video_info)
+ if view_count is None and video_details:
+ view_count = int_or_none(video_details.get('viewCount'))
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
# Check for "rental" videos
if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
@@
-1898,7
+1920,9
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# uploader
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# uploader
- video_uploader = try_get(video_info, lambda x: x['author'][0], compat_str)
+ video_uploader = try_get(
+ video_info, lambda x: x['author'][0],
+ compat_str) or str_or_none(video_details.get('author'))
if video_uploader:
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
else:
if video_uploader:
video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
else:
@@
-2011,12
+2035,19
@@
class YoutubeIE(YoutubeBaseInfoExtractor):
like_count = _extract_count('like')
dislike_count = _extract_count('dislike')
like_count = _extract_count('like')
dislike_count = _extract_count('dislike')
+ if view_count is None:
+ view_count = str_to_int(self._search_regex(
+ r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
+ 'view count', default=None))
+
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0]))
# subtitles
video_subtitles = self.extract_subtitles(video_id, video_webpage)
automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
video_duration = try_get(
video_info, lambda x: int_or_none(x['length_seconds'][0]))
+ if not video_duration:
+ video_duration = int_or_none(video_details.get('lengthSeconds'))
if not video_duration:
video_duration = parse_duration(self._html_search_meta(
'duration', video_webpage, 'video duration'))
if not video_duration:
video_duration = parse_duration(self._html_search_meta(
'duration', video_webpage, 'video duration'))
@@
-2244,6
+2275,7
@@
class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
'categories': ['People & Blogs'],
'tags': list,
'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
'categories': ['People & Blogs'],
'tags': list,
+ 'view_count': int,
'like_count': int,
'dislike_count': int,
},
'like_count': int,
'dislike_count': int,
},