X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fyoutube.py;h=438eb5aa7d371f0d0fd9c70b9c8b7d15a8d44737;hb=5caabd3c701a484271d197f7006ecf831e38136b;hp=886fc15914e5c8ecc08bd6eb55a04fdfee5587f9;hpb=77d95677b7ab4a9840ef142b14627b07a9a31120;p=youtube-dl diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 886fc1591..438eb5aa7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -484,7 +484,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # RTMP (unnamed) '_rtmp': {'protocol': 'rtmp'}, } - _SUBTITLE_FORMATS = ('ttml', 'vtt') + _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') _GEO_BYPASS = False @@ -1086,7 +1086,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, 'youtube_include_dash_manifest': False, }, - } + }, + { + # artist and track fields should return non-null, per issue #20599 + 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs', + 'info_dict': { + 'id': 'MgNrAu2pzNs', + 'ext': 'mp4', + 'title': 'Voyeur Girl', + 'description': 'md5:7ae382a65843d6df2685993e90a8628f', + 'upload_date': '20190312', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw', + 'artist': 'Stephen', + 'track': 'Voyeur Girl', + 'album': 'it\'s too much love to know my dear', + 'release_date': '20190313', + 'release_year': 2019, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # Retrieve 'artist' field from 'Artist:' in video description + # when it is present on youtube music video + # Some videos have release_date and no release_year - + # (release_year should be extracted from release_date) + # https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932 + 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY', + 'info_dict': { + 'id': 'k0jLE7tTwjY', + 'ext': 'mp4', + 'title': 'Latch Feat. Sam Smith', + 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335', + 'upload_date': '20150110', + 'uploader': 'Various Artists - Topic', + 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w', + 'artist': 'Disclosure', + 'track': 'Latch Feat. Sam Smith', + 'album': 'Latch Featuring Sam Smith', + 'release_date': '20121008', + 'release_year': 2012, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # handle multiple artists on youtube music video + 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA', + 'info_dict': { + 'id': '74qn0eJSjpA', + 'ext': 'mp4', + 'title': 'Eastside', + 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2', + 'upload_date': '20180710', + 'uploader': 'Benny Blanco - Topic', + 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A', + 'artist': 'benny blanco, Halsey, Khalid', + 'track': 'Eastside', + 'album': 'Eastside', + 'release_date': '20180713', + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, + { + # handle youtube music video with release_year and no release_date + 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M', + 'info_dict': { + 'id': '-hcAI0g-f5M', + 'ext': 'mp4', + 'title': 'Put It On Me', + 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e', + 'upload_date': '20180426', + 'uploader': 'Matt Maeson - Topic', + 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ', + 'artist': 'Matt Maeson', + 'track': 'Put It On Me', + 'album': 'The Hearse', + 'release_date': None, + 'release_year': 2018, + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1652,7 +1740,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): view_count = extract_view_count(get_video_info) if not video_info: video_info = get_video_info - if 'token' in get_video_info: + get_token = get_video_info.get('token') or get_video_info.get('account_playback_token') + if get_token: # Different get_video_info requests may report different results, e.g. # some may report video unavailability, but some may serve it without # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362, @@ -1662,7 +1751,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # due to YouTube measures against IP ranges of hosting providers. # Working around by preferring the first succeeded video_info containing # the token if no such video_info yet was found. - if 'token' not in video_info: + token = video_info.get('token') or video_info.get('account_playback_token') + if not token: video_info = get_video_info break @@ -1671,7 +1761,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'(?s)]+id="unavailable-message"[^>]*>(.+?)', video_webpage, 'unavailable message', default=None) - if 'token' not in video_info: + if not video_info: + unavailable_message = extract_unavailable_message() + if not unavailable_message: + unavailable_message = 'Unable to extract video data' + raise ExtractorError( + 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) + + token = video_info.get('token') or video_info.get('account_playback_token') + if not token: if 'reason' in video_info: if 'The uploader has not made this video available in your country.' in video_info['reason']: regions_allowed = self._html_search_meta( @@ -2063,6 +2161,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): track = extract_meta('Song') artist = extract_meta('Artist') + album = None + release_date = None + release_year = None + + description_info = video_description.split('\n\n') + # If the description of the video has the youtube music auto-generated format, extract additional info + if len(description_info) >= 5 and description_info[-1] == 'Auto-generated by YouTube.': + track_artist = description_info[1].split(' · ') + if len(track_artist) >= 2: + if track is None: + track = track_artist[0] + if artist is None: + artist = re.search(r'Artist: ([^\n]+)', description_info[-2]) + if artist: + artist = artist.group(1) + if artist is None: + artist = track_artist[1] + # handle multiple artists + if len(track_artist) > 2: + for i in range(2, len(track_artist)): + artist += ', %s' % track_artist[i] + release_year = re.search(r'℗ ([0-9]+)', video_description) + if release_year: + release_year = int_or_none(release_year.group(1)) + album = description_info[2] + if description_info[4].startswith('Released on: '): + release_date = description_info[4].split(': ')[1].replace('-', '') + # extract release_year from release_date if necessary + if release_year is None: + release_year = int_or_none(release_date[0:4]) m_episode = re.search( r']+id="watch7-headline"[^>]*>\s*]*>.*?>(?P[^<]+)\s*S(?P\d+)\s*•\s*E(?P\d+)', @@ -2216,6 +2344,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'episode_number': episode_number, 'track': track, 'artist': artist, + 'album': album, + 'release_date': release_date, + 'release_year': release_year, }