From: Sergey M․
Date: Tue, 28 Jul 2015 21:43:32 +0000 (+0600)
Subject: [youtube] Improve tags extraction and add test
X-Git-Url: http://git.bitcoin.ninja/?a=commitdiff_plain;h=000b6b5ae5cc214906effe4ac5b78b579bc7db70;p=youtube-dl

[youtube] Improve tags extraction and add test
---

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index c0fafbfd5..4c449fd74 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -329,6 +329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 'upload_date': '20121002',
 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
 'like_count': int,
 'dislike_count': int,
 'start_time': 1,
@@ -343,7 +344,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 'ext': 'mp4',
 'upload_date': '20120506',
 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
- 'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
+ 'description': 'md5:782e8651347686cba06e58f71ab51773',
+ 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
+ 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
+ 'iconic ep', 'iconic', 'love', 'it'],
 'uploader': 'Icona Pop',
 'uploader_id': 'IconaPop',
 }
@@ -1072,8 +1076,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 else:
 video_categories = None
 
- video_tags = re.findall(r''''"]+?)['"]?\s*>'''
- , video_webpage, re.DOTALL | re.IGNORECASE);
+ video_tags = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+
 # description
 video_description = get_element_by_id("eow-description", video_webpage)
 if video_description:
@@ -1261,8 +1267,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 'title': video_title,
 'thumbnail': video_thumbnail,
 'description': video_description,
- 'tags' : video_tags,
 'categories': video_categories,
+ 'tags': video_tags,
 'subtitles': video_subtitles,
 'automatic_captions': automatic_captions,
 'duration': video_duration,