Merge remote-tracking branch 'dstftw/multifeed-videos' (closes #6360)

author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 29 Jul 2015 19:55:20 +0000 (21:55 +0200)
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
Wed, 29 Jul 2015 19:55:20 +0000 (21:55 +0200)
diff --combined youtube_dl/extractor/youtube.py

index 4c449fd741ba3d0445060204e13f0fc5e378b97e,8a5ef2e7028a58f6bc33d2f100197442ba34aca1..67a1df9a0a1bebeaea4577411ff0c65f99d0166f
--- 1/youtube_dl/extractor/youtube.py
--- 2/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@@ -33,9 -33,11 +33,11 @@@ from ..utils import 
       int_or_none,
       orderedSet,
       parse_duration,
+     smuggle_url,
       str_to_int,
       unescapeHTML,
       unified_strdate,
+     unsmuggle_url,
       uppercase_escape,
       ISO3166Utils,
   )
@@@ -281,13 -283,13 +283,13 @@@ class YoutubeIE(YoutubeBaseInfoExtracto
           '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'vcodec': 'none', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
   
           # Dash webm
- -        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'VP8', 'preference': -40},
- -        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'VP9'},
+ +        '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'acodec': 'none', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
+ +        '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'container': 'webm', 'vcodec': 'vp9'},
           '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
           '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
           '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
@@@ -297,11 -299,11 +299,11 @@@
           '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
           '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
           '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},
- -        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
- -        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
- -        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
- -        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'VP9'},
- -        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},
+ +        '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
+ +        '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
+ +        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
+ +        '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'vcodec': 'vp9'},
+ +        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'},
   
           # Dash webm audio
           '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50},
@@@ -329,7 -331,6 +331,7 @@@
                   'upload_date': '20121002',
                   'description': 'test chars:  "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
                   'categories': ['Science & Technology'],
+ +                'tags': ['youtube-dl'],
                   'like_count': int,
                   'dislike_count': int,
                   'start_time': 1,
@@@ -344,10 -345,7 +346,10 @@@
                   'ext': 'mp4',
                   'upload_date': '20120506',
                   'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
- -                'description': 'md5:fea86fda2d5a5784273df5c7cc994d9f',
+ +                'description': 'md5:782e8651347686cba06e58f71ab51773',
+ +                'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
+ +                         'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
+ +                         'iconic ep', 'iconic', 'love', 'it'],
                   'uploader': 'Icona Pop',
                   'uploader_id': 'IconaPop',
               }
@@@ -562,6 -560,59 +564,59 @@@
                   'format': '135',  # bestvideo
               }
           },
+         {
+             # Multifeed videos (multiple cameras), URL is for Main Camera
+             'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+             'info_dict': {
+                 'id': 'jqWvoWXjCVs',
+                 'title': 'teamPGP: Rocket League Noob Stream',
+                 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+             },
+             'playlist': [{
+                 'info_dict': {
+                     'id': 'jqWvoWXjCVs',
+                     'ext': 'mp4',
+                     'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                     'upload_date': '20150721',
+                     'uploader': 'Beer Games Beer',
+                     'uploader_id': 'beergamesbeer',
+                 },
+             }, {
+                 'info_dict': {
+                     'id': '6h8e8xoXJzg',
+                     'ext': 'mp4',
+                     'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                     'upload_date': '20150721',
+                     'uploader': 'Beer Games Beer',
+                     'uploader_id': 'beergamesbeer',
+                 },
+             }, {
+                 'info_dict': {
+                     'id': 'PUOgX5z9xZw',
+                     'ext': 'mp4',
+                     'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                     'upload_date': '20150721',
+                     'uploader': 'Beer Games Beer',
+                     'uploader_id': 'beergamesbeer',
+                 },
+             }, {
+                 'info_dict': {
+                     'id': 'teuwxikvS5k',
+                     'ext': 'mp4',
+                     'title': 'teamPGP: Rocket League Noob Stream (zim)',
+                     'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                     'upload_date': '20150721',
+                     'uploader': 'Beer Games Beer',
+                     'uploader_id': 'beergamesbeer',
+                 },
+             }],
+             'params': {
+                 'skip_download': True,
+             },
+         }
       ]
   
       def __init__(self, *args, **kwargs):
@@@ -893,6 -944,8 +948,8 @@@
           return formats
   
       def _real_extract(self, url):
+         url, smuggled_data = unsmuggle_url(url, {})
+ 
           proto = (
               'http' if self._downloader.params.get('prefer_insecure', False)
               else 'https')
@@@ -1009,6 -1062,55 +1066,55 @@@
                       '"token" parameter not in video info for unknown reason',
                       video_id=video_id)
   
+         # title
+         if 'title' in video_info:
+             video_title = video_info['title'][0]
+         else:
+             self._downloader.report_warning('Unable to extract video title')
+             video_title = '_'
+ 
+         # description
+         video_description = get_element_by_id("eow-description", video_webpage)
+         if video_description:
+             video_description = re.sub(r'''(?x)
+                 <a\s+
+                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                     title="([^"]+)"\s+
+                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                     class="yt-uix-redirect-link"\s*>
+                 [^<]+
+                 </a>
+             ''', r'\1', video_description)
+             video_description = clean_html(video_description)
+         else:
+             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
+             if fd_mobj:
+                 video_description = unescapeHTML(fd_mobj.group(1))
+             else:
+                 video_description = ''
+ 
+         if 'multifeed_metadata_list' in video_info and not smuggled_data.get('force_singlefeed', False):
+             if not self._downloader.params.get('noplaylist'):
+                 entries = []
+                 feed_ids = []
+                 multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
+                 for feed in multifeed_metadata_list.split(','):
+                     feed_data = compat_parse_qs(feed)
+                     entries.append({
+                         '_type': 'url_transparent',
+                         'ie_key': 'Youtube',
+                         'url': smuggle_url(
+                             '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+                             {'force_singlefeed': True}),
+                         'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+                     })
+                     feed_ids.append(feed_data['id'][0])
+                 self.to_screen(
+                     'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+                     % (', '.join(feed_ids), video_id))
+                 return self.playlist_result(entries, video_id, video_title, video_description)
+             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ 
           if 'view_count' in video_info:
               view_count = int(video_info['view_count'][0])
           else:
@@@ -1034,13 -1136,6 +1140,6 @@@
           else:
               self._downloader.report_warning('unable to extract uploader nickname')
   
-         # title
-         if 'title' in video_info:
-             video_title = video_info['title'][0]
-         else:
-             self._downloader.report_warning('Unable to extract video title')
-             video_title = '_'
- 
           # thumbnail image
           # We try first to get a high quality image:
           m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@@ -1076,30 -1171,6 +1175,10 @@@
           else:
               video_categories = None
   
-         # description
-         video_description = get_element_by_id("eow-description", video_webpage)
-         if video_description:
-             video_description = re.sub(r'''(?x)
-                 <a\s+
-                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                     title="([^"]+)"\s+
-                     (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                     class="yt-uix-redirect-link"\s*>
-                 [^<]+
-                 </a>
-             ''', r'\1', video_description)
-             video_description = clean_html(video_description)
-         else:
-             fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
-             if fd_mobj:
-                 video_description = unescapeHTML(fd_mobj.group(1))
-             else:
-                 video_description = ''
- 
+ +        video_tags = [
+ +            unescapeHTML(m.group('content'))
+ +            for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+ +
           def _extract_count(count_name):
               return str_to_int(self._search_regex(
                   r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
@@@ -1268,7 -1339,6 +1347,7 @@@
               'thumbnail': video_thumbnail,
               'description': video_description,
               'categories': video_categories,
+ +            'tags': video_tags,
               'subtitles': video_subtitles,
               'automatic_captions': automatic_captions,
               'duration': video_duration,
author	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Wed, 29 Jul 2015 19:55:20 +0000 (21:55 +0200)
committer	Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>
	Wed, 29 Jul 2015 19:55:20 +0000 (21:55 +0200)