[youtube] Respect noplaylist for multifeed videos

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 3d8b31f9830b218e02021b41214010b4f61d29a9..bcd27408dae6f41d869224ef1a3a3e3b62a0be7b 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -19,6 +19,7 @@ from ..compat import (
      compat_urllib_parse,
      compat_urllib_parse_unquote,
      compat_urllib_parse_unquote_plus,
+    compat_urllib_parse_urlparse,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
@@ -31,9 +32,12 @@ from ..utils import (
      get_element_by_id,
      int_or_none,
      orderedSet,
+    parse_duration,
+    smuggle_url,
      str_to_int,
      unescapeHTML,
      unified_strdate,
+    unsmuggle_url,
      uppercase_escape,
      ISO3166Utils,
  )
@@ -317,7 +321,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      IE_NAME = 'youtube'
      _TESTS = [
          {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s&end=9',
              'info_dict': {
                  'id': 'BaW_jenozKc',
                  'ext': 'mp4',
@@ -329,6 +333,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'categories': ['Science & Technology'],
                  'like_count': int,
                  'dislike_count': int,
+                'start_time': 1,
+                'end_time': 9,
              }
          },
          {
@@ -554,6 +560,59 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'format': '135',  # bestvideo
              }
          },
+        {
+            # Multifeed videos (multiple cameras), URL is for Main Camera
+            'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+            'info_dict': {
+                'id': 'jqWvoWXjCVs',
+                'title': 'teamPGP: Rocket League Noob Stream',
+                'description': 'md5:dc7872fb300e143831327f1bae3af010',
+            },
+            'playlist': [{
+                'info_dict': {
+                    'id': 'jqWvoWXjCVs',
+                    'ext': 'mp4',
+                    'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                    'upload_date': '20150721',
+                    'uploader': 'Beer Games Beer',
+                    'uploader_id': 'beergamesbeer',
+                },
+            }, {
+                'info_dict': {
+                    'id': '6h8e8xoXJzg',
+                    'ext': 'mp4',
+                    'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                    'upload_date': '20150721',
+                    'uploader': 'Beer Games Beer',
+                    'uploader_id': 'beergamesbeer',
+                },
+            }, {
+                'info_dict': {
+                    'id': 'PUOgX5z9xZw',
+                    'ext': 'mp4',
+                    'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                    'upload_date': '20150721',
+                    'uploader': 'Beer Games Beer',
+                    'uploader_id': 'beergamesbeer',
+                },
+            }, {
+                'info_dict': {
+                    'id': 'teuwxikvS5k',
+                    'ext': 'mp4',
+                    'title': 'teamPGP: Rocket League Noob Stream (zim)',
+                    'description': 'md5:dc7872fb300e143831327f1bae3af010',
+                    'upload_date': '20150721',
+                    'uploader': 'Beer Games Beer',
+                    'uploader_id': 'beergamesbeer',
+                },
+            }],
+            'params': {
+                'skip_download': True,
+            },
+        }
      ]
  
      def __init__(self, *args, **kwargs):
@@ -885,10 +944,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          return formats
  
      def _real_extract(self, url):
+        url, smuggled_data = unsmuggle_url(url, {})
+
          proto = (
              'http' if self._downloader.params.get('prefer_insecure', False)
              else 'https')
  
+        start_time = None
+        end_time = None
+        parsed_url = compat_urllib_parse_urlparse(url)
+        for component in [parsed_url.fragment, parsed_url.query]:
+            query = compat_parse_qs(component)
+            if start_time is None and 't' in query:
+                start_time = parse_duration(query['t'][0])
+            if start_time is None and 'start' in query:
+                start_time = parse_duration(query['start'][0])
+            if end_time is None and 'end' in query:
+                end_time = parse_duration(query['end'][0])
+
          # Extract original video URL from URL with redirection, like age verification, using next_url parameter
          mobj = re.search(self._NEXT_URL_RE, url)
          if mobj:
@@ -967,7 +1040,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          video_id, note=False,
                          errnote='unable to download video info webpage')
                      get_video_info = compat_parse_qs(video_info_webpage)
-                    add_dash_mpd(get_video_info)
+                    if get_video_info.get('use_cipher_signature') != ['True']:
+                        add_dash_mpd(get_video_info)
                      if not video_info:
                          video_info = get_video_info
                      if 'token' in get_video_info:
@@ -976,7 +1050,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              if 'reason' in video_info:
                  if 'The uploader has not made this video available in your country.' in video_info['reason']:
                      regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
-                    if regions_allowed is not None:
+                    if regions_allowed:
                          raise ExtractorError('YouTube said: This video is available in %s only' % (
                              ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
                              expected=True)
@@ -988,6 +1062,50 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      '"token" parameter not in video info for unknown reason',
                      video_id=video_id)
  
+        # title
+        if 'title' in video_info:
+            video_title = video_info['title'][0]
+        else:
+            self._downloader.report_warning('Unable to extract video title')
+            video_title = '_'
+
+        # description
+        video_description = get_element_by_id("eow-description", video_webpage)
+        if video_description:
+            video_description = re.sub(r'''(?x)
+                <a\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    title="([^"]+)"\s+
+                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
+                    class="yt-uix-redirect-link"\s*>
+                [^<]+
+                </a>
+            ''', r'\1', video_description)
+            video_description = clean_html(video_description)
+        else:
+            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
+            if fd_mobj:
+                video_description = unescapeHTML(fd_mobj.group(1))
+            else:
+                video_description = ''
+
+        if (not self._downloader.params.get('noplaylist') and
+                not smuggled_data.get('force_singlefeed', False) and
+                'multifeed_metadata_list' in video_info):
+            entries = []
+            multifeed_metadata_list = compat_urllib_parse_unquote_plus(video_info['multifeed_metadata_list'][0])
+            for feed in multifeed_metadata_list.split(','):
+                feed_data = compat_parse_qs(feed)
+                entries.append({
+                    '_type': 'url_transparent',
+                    'ie_key': 'Youtube',
+                    'url': smuggle_url(
+                        '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+                        {'force_singlefeed': True}),
+                    'title': '%s (%s)' % (video_title, feed_data['title'][0]),
+                })
+            return self.playlist_result(entries, video_id, video_title, video_description)
+
          if 'view_count' in video_info:
              view_count = int(video_info['view_count'][0])
          else:
@@ -1013,13 +1131,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          else:
              self._downloader.report_warning('unable to extract uploader nickname')
  
-        # title
-        if 'title' in video_info:
-            video_title = video_info['title'][0]
-        else:
-            self._downloader.report_warning('Unable to extract video title')
-            video_title = '_'
-
          # thumbnail image
          # We try first to get a high quality image:
          m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
@@ -1055,26 +1166,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          else:
              video_categories = None
  
-        # description
-        video_description = get_element_by_id("eow-description", video_webpage)
-        if video_description:
-            video_description = re.sub(r'''(?x)
-                <a\s+
-                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    title="([^"]+)"\s+
-                    (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    class="yt-uix-redirect-link"\s*>
-                [^<]+
-                </a>
-            ''', r'\1', video_description)
-            video_description = clean_html(video_description)
-        else:
-            fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
-            if fd_mobj:
-                video_description = unescapeHTML(fd_mobj.group(1))
-            else:
-                video_description = ''
-
          def _extract_count(count_name):
              return str_to_int(self._search_regex(
                  r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
@@ -1255,6 +1346,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
              'formats': formats,
              'is_live': is_live,
+            'start_time': start_time,
+            'end_time': end_time,
          }