[youtube:user] Preserve user/c path segment

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index f3f102c30f5a0a5852a292529ea52b70051260f7..8aa7dfc413a7141cd56328ec3c0eaa56941b4171 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -344,6 +344,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'preference': -50, 'container': 'm4a_dash'},
          '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'preference': -50, 'container': 'm4a_dash'},
          '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
+        '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+        '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
  
          # Dash webm
          '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
@@ -499,6 +501,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'youtube_include_dash_manifest': True,
                  'format': '141',
              },
+            'skip': 'format 141 not served anymore',
          },
          # DASH manifest with encrypted signature
          {
@@ -515,7 +518,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'params': {
                  'youtube_include_dash_manifest': True,
-                'format': '141',
+                'format': '141/bestaudio[ext=m4a]',
              },
          },
          # JS player signature function name containing $
@@ -535,7 +538,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'params': {
                  'youtube_include_dash_manifest': True,
-                'format': '141',
+                'format': '141/bestaudio[ext=m4a]',
              },
          },
          # Controversy video
@@ -616,7 +619,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/olympic',
                  'license': 'Standard YouTube License',
                  'description': 'HO09  - Women -  GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
-                'uploader': 'Olympics',
+                'uploader': 'Olympic',
                  'title': 'Hockey - Women -  GER-AUS - London 2012 Olympic Games',
              },
              'params': {
@@ -669,7 +672,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader_url': 're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
                  'uploader': 'dorappi2000',
                  'license': 'Standard YouTube License',
-                'formats': 'mincount:33',
+                'formats': 'mincount:32',
              },
          },
          # DASH manifest with segment_list
@@ -689,7 +692,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'params': {
                  'youtube_include_dash_manifest': True,
                  'format': '135',  # bestvideo
-            }
+            },
+            'skip': 'This live event has ended.',
          },
          {
              # Multifeed videos (multiple cameras), URL is for Main Camera
@@ -760,6 +764,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
              },
              'playlist_count': 2,
+            'skip': 'Not multifeed anymore',
          },
          {
              'url': 'http://vid.plus/FlRa-iH7PGw',
@@ -812,6 +817,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'params': {
                  'skip_download': True,
              },
+            'skip': 'This video does not exist.',
          },
          {
              # Video licensed under Creative Commons
@@ -1329,7 +1335,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      (?:[a-zA-Z-]+="[^"]*"\s+)*?
                      (?:title|href)="([^"]+)"\s+
                      (?:[a-zA-Z-]+="[^"]*"\s+)*?
-                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
+                    class="[^"]*"[^>]*>
                  [^<]+\.{3}\s*
                  </a>
              ''', r'\1', video_description)
@@ -1724,6 +1730,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          }
  
  
+class YoutubeSharedVideoIE(InfoExtractor):
+    _VALID_URL = r'(?:https?:)?//(?:www\.)?youtube\.com/shared\?ci=(?P<id>[0-9A-Za-z_-]{11})'
+    IE_NAME = 'youtube:shared'
+
+    _TEST = {
+        'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+        'info_dict': {
+            'id': 'uPDB5I9wfp8',
+            'ext': 'webm',
+            'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+            'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+            'upload_date': '20160219',
+            'uploader': 'Pocoyo - Português (BR)',
+            'uploader_id': 'PocoyoBrazil',
+        },
+        'add_ie': ['Youtube'],
+        'params': {
+            # There are already too many Youtube downloads
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        real_video_id = self._html_search_meta(
+            'videoId', webpage, 'YouTube video id', fatal=True)
+
+        return self.url_result(real_video_id, YoutubeIE.ie_key())
+
+
  class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
      IE_DESC = 'YouTube.com playlists'
      _VALID_URL = r"""(?x)(?:
@@ -1939,10 +1978,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
          return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
                  else super(YoutubeChannelIE, cls).suitable(url))
  
+    def _build_template_url(self, url, channel_id):
+        return self._TEMPLATE_URL % channel_id
+
      def _real_extract(self, url):
          channel_id = self._match_id(url)
  
-        url = self._TEMPLATE_URL % channel_id
+        url = self._build_template_url(url, channel_id)
  
          # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
          # Workaround by extracting as a playlist if managed to obtain channel playlist URL
@@ -1956,9 +1998,13 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
              channel_playlist_id = self._html_search_meta(
                  'channelId', channel_page, 'channel id', default=None)
              if not channel_playlist_id:
-                channel_playlist_id = self._search_regex(
-                    r'data-(?:channel-external-|yt)id="([^"]+)"',
-                    channel_page, 'channel id', default=None)
+                channel_url = self._html_search_meta(
+                    ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
+                    channel_page, 'channel url', default=None)
+                if channel_url:
+                    channel_playlist_id = self._search_regex(
+                        r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
+                        channel_url, 'channel id', default=None)
          if channel_playlist_id and channel_playlist_id.startswith('UC'):
              playlist_id = 'UU' + channel_playlist_id[2:]
              return self.url_result(
@@ -1981,24 +2027,53 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
                  for video_id, video_title in self.extract_videos_from_page(channel_page)]
              return self.playlist_result(entries, channel_id)
  
+        try:
+            next(self._entries(channel_page, channel_id))
+        except StopIteration:
+            alert_message = self._html_search_regex(
+                r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
+                channel_page, 'alert', default=None, group='alert')
+            if alert_message:
+                raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
+
          return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
  
  
  class YoutubeUserIE(YoutubeChannelIE):
      IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
-    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
-    _TEMPLATE_URL = 'https://www.youtube.com/user/%s/videos'
+    _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+    _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
      IE_NAME = 'youtube:user'
  
      _TESTS = [{
          'url': 'https://www.youtube.com/user/TheLinuxFoundation',
          'playlist_mincount': 320,
          'info_dict': {
-            'title': 'TheLinuxFoundation',
+            'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
+            'title': 'Uploads from The Linux Foundation',
+        }
+    }, {
+        # Only available via https://www.youtube.com/c/12minuteathlete/videos
+        # but not https://www.youtube.com/user/12minuteathlete/videos
+        'url': 'https://www.youtube.com/c/12minuteathlete/videos',
+        'playlist_mincount': 249,
+        'info_dict': {
+            'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
+            'title': 'Uploads from 12 Minute Athlete',
          }
      }, {
          'url': 'ytuser:phihag',
          'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/c/gametrailers',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/gametrailers',
+        'only_matching': True,
+    }, {
+        # This channel is not available.
+        'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
+        'only_matching': True,
      }]
  
      @classmethod
@@ -2011,6 +2086,10 @@ class YoutubeUserIE(YoutubeChannelIE):
          else:
              return super(YoutubeUserIE, cls).suitable(url)
  
+    def _build_template_url(self, url, channel_id):
+        mobj = re.match(self._VALID_URL, url)
+        return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
+
  
  class YoutubeLiveIE(YoutubeBaseInfoExtractor):
      IE_DESC = 'YouTube.com live streams'