Merge branch 'Weiqitv' of https://github.com/FounderSG/youtube-dl into FounderSG...

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 4556a16fb0d3d7a338e0b876f7094f561cfdf1b7..d31161d21c7279d33885fb7849b82e7513c96645 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -26,6 +26,7 @@ from ..compat import (
  from ..utils import (
      clean_html,
      encode_dict,
+    error_to_compat_str,
      ExtractorError,
      float_or_none,
      get_element_by_attribute,
@@ -612,7 +613,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'params': {
                  'skip_download': 'requires avconv',
-            }
+            },
+            'skip': 'This live event has ended.',
          },
          # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
          {
@@ -705,6 +707,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          },
          {
              # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+            # Also tests cut-off URL expansion in video description (see
+            # https://github.com/rg3/youtube-dl/issues/1892,
+            # https://github.com/rg3/youtube-dl/issues/8164)
              'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
              'info_dict': {
                  'id': 'lsguqyKfVQg',
@@ -903,7 +908,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
                  video_id, note=False)
          except ExtractorError as err:
-            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
+            self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
              return {}
  
          sub_lang_list = {}
@@ -1234,10 +1239,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              video_description = re.sub(r'''(?x)
                  <a\s+
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    title="([^"]+)"\s+
+                    (?:title|href)="([^"]+)"\s+
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    class="yt-uix-redirect-link"\s*>
-                [^<]+
+                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
+                [^<]+\.{3}\s*
                  </a>
              ''', r'\1', video_description)
              video_description = clean_html(video_description)
@@ -1486,7 +1491,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                              if codecs:
                                  codecs = codecs.split(',')
                                  if len(codecs) == 2:
-                                    acodec, vcodec = codecs[0], codecs[1]
+                                    acodec, vcodec = codecs[1], codecs[0]
                                  else:
                                      acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                                  dct.update({
@@ -1504,6 +1509,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              for a_format in formats:
                  a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
          else:
+            unavailable_message = self._html_search_regex(
+                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+                video_webpage, 'unavailable message', default=None)
+            if unavailable_message:
+                raise ExtractorError(unavailable_message, expected=True)
              raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          # Look for the DASH manifest
@@ -1774,6 +1784,10 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
          },
      }]
  
+    @classmethod
+    def suitable(cls, url):
+        return False if YoutubePlaylistsIE.suitable(url) else super(YoutubeChannelIE, cls).suitable(url)
+
      def _real_extract(self, url):
          channel_id = self._match_id(url)
  
@@ -1847,10 +1861,10 @@ class YoutubeUserIE(YoutubeChannelIE):
              return super(YoutubeUserIE, cls).suitable(url)
  
  
-class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
-    IE_DESC = 'YouTube.com user playlists'
-    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists'
-    IE_NAME = 'youtube:user:playlists'
+class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
+    IE_DESC = 'YouTube.com user/channel playlists'
+    _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
+    IE_NAME = 'youtube:playlists'
  
      _TESTS = [{
          'url': 'http://www.youtube.com/user/ThirstForScience/playlists',
@@ -1867,6 +1881,13 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
              'id': 'igorkle1',
              'title': 'Игорь Клейнер',
          },
+    }, {
+        'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
+        'playlist_mincount': 17,
+        'info_dict': {
+            'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
+            'title': 'Chem Player',
+        },
      }]