[youtube] Simplify automatic captions URL check (Closes #8287)

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 4aac2cc03a0b10886c997d18e0607a54a0d0447f..8e8fc14d27b85942c7a0867d125c4a336e878dcf 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -613,7 +613,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              },
              'params': {
                  'skip_download': 'requires avconv',
              },
              'params': {
                  'skip_download': 'requires avconv',
-            }
+            },
+            'skip': 'This live event has ended.',
          },
          # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
          {
          },
          # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
          {
@@ -706,6 +707,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          },
          {
              # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
          },
          {
              # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+            # Also tests cut-off URL expansion in video description (see
+            # https://github.com/rg3/youtube-dl/issues/1892,
+            # https://github.com/rg3/youtube-dl/issues/8164)
              'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
              'info_dict': {
                  'id': 'lsguqyKfVQg',
              'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
              'info_dict': {
                  'id': 'lsguqyKfVQg',
@@ -960,6 +964,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          try:
              args = player_config['args']
              caption_url = args['ttsurl']
          try:
              args = player_config['args']
              caption_url = args['ttsurl']
+            if not caption_url:
+                self._downloader.report_warning(err_msg)
+                return {}
              timestamp = args['timestamp']
              # We get the available subtitles
              list_params = compat_urllib_parse.urlencode({
              timestamp = args['timestamp']
              # We get the available subtitles
              list_params = compat_urllib_parse.urlencode({
@@ -1235,10 +1242,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              video_description = re.sub(r'''(?x)
                  <a\s+
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
              video_description = re.sub(r'''(?x)
                  <a\s+
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    title="([^"]+)"\s+
+                    (?:title|href)="([^"]+)"\s+
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
                      (?:[a-zA-Z-]+="[^"]+"\s+)*?
-                    class="yt-uix-redirect-link"\s*>
-                [^<]+
+                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
+                [^<]+\.{3}\s*
                  </a>
              ''', r'\1', video_description)
              video_description = clean_html(video_description)
                  </a>
              ''', r'\1', video_description)
              video_description = clean_html(video_description)
@@ -1487,7 +1494,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                              if codecs:
                                  codecs = codecs.split(',')
                                  if len(codecs) == 2:
                              if codecs:
                                  codecs = codecs.split(',')
                                  if len(codecs) == 2:
-                                    acodec, vcodec = codecs[0], codecs[1]
+                                    acodec, vcodec = codecs[1], codecs[0]
                                  else:
                                      acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                                  dct.update({
                                  else:
                                      acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
                                  dct.update({
@@ -1505,6 +1512,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              for a_format in formats:
                  a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
          else:
              for a_format in formats:
                  a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
          else:
+            unavailable_message = self._html_search_regex(
+                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+                video_webpage, 'unavailable message', default=None)
+            if unavailable_message:
+                raise ExtractorError(unavailable_message, expected=True)
              raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          # Look for the DASH manifest
              raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
  
          # Look for the DASH manifest