[youtube:playlist] Recognize another playlist pattern (closes #11928, closes #12286)

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 0e67fdd127289ba63358182fbab8fc25d50ae2d7..81c7939215bcfccb60b11b2ff28336f0f8d163c5 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
      int_or_none,
      mimetype2ext,
      orderedSet,
+    parse_codecs,
      parse_duration,
      remove_quotes,
      remove_start,
@@ -46,7 +47,6 @@ from ..utils import (
      unsmuggle_url,
      uppercase_escape,
      urlencode_postdata,
-    ISO3166Utils,
  )
  
  
@@ -329,6 +329,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'preference': -50, 'container': 'm4a_dash'},
          '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
          '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'preference': -50, 'container': 'm4a_dash'},
+        '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'preference': -50, 'container': 'm4a_dash'},
+        '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'preference': -50, 'container': 'm4a_dash'},
  
          # Dash webm
          '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8', 'preference': -40},
@@ -368,6 +370,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      }
      _SUBTITLE_FORMATS = ('ttml', 'vtt')
  
+    _GEO_BYPASS = False
+
      IE_NAME = 'youtube'
      _TESTS = [
          {
@@ -914,7 +918,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              # itag 212
              'url': '1t24XAntNCY',
              'only_matching': True,
-        }
+        },
+        {
+            # geo restricted to JP
+            'url': 'sJL6WA-aGkQ',
+            'only_matching': True,
+        },
      ]
  
      def __init__(self, *args, **kwargs):
@@ -1373,11 +1382,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          if 'token' not in video_info:
              if 'reason' in video_info:
                  if 'The uploader has not made this video available in your country.' in video_info['reason']:
-                    regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
-                    if regions_allowed:
-                        raise ExtractorError('YouTube said: This video is available in %s only' % (
-                            ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
-                            expected=True)
+                    regions_allowed = self._html_search_meta(
+                        'regionsAllowed', video_webpage, default=None)
+                    countries = regions_allowed.split(',') if regions_allowed else None
+                    self.raise_geo_restricted(
+                        msg=video_info['reason'][0], countries=countries)
                  raise ExtractorError(
                      'YouTube said: %s' % video_info['reason'][0],
                      expected=True, video_id=video_id)
@@ -1694,15 +1703,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                      codecs = mobj.group('val')
                                      break
                              if codecs:
-                                codecs = codecs.split(',')
-                                if len(codecs) == 2:
-                                    acodec, vcodec = codecs[1], codecs[0]
-                                else:
-                                    acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
-                                dct.update({
-                                    'acodec': acodec,
-                                    'vcodec': vcodec,
-                                })
+                                dct.update(parse_codecs(codecs))
                  formats.append(dct)
          elif video_info.get('hlsvp'):
              manifest_url = video_info['hlsvp'][0]
@@ -1850,7 +1851,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                          (?:
                              youtube\.com/
                              (?:
-                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                                 \? (?:.*?[&;])*? (?:p|a|list)=
                              |  p/
                              )|
@@ -1923,6 +1924,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
              'title': 'JODA15',
              'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
          }
+    }, {
+        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+        'playlist_mincount': 485,
+        'info_dict': {
+            'title': '2017 華語最新單曲 (2/24更新)',
+            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+        }
      }, {
          'note': 'Embedded SWF player',
          'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
@@ -2071,7 +2079,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
          # Check if it's a video-specific URL
          query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
          video_id = query_dict.get('v', [None])[0] or self._search_regex(
-            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
              'video id', default=None)
          if video_id:
              if self._downloader.params.get('noplaylist'):
@@ -2231,7 +2239,7 @@ class YoutubeUserIE(YoutubeChannelIE):
          'url': 'https://www.youtube.com/gametrailers',
          'only_matching': True,
      }, {
-        # This channel is not available.
+        # This channel is not available, geo restricted to JP
          'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
          'only_matching': True,
      }]
@@ -2348,18 +2356,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
          videos = []
          limit = n
  
+        url_query = {
+            'search_query': query.encode('utf-8'),
+        }
+        url_query.update(self._EXTRA_QUERY_ARGS)
+        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
          for pagenum in itertools.count(1):
-            url_query = {
-                'search_query': query.encode('utf-8'),
-                'page': pagenum,
-                'spf': 'navigate',
-            }
-            url_query.update(self._EXTRA_QUERY_ARGS)
-            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
              data = self._download_json(
                  result_url, video_id='query "%s"' % query,
                  note='Downloading page %s' % pagenum,
-                errnote='Unable to download API page')
+                errnote='Unable to download API page',
+                query={'spf': 'navigate'})
              html_content = data[1]['body']['content']
  
              if 'class="search-message' in html_content:
@@ -2371,6 +2379,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
              videos += new_videos
              if not new_videos or len(videos) > limit:
                  break
+            next_link = self._html_search_regex(
+                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+                html_content, 'next link', default=None)
+            if next_link is None:
+                break
+            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
  
          if len(videos) > n:
              videos = videos[:n]