[youtube] Improve _VALID_URLs (closes #12538)

author Sergey M․ <dstftw@gmail.com>
Fri, 24 Mar 2017 18:17:17 +0000 (01:17 +0700)
committer Sergey M․ <dstftw@gmail.com>
Fri, 24 Mar 2017 18:18:33 +0000 (01:18 +0700)

diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index caa0482491fef0c550f8b9de8b608e317e40aa4f..ca40de522bc5e341f2ac269db997a0e73914c127 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      # If True it will raise an error if no login info is provided
      _LOGIN_REQUIRED = False
  
+    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+
      def _set_language(self):
          self._set_cookie(
              '.youtube.com', 'PREF', 'f1=50000000&hl=en',
@@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           )
                       )?                                                       # all until now is optional -> you can pass the naked ID
                       ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE
+                     (?!.*?\blist=
+                        (?:
+                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
+                            WL                                                # WL are handled by the watch later IE
+                        )
+                     )
                       (?(1).+)?                                                # if we found the ID, everything can follow
-                     $"""
+                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
      _formats = {
          '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@@ -924,6 +931,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'url': 'sJL6WA-aGkQ',
              'only_matching': True,
          },
+        {
+            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+            'only_matching': True,
+        },
      ]
  
      def __init__(self, *args, **kwargs):
@@ -1864,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                          )
                          .*
                       |
-                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
-                     )"""
+                        (%(playlist_id)s)
+                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
      _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
      _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
      IE_NAME = 'youtube:playlist'
author	Sergey M․ <dstftw@gmail.com>
	Fri, 24 Mar 2017 18:17:17 +0000 (01:17 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Fri, 24 Mar 2017 18:18:33 +0000 (01:18 +0700)