[youtube:playlist] Recognize popular uploads playlist as mix (Closes #9170)

[youtube-dl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 83b5840f76c7c9aa76760457e9b87ce86e83e680..5a102de5109f417561e85bdbff56999a21b1a869 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -44,6 +44,7 @@ from ..utils import (
      unified_strdate,
      unsmuggle_url,
      uppercase_escape,
+    urlencode_postdata,
      ISO3166Utils,
  )
  
@@ -115,7 +116,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
              'hl': 'en_US',
          }
  
-        login_data = compat_urllib_parse_urlencode(login_form_strs).encode('ascii')
+        login_data = urlencode_postdata(login_form_strs)
  
          req = sanitized_Request(self._LOGIN_URL, login_data)
          login_results = self._download_webpage(
@@ -148,7 +149,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  'TrustDevice': 'on',
              })
  
-            tfa_data = compat_urllib_parse_urlencode(tfa_form_strs).encode('ascii')
+            tfa_data = urlencode_postdata(tfa_form_strs)
  
              tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data)
              tfa_results = self._download_webpage(
@@ -233,7 +234,9 @@ class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
  
  class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
      def _process_page(self, content):
-        for playlist_id in orderedSet(re.findall(r'href="/?playlist\?list=([0-9A-Za-z-_]{10,})"', content)):
+        for playlist_id in orderedSet(re.findall(
+                r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
+                content)):
              yield self.url_result(
                  'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
  
@@ -267,7 +270,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           ))
                           |(?:
                              youtu\.be|                                        # just youtu.be/xxxx
-                            vid\.plus                                         # or vid.plus/xxxx
+                            vid\.plus|                                        # or vid.plus/xxxx
+                            zwearz\.com/watch|                                # or zwearz.com/watch/xxxx
                           )/
                           |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
                           )
@@ -755,6 +759,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              'url': 'http://vid.plus/FlRa-iH7PGw',
              'only_matching': True,
          },
+        {
+            'url': 'http://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+            'only_matching': True,
+        },
          {
              # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
              # Also tests cut-off URL expansion in video description (see
@@ -1876,7 +1884,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
          if video:
              return video
  
-        if playlist_id.startswith('RD') or playlist_id.startswith('UL'):
+        if playlist_id.startswith(('RD', 'UL', 'PU')):
              # Mixes require a custom extraction process
              return self._extract_mix(playlist_id)