[youtube] Separate methods for embeds extraction

author Sergey M․ <dstftw@gmail.com>

Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)

committer Sergey M․ <dstftw@gmail.com>

Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)
author Sergey M․ <dstftw@gmail.com>
Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)
committer Sergey M․ <dstftw@gmail.com>
Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index c81efdc005f848450c91f53b160e9c2da29cd8be..b83c18380d2ba24ff0ce4909cf5a66643ae19d41 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -2243,36 +2243,11 @@ class GenericIE(InfoExtractor):
          if vid_me_embed_url is not None:
              return self.url_result(vid_me_embed_url, 'Vidme')
  
-        # Look for embedded YouTube player
-        matches = re.findall(r'''(?x)
-            (?:
-                <iframe[^>]+?src=|
-                data-video-url=|
-                <embed[^>]+?src=|
-                embedSWF\(?:\s*|
-                <object[^>]+data=|
-                new\s+SWFObject\(
-            )
-            (["\'])
-                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-                (?:embed|v|p)/.+?)
-            \1''', webpage)
-        if matches:
+        # Look for YouTube embeds
+        youtube_urls = YoutubeIE._extract_urls(webpage)
+        if youtube_urls:
              return self.playlist_from_matches(
-                matches, video_id, video_title, lambda m: unescapeHTML(m[1]))
-
-        # Look for lazyYT YouTube embed
-        matches = re.findall(
-            r'class="lazyYT" data-youtube-id="([^"]+)"', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: unescapeHTML(m))
-
-        # Look for Wordpress "YouTube Video Importer" plugin
-        matches = re.findall(r'''(?x)<div[^>]+
-            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
-            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
-        if matches:
-            return self.playlist_from_matches(matches, video_id, video_title, lambda m: m[-1])
+                youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
  
          matches = DailymotionIE._extract_urls(webpage)
          if matches:
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 953e38227e60e6fa08bac62b6ab56cb3f3be8b6e..ad2e933ee4e34c9ebdb982ca66278e6e4c4a06b0 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1374,6 +1374,43 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              playback_url, video_id, 'Marking watched',
              'Unable to mark watched', fatal=False)
  
+    @staticmethod
+    def _extract_urls(webpage):
+        # Embedded YouTube player
+        entries = [
+            unescapeHTML(mobj.group('url'))
+            for mobj in re.finditer(r'''(?x)
+            (?:
+                <iframe[^>]+?src=|
+                data-video-url=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*|
+                <object[^>]+data=|
+                new\s+SWFObject\(
+            )
+            (["\'])
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+                (?:embed|v|p)/.+?)
+            \1''', webpage)]
+
+        # lazyYT YouTube embed
+        entries.extend(list(map(
+            unescapeHTML,
+            re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+        # Wordpress "YouTube Video Importer" plugin
+        matches = re.findall(r'''(?x)<div[^>]+
+            class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+            data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+        entries.extend(m[-1] for m in matches)
+
+        return entries
+
+    @staticmethod
+    def _extract_url(webpage):
+        urls = YoutubeIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
      @classmethod
      def extract_id(cls, url):
          mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
author	Sergey M․ <dstftw@gmail.com>
	Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)
committer	Sergey M․ <dstftw@gmail.com>
	Tue, 5 Sep 2017 17:48:37 +0000 (00:48 +0700)
youtube_dl/extractor/generic.py		patch | blob | history
youtube_dl/extractor/youtube.py		patch | blob | history