[extractor/generic] Add test for Wistia standard embed

[youtube-dl] / youtube_dl / extractor / generic.py
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py

index 0f1eb7fa64dbf64b32000fd05ea6b65bb8ea502a..9883cde6193063d76f65cc2af316bb947481e413 100644 (file)
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -61,6 +61,7 @@ from .jwplatform import JWPlatformIE
  from .digiteka import DigitekaIE
  from .instagram import InstagramIE
  from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
  
  
  class GenericIE(InfoExtractor):
@@ -741,6 +742,22 @@ class GenericIE(InfoExtractor):
                  'timestamp': 1401832161,
              },
          },
+        # Wistia standard embed (async)
+        {
+            'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+            'info_dict': {
+                'id': '807fafadvk',
+                'ext': 'mp4',
+                'title': 'Drip Brennan Dunn Workshop',
+                'description': 'a JV Webinars video from getdrip-1',
+                'duration': 4986.95,
+                'upload_date': '20160518',
+                'timestamp': 1463607249,
+            },
+            'params': {
+                'skip_download': True,
+            }
+        },
          # Soundcloud embed
          {
              'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
@@ -1427,7 +1444,8 @@ class GenericIE(InfoExtractor):
          #   Site Name | Video Title
          #   Video Title - Tagline | Site Name
          # and so on and so forth; it's just not practical
-        video_title = self._html_search_regex(
+        video_title = self._og_search_title(
+            webpage, default=None) or self._html_search_regex(
              r'(?s)<title>(.*?)</title>', webpage, 'video title',
              default='video')
  
@@ -1445,6 +1463,9 @@ class GenericIE(InfoExtractor):
          video_uploader = self._search_regex(
              r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
  
+        video_description = self._og_search_description(webpage, default=None)
+        video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
          # Helper method
          def _playlist_from_matches(matches, getter=None, ie=None):
              urlrs = orderedSet(
@@ -1558,6 +1579,15 @@ class GenericIE(InfoExtractor):
                  'id': match.group('id')
              }
  
+        match = re.search(
+            r'''(?sx)
+                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
+            ''', webpage)
+        if match:
+            return self.url_result(self._proto_relative_url(
+                'wistia:%s' % match.group('id')), 'Wistia')
+
          # Look for SVT player
          svt_url = SVTIE._extract_url(webpage)
          if svt_url:
@@ -1983,6 +2013,19 @@ class GenericIE(InfoExtractor):
          if liveleak_url:
              return self.url_result(liveleak_url, 'LiveLeak')
  
+        # Look for 3Q SDN embeds
+        threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+        if threeqsdn_url:
+            return {
+                '_type': 'url_transparent',
+                'ie_key': ThreeQSDNIE.ie_key(),
+                'url': self._proto_relative_url(threeqsdn_url),
+                'title': video_title,
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'uploader': video_uploader,
+            }
+
          def check_video(vurl):
              if YoutubeIE.suitable(vurl):
                  return True