[youtube] Skip unsupported adaptive stream type (#18804)

[youtube-dl] / youtube_dl / extractor / xfileshare.py
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py

index 1b5cd122ac9f5c90ff8b846c998bceff36ba350a..b38c7a7b3cd5f87a33f814444fd70af3219afb93 100644 (file)
--- a/youtube_dl/extractor/xfileshare.py
+++ b/youtube_dl/extractor/xfileshare.py
@@ -10,31 +10,32 @@ from ..utils import (
      ExtractorError,
      int_or_none,
      NO_DEFAULT,
-    sanitized_Request,
      urlencode_postdata,
  )
  
  
  class XFileShareIE(InfoExtractor):
      _SITES = (
-        ('daclips.in', 'DaClips'),
-        ('filehoot.com', 'FileHoot'),
-        ('gorillavid.in', 'GorillaVid'),
-        ('movpod.in', 'MovPod'),
-        ('powerwatch.pw', 'PowerWatch'),
-        ('rapidvideo.ws', 'Rapidvideo.ws'),
-        ('thevideobee.to', 'TheVideoBee'),
-        ('vidto.me', 'Vidto'),
-        ('streamin.to', 'Streamin.To'),
-        ('xvidstage.com', 'XVIDSTAGE'),
-        ('vidabc.com', 'Vid ABC'),
-        ('vidbom.com', 'VidBom'),
-        ('vidlo.us', 'vidlo'),
+        (r'daclips\.(?:in|com)', 'DaClips'),
+        (r'filehoot\.com', 'FileHoot'),
+        (r'gorillavid\.(?:in|com)', 'GorillaVid'),
+        (r'movpod\.in', 'MovPod'),
+        (r'powerwatch\.pw', 'PowerWatch'),
+        (r'rapidvideo\.ws', 'Rapidvideo.ws'),
+        (r'thevideobee\.to', 'TheVideoBee'),
+        (r'vidto\.(?:me|se)', 'Vidto'),
+        (r'streamin\.to', 'Streamin.To'),
+        (r'xvidstage\.com', 'XVIDSTAGE'),
+        (r'vidabc\.com', 'Vid ABC'),
+        (r'vidbom\.com', 'VidBom'),
+        (r'vidlo\.us', 'vidlo'),
+        (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
+        (r'fastvideo\.me', 'FastVideo.me'),
      )
  
      IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
      _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
-                  % '|'.join(re.escape(site) for site in list(zip(*_SITES))[0]))
+                  % '|'.join(site for site in list(zip(*_SITES))[0]))
  
      _FILE_NOT_FOUND_REGEXES = (
          r'>(?:404 - )?File Not Found<',
@@ -109,8 +110,26 @@ class XFileShareIE(InfoExtractor):
          'params': {
              'skip_download': True,
          },
+    }, {
+        'url': 'http://www.rapidvideo.cool/b667kprndr8w',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
+        'only_matching': True,
+    }, {
+        'url': 'http://vidto.se/1tx1pf6t12cg.html',
+        'only_matching': True,
      }]
  
+    @staticmethod
+    def _extract_urls(webpage):
+        """Return the src URLs of embed iframes in webpage that point at a host listed in _SITES."""
+        # Accept an optional "www." prefix so discovered embeds agree with what _VALID_URL matches.
+        hosts = '|'.join(list(zip(*XFileShareIE._SITES))[0])
+        return [mobj.group('url') for mobj in re.finditer(
+            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
+            % hosts, webpage)]
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          video_id = mobj.group('id')
@@ -130,12 +149,12 @@ class XFileShareIE(InfoExtractor):
              if countdown:
                  self._sleep(countdown, video_id)
  
-            post = urlencode_postdata(fields)
-
-            req = sanitized_Request(url, post)
-            req.add_header('Content-type', 'application/x-www-form-urlencoded')
-
-            webpage = self._download_webpage(req, video_id, 'Downloading video page')
+            webpage = self._download_webpage(
+                url, video_id, 'Downloading video page',
+                data=urlencode_postdata(fields), headers={
+                    'Referer': url,
+                    'Content-type': 'application/x-www-form-urlencoded',
+                })
  
          title = (self._search_regex(
              (r'style="z-index: [0-9]+;">([^<]+)</span>',
@@ -150,7 +169,7 @@ class XFileShareIE(InfoExtractor):
          def extract_formats(default=NO_DEFAULT):
              urls = []
              for regex in (
-                    r'file\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+                    r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
                      r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
                      r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
                      r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):