[xfileshare:xvidstage] Add support for videos with packed codes (Closes #4335)
author Sergey M․ <dstftw@gmail.com>
Sun, 12 Jun 2016 18:11:04 +0000 (01:11 +0700)
committer Sergey M․ <dstftw@gmail.com>
Sun, 12 Jun 2016 18:11:04 +0000 (01:11 +0700)
youtube_dl/extractor/xfileshare.py

index ee4d04c205a1f6f0ea2ba2e047399ca04877fa38..fe0ab6300bf1369b516c27f2f055047f9816051f 100644 (file)
@@ -5,8 +5,10 @@ import re
 
 from .common import InfoExtractor
 from ..utils import (
+    decode_packed_codes,
     ExtractorError,
     int_or_none,
+    NO_DEFAULT,
     sanitized_Request,
     urlencode_postdata,
 )
@@ -23,6 +25,7 @@ class XFileShareIE(InfoExtractor):
         ('thevideobee.to', 'TheVideoBee'),
         ('vidto.me', 'Vidto'),
         ('streamin.to', 'Streamin.To'),
+        ('xvidstage.com', 'XVIDSTAGE'),
     )
 
     IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
@@ -78,6 +81,13 @@ class XFileShareIE(InfoExtractor):
             'ext': 'mp4',
             'title': 'Big Buck Bunny trailer',
         },
+    }, {
+        'url': 'http://xvidstage.com/e0qcnl03co6z',
+        'info_dict': {
+            'id': 'e0qcnl03co6z',
+            'ext': 'mp4',
+            'title': 'Chucky Prank 2015.mp4',
+        },
     }]
 
     def _real_extract(self, url):
@@ -113,10 +123,23 @@ class XFileShareIE(InfoExtractor):
              r'>Watch (.+) ',
              r'<h2 class="video-page-head">([^<]+)</h2>'],
             webpage, 'title', default=None) or self._og_search_title(webpage)).strip()
-        video_url = self._search_regex(
-            [r'file\s*:\s*["\'](http[^"\']+)["\'],',
-             r'file_link\s*=\s*\'(https?:\/\/[0-9a-zA-z.\/\-_]+)'],
-            webpage, 'file url')
+
+        def extract_video_url(default=NO_DEFAULT):
+            return self._search_regex(
+                (r'file\s*:\s*(["\'])(?P<url>http.+?)\1,',
+                 r'file_link\s*=\s*(["\'])(?P<url>http.+?)\1',
+                 r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http.+?)\2\)',
+                 r'<embed[^>]+src=(["\'])(?P<url>http.+?)\1'),
+                webpage, 'file url', default=default, group='url')
+
+        video_url = extract_video_url(default=None)
+
+        if not video_url:
+            webpage = decode_packed_codes(self._search_regex(
+                r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
+                webpage, 'packed code'))
+            video_url = extract_video_url()
+
         thumbnail = self._search_regex(
             r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)