[Bigflix] Add new extractor for bigflix.com
author Vignesh Venkat <vigneshacitizenofindia@gmail.com>
Sat, 9 Jan 2016 23:31:50 +0000 (15:31 -0800)
committer Vignesh Venkat <vigneshacitizenofindia@gmail.com>
Sun, 10 Jan 2016 03:45:58 +0000 (19:45 -0800)
Add an InfoExtractor (IE) to support bigflix.com. The site appears to use
a Silverlight plugin whose video URL is supplied through base64-encoded
flashvars, so the URL is quite straightforward to extract.

youtube_dl/extractor/__init__.py
youtube_dl/extractor/bigflix.py [new file with mode: 0644]

index a9d23b8f4fa6afdc81edafe23d672b0b9dbc51fd..40c42d4d2f695fdfe4af8d7c14dcf7e703f91936 100644 (file)
@@ -61,6 +61,7 @@ from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .beatportpro import BeatportProIE
 from .bet import BetIE
+from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .bleacherreport import (
diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py
new file mode 100644 (file)
index 0000000..aeea1a0
--- /dev/null
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class BigflixIE(InfoExtractor):  # extractor for video pages on bigflix.com
+    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P<id>[0-9]+)'  # a numeric path segment is captured as the `id` group
+    _TEST = {  # sample page URL together with the values extraction is expected to produce
+        'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
+        'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
+        'info_dict': {
+            'id': '16537',
+            'ext': 'mp4',
+            'title': 'Singham Returns',
+            'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',  # NOTE(review): 'md5:' prefix presumably means "compare by hash" - confirm against the test harness
+        }
+    }
+
+    def _real_extract(self, url):  # returns a dict with the keys 'id', 'title', 'url' and 'description'
+        video_id = self._match_id(url)  # presumably the `id` group of _VALID_URL - helper is defined outside this file
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(  # title is the content of the <div> whose class is "pagetitle"
+            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
+            webpage, 'title')
+
+        video_url = b64decode(compat_urllib_parse_unquote(self._search_regex(  # take the value following 'file=' up to the next '&'
+            r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8')  # unquote it, base64-decode it, and read the result as UTF-8 text
+
+        description = self._html_search_meta('description', webpage)  # looked up under the name 'description'
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+        }