From: Vignesh Venkat
Date: Sat, 9 Jan 2016 23:31:50 +0000 (-0800)
Subject: [Bigflix] Add new extractor for bigflix.com
X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=0a899a1448328648927e18e43c6e2274d2706396;p=youtube-dl

[Bigflix] Add new extractor for bigflix.com

Add an IE to support bigflix.com. It uses some sort of Silverlight plugin
whose video URL is being populated using base64-encoded flashvars. So it is
quite straightforward to extract.
---
Reviewer notes (not part of the commit; text between "---" and the first
"diff --git" line is commentary and is skipped when the patch is applied):

* What the patch does
  - Registers BigflixIE in youtube_dl/extractor/__init__.py, between the
    existing BetIE and BildIE imports.
  - Adds youtube_dl/extractor/bigflix.py with a single extractor class.

* How BigflixIE._real_extract works
  - The video id is the trailing run of digits in the page URL (the named
    group "id" in _VALID_URL, read through self._match_id).
  - The page is downloaded once; the title is the text captured from the
    element whose class attribute is "pagetitle".
  - The media URL is taken from the "file=" parameter found in the page:
    the captured value is URL-unquoted, encoded to ASCII bytes,
    base64-decoded and finally decoded as UTF-8 text.
  - The description comes from the page's "description" meta tag.
  - The returned dict carries the keys id, title, url and description.

* Restoration of this copy
  - This copy had lost its line breaks and every "<...>" sequence. The
    named group in _VALID_URL was restored to (?P<id>[0-9]+): the stripped
    form "(?P[0-9]+)" is not a valid pattern, and _match_id together with
    the test's 'id' field shows which group name is expected.
  - The title pattern had lost its opening tag and closing delimiter and
    was restored to <div ...>(.+?)</div>.
    NOTE(review): the tag name "div" is reconstructed, not visible in the
    damaged copy; confirm against commit 0a899a1 before relying on it.
  - The author's e-mail address in "From:" was stripped as well and has
    not been reconstructed.

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index a9d23b8f4..40c42d4d2 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -61,6 +61,7 @@ from .beeg import BeegIE
 from .behindkink import BehindKinkIE
 from .beatportpro import BeatportProIE
 from .bet import BetIE
+from .bigflix import BigflixIE
 from .bild import BildIE
 from .bilibili import BiliBiliIE
 from .bleacherreport import (
diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dl/extractor/bigflix.py
new file mode 100644
index 000000000..aeea1a002
--- /dev/null
+++ b/youtube_dl/extractor/bigflix.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from base64 import b64decode
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+
+
+class BigflixIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?bigflix\.com/.*/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.bigflix.com/Hindi-movies/Action-movies/Singham-Returns/16537',
+        'md5': 'ec76aa9b1129e2e5b301a474e54fab74',
+        'info_dict': {
+            'id': '16537',
+            'ext': 'mp4',
+            'title': 'Singham Returns',
+            'description': 'md5:3d2ba5815f14911d5cc6a501ae0cf65d',
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._html_search_regex(
+            r'<div[^>]+class=["\']pagetitle["\'][^>]*>(.+?)</div>',
+            webpage, 'title')
+
+        video_url = b64decode(compat_urllib_parse_unquote(self._search_regex(
+            r'file=([^&]+)', webpage, 'video url')).encode('ascii')).decode('utf-8')
+
+        description = self._html_search_meta('description', webpage)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'description': description,
+        }