Merge branch 'master' into subtitles_rework
[youtube-dl] / youtube_dl / extractor / breakcom.py
1 import re
2 import json
3
4 from .common import InfoExtractor
5 from ..utils import determine_ext
6
7
class BreakIE(InfoExtractor):
    """Information extractor for video pages on break.com.

    The video metadata is read from the ``embedVars`` JavaScript object
    found on the site's embed page for the video.
    """

    # The scheme is optional and may be http or https.  The slug capture
    # stops at '/', '?' or '#' so that a query string or fragment on the
    # page URL never ends up inside the extracted video id.
    _VALID_URL = r'(?:https?://)?(?:www\.)?break\.com/video/([^/?#]+)'
    _TEST = {
        u'url': u'http://www.break.com/video/when-girls-act-like-guys-2468056',
        u'file': u'2468056.mp4',
        u'md5': u'a3513fb1547fba4fb6cfac1bffc6c46b',
        u'info_dict': {
            u"title": u"When Girls Act Like D-Bags"
        }
    }

    def _real_extract(self, url):
        """Extract the video information for a break.com video page URL.

        Returns either a URL result delegating to the 'Youtube' extractor
        (when the embedded video URI is a YouTube watch URL) or a
        one-element list holding a dict with the keys 'id', 'url', 'ext',
        'title' and 'thumbnail'.
        """
        mobj = re.match(self._VALID_URL, url)
        # The video id is the last dash-separated component of the slug,
        # e.g. "when-girls-act-like-guys-2468056" -> "2468056".
        video_id = mobj.group(1).split("-")[-1]
        embed_url = 'http://www.break.com/embed/%s' % video_id
        webpage = self._download_webpage(embed_url, video_id)
        # DOTALL: the embedVars object literal may span several lines.
        info_json = self._search_regex(r'var embedVars = ({.*?});', webpage,
                                       u'info json', flags=re.DOTALL)
        info = json.loads(info_json)
        video_url = info['videoUri']
        # Some videos are merely hosted on YouTube; hand those over to the
        # YouTube extractor instead of building a direct download URL.
        m_youtube = re.search(r'(https?://www\.youtube\.com/watch\?v=.*)', video_url)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')
        # The auth token is appended as the query string of the media URL.
        final_url = video_url + '?' + info['AuthToken']
        return [{
            'id':        video_id,
            'url':       final_url,
            'ext':       determine_ext(final_url),
            'title':     info['contentName'],
            # A missing thumbnail should not abort the extraction, so a
            # lookup that yields None is used instead of raising KeyError.
            'thumbnail': info.get('thumbUri'),
        }]