X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2Fextractor%2Fbreakcom.py;h=68c7cf2bba49c1cb4374ae00f6262214c1890551;hb=HEAD;hp=9b0aa2b1b9ef9db433257829d45c4a38f437b140;hpb=f78c01f68b1e16e385ec319c7f5d351b173c7ec5;p=youtube-dl diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dl/extractor/breakcom.py index 9b0aa2b1b..68c7cf2bb 100644 --- a/youtube_dl/extractor/breakcom.py +++ b/youtube_dl/extractor/breakcom.py @@ -1,20 +1,39 @@ from __future__ import unicode_literals import re -import json from .common import InfoExtractor +from .youtube import YoutubeIE +from ..utils import ( + int_or_none, + url_or_none, +) class BreakIE(InfoExtractor): - _VALID_URL = r'http://(?:www\.)?break\.com/video/(?:[^/]+/)*.+-(?P\d+)' + _VALID_URL = r'https?://(?:www\.)?break\.com/video/(?P[^/]+?)(?:-(?P\d+))?(?:[/?#&]|$)' _TESTS = [{ 'url': 'http://www.break.com/video/when-girls-act-like-guys-2468056', - 'md5': '33aa4ff477ecd124d18d7b5d23b87ce5', 'info_dict': { 'id': '2468056', 'ext': 'mp4', 'title': 'When Girls Act Like D-Bags', + 'age_limit': 13, + }, + }, { + # youtube embed + 'url': 'http://www.break.com/video/someone-forgot-boat-brakes-work', + 'info_dict': { + 'id': 'RrrDLdeL2HQ', + 'ext': 'mp4', + 'title': 'Whale Watching Boat Crashing Into San Diego Dock', + 'description': 'md5:afc1b2772f0a8468be51dd80eb021069', + 'upload_date': '20160331', + 'uploader': 'Steve Holden', + 'uploader_id': 'sdholden07', + }, + 'params': { + 'skip_download': True, } }, { 'url': 'http://www.break.com/video/ugc/baby-flex-2773063', @@ -22,21 +41,51 @@ class BreakIE(InfoExtractor): }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage( - 'http://www.break.com/embed/%s' % video_id, video_id) - info = json.loads(self._search_regex( - r'var embedVars = ({.*})\s*?', - webpage, 'info json', flags=re.DOTALL)) - video_url = info['videoUri'] - youtube_id = info.get('youtubeId') - if youtube_id: - return self.url_result(youtube_id, 'Youtube') - - final_url = video_url + '?' + info['AuthToken'] + display_id, video_id = re.match(self._VALID_URL, url).groups() + + webpage = self._download_webpage(url, display_id) + + youtube_url = YoutubeIE._extract_url(webpage) + if youtube_url: + return self.url_result(youtube_url, ie=YoutubeIE.ie_key()) + + content = self._parse_json( + self._search_regex( + r'(?s)content["\']\s*:\s*(\[.+?\])\s*[,\n]', webpage, + 'content'), + display_id) + + formats = [] + for video in content: + video_url = url_or_none(video.get('url')) + if not video_url: + continue + bitrate = int_or_none(self._search_regex( + r'(\d+)_kbps', video_url, 'tbr', default=None)) + formats.append({ + 'url': video_url, + 'format_id': 'http-%d' % bitrate if bitrate else 'http', + 'tbr': bitrate, + }) + self._sort_formats(formats) + + title = self._search_regex( + (r'title["\']\s*:\s*(["\'])(?P(?:(?!\1).)+)\1', + r']*>(?P[^<]+)'), webpage, 'title', group='value') + + def get(key, name): + return int_or_none(self._search_regex( + r'%s["\']\s*:\s*["\'](\d+)' % key, webpage, name, + default=None)) + + age_limit = get('ratings', 'age limit') + video_id = video_id or get('pid', 'video id') or display_id + return { 'id': video_id, - 'url': final_url, - 'title': info['contentName'], - 'thumbnail': info['thumbUri'], + 'display_id': display_id, + 'title': title, + 'thumbnail': self._og_search_thumbnail(webpage), + 'age_limit': age_limit, + 'formats': formats, }