From: Philipp Hagemeister Date: Sun, 5 Jan 2014 02:48:45 +0000 (+0100) Subject: Merge remote-tracking branch 'rzhxeo/blip' X-Git-Url: http://git.bitcoin.ninja/index.cgi?p=youtube-dl;a=commitdiff_plain;h=aff24732b96b5ec89cb41a05fd132e12c5990caf;hp=-c Merge remote-tracking branch 'rzhxeo/blip' Conflicts: youtube_dl/extractor/bliptv.py --- aff24732b96b5ec89cb41a05fd132e12c5990caf diff --combined youtube_dl/extractor/bliptv.py index 159ea39d9,8a8c2e7a8..4f1272f29 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@@ -1,5 -1,3 +1,5 @@@ +from __future__ import unicode_literals + import datetime import json import re @@@ -22,41 -20,37 +22,36 @@@ from ..utils import class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:www\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' - IE_NAME = 'blip.tv' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' - _URL_EXT = r'^.*\.([a-z0-9]+)$' - IE_NAME = u'blip.tv' ++ _TEST = { - u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', - u'file': u'5779306.m4v', - u'md5': u'80baf1ec5c3d2019037c1c707d676b9f', - u'info_dict': { - u"upload_date": u"20111205", - u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", - u"uploader": u"Comic Book Resources - CBR TV", - u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" + 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + 'file': '5779306.mov', + 'md5': 'c6934ad0b6acf2bd920720ec888eb812', + 'info_dict': { + 'upload_date': '20111205', + 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', + 'uploader': 'Comic Book Resources - CBR TV', + 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', } } def report_direct_download(self, title): """Report information extraction.""" - self.to_screen(u'%s: Direct download detected' % title) + self.to_screen('%s: Direct download detected' % title) def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) # See https://github.com/rg3/youtube-dl/issues/857 - api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P[\d\w]+)', url) - if api_mobj is not None: - url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') - urlp = compat_urllib_parse_urlparse(url) - if urlp.path.startswith('/play/'): - response = self._request_webpage(url, None, False) - redirecturl = response.geturl() - rurlp = compat_urllib_parse_urlparse(redirecturl) - file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2] - url = 'http://blip.tv/a/a-' + file_id - return self._real_extract(url) + embed_mobj = re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) + if embed_mobj: + info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) + info_page = self._download_webpage(info_url, embed_mobj.group(1)) - video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, u'video_id') - return self.url_result('http://blip.tv/a/a-'+video_id, 'BlipTV') ++ video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, 'video_id') ++ return self.url_result('http://blip.tv/a/a-' + video_id, 'BlipTV') if '?' in url: cchar = '&' @@@ -67,13 -61,13 +62,13 @@@ request.add_header('User-Agent', 'iTunes/10.6.1') self.report_extraction(mobj.group(1)) urlh = self._request_webpage(request, None, False, - u'unable to download video info webpage') + 'unable to download video info webpage') try: json_code_bytes = urlh.read() json_code = json_code_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) + raise ExtractorError('Unable to read video info webpage: %s' % compat_str(err)) try: json_data = json.loads(json_code) @@@ -83,38 -77,32 +78,38 @@@ data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + formats = [] if 'additionalMedia' in data: - formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) - best_format = formats[-1] - video_url = best_format['url'] + for f in sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])): + if not int(f['media_width']): # filter m3u8 + continue + formats.append({ + 'url': f['url'], + 'format_id': f['role'], + 'width': int(f['media_width']), + 'height': int(f['media_height']), + }) else: - video_url = data['media']['url'] - umobj = re.match(self._URL_EXT, video_url) - if umobj is None: - raise ValueError('Can not determine filename extension') - ext = umobj.group(1) + formats.append({ + 'url': data['media']['url'], + 'width': int(data['media']['width']), + 'height': int(data['media']['height']), + }) + + self._sort_formats(formats) return { 'id': compat_str(data['item_id']), - 'url': video_url, 'uploader': data['display_name'], 'upload_date': upload_date, 'title': data['title'], - 'ext': ext, - 'format': data['media']['mimeType'], 'thumbnail': data['thumbnailUrl'], 'description': data['description'], - 'player_url': data['embedUrl'], 'user_agent': 'iTunes/10.6.1', + 'formats': formats, } except (ValueError, KeyError) as err: - raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) + raise ExtractorError('Unable to parse video information: %s' % repr(err)) class BlipTVUserIE(InfoExtractor): @@@ -122,19 -110,19 +117,19 @@@ _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' _PAGE_SIZE = 12 - IE_NAME = u'blip.tv:user' + IE_NAME = 'blip.tv:user' def _real_extract(self, url): # Extract username mobj = re.match(self._VALID_URL, url) if mobj is None: - raise ExtractorError(u'Invalid URL: %s' % url) + raise ExtractorError('Invalid URL: %s' % url) username = mobj.group(1) page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' - page = self._download_webpage(url, username, u'Downloading user page') + page = self._download_webpage(url, username, 'Downloading user page') mobj = re.search(r'data-users-id="([^"]+)"', page) page_base = page_base % mobj.group(1) @@@ -150,7 -138,7 +145,7 @@@ while True: url = page_base + "&page=" + str(pagenum) page = self._download_webpage(url, username, - u'Downloading video ids from page %d' % pagenum) + 'Downloading video ids from page %d' % pagenum) # Extract video identifiers ids_in_page = [] @@@ -172,6 -160,6 +167,6 @@@ pagenum += 1 - urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] + urls = ['http://blip.tv/%s' % video_id for video_id in video_ids] url_entries = [self.url_result(vurl, 'BlipTV') for vurl in urls] return [self.playlist_result(url_entries, playlist_title = username)] diff --combined youtube_dl/extractor/generic.py index 377ae91c4,f1b152f62..57a6b1820 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@@ -162,8 -162,6 +162,8 @@@ class GenericIE(InfoExtractor) return self.url_result('http://' + url) video_id = os.path.splitext(url.split('/')[-1])[0] + self.to_screen(u'%s: Requesting header' % video_id) + try: response = self._send_head(url) @@@ -273,16 -271,12 +273,12 @@@ } # Look for embedded blip.tv player - mobj = re.search(r']*https?://api.blip.tv/\w+/redirect/\w+/(\d+)', webpage) + mobj = re.search(r']*https?://api\.blip\.tv/\w+/redirect/\w+/(\d+)', webpage) if mobj: - return self.url_result('http://blip.tv/seo/-'+mobj.group(1), 'BlipTV') - mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*https?://(?:\w+\.)?blip.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', webpage) + return self.url_result('http://blip.tv/a/a-'+mobj.group(1), 'BlipTV') + mobj = re.search(r'<(?:iframe|embed|object)\s[^>]*(https?://(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)[a-zA-Z0-9]+)', webpage) if mobj: - player_url = 'http://blip.tv/play/%s.x?p=1' % mobj.group(1) - player_page = self._download_webpage(player_url, mobj.group(1)) - blip_video_id = self._search_regex(r'data-episode-id="(\d+)', player_page, u'blip_video_id', fatal=False) - if blip_video_id: - return self.url_result('http://blip.tv/seo/-'+blip_video_id, 'BlipTV') + return self.url_result(mobj.group(1), 'BlipTV') # Look for Bandcamp pages with custom domain mobj = re.search(r']*?content="(.*?bandcamp\.com.*?)"', webpage)