X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2Fextractor%2Fbliptv.py;h=8a8c2e7a88583ec49bc87000712aa3cd8d5fed1a;hb=c0f9969b9e297a2c4ceb87017081792e13f4cdc6;hp=df2ad4be2a454ff7f864e775586970e126787a37;hpb=825e0984e27f0c626c4d072066e0c9cae9069704;p=youtube-dl diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index df2ad4be2..8a8c2e7a8 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -1,6 +1,5 @@ import datetime import json -import os import re import socket @@ -24,6 +23,17 @@ class BlipTVIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv/((.+/)|(play/)|(api\.swf#))(.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' IE_NAME = u'blip.tv' + _TEST = { + u'url': u'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', + u'file': u'5779306.m4v', + u'md5': u'80baf1ec5c3d2019037c1c707d676b9f', + u'info_dict': { + u"upload_date": u"20111205", + u"description": u"md5:9bc31f227219cde65e47eeec8d2dc596", + u"uploader": u"Comic Book Resources - CBR TV", + u"title": u"CBR EXCLUSIVE: \"Gotham City Imposters\" Bats VS Jokerz Short 3" + } + } def report_direct_download(self, title): """Report information extraction.""" @@ -35,19 +45,12 @@ class BlipTVIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) # See https://github.com/rg3/youtube-dl/issues/857 - api_mobj = re.match(r'http://a\.blip\.tv/api\.swf#(?P<video_id>[\d\w]+)', url) - if api_mobj is not None: - url = 'http://blip.tv/play/g_%s' % api_mobj.group('video_id') - urlp = compat_urllib_parse_urlparse(url) - if urlp.path.startswith('/play/'): - request = compat_urllib_request.Request(url) - response = compat_urllib_request.urlopen(request) - redirecturl = response.geturl() - rurlp = compat_urllib_parse_urlparse(redirecturl) - file_id = compat_parse_qs(rurlp.fragment)['file'][0].rpartition('/')[2] - url = 'http://blip.tv/a/a-' + file_id - return self._real_extract(url) - + embed_mobj = 
re.search(r'^(?:https?://)?(?:\w+\.)?blip\.tv/(?:play/|api\.swf#)([a-zA-Z0-9]+)', url) + if embed_mobj: + info_url = 'http://blip.tv/play/%s.x?p=1' % embed_mobj.group(1) + info_page = self._download_webpage(info_url, embed_mobj.group(1)) + video_id = self._search_regex(r'data-episode-id="(\d+)', info_page, u'video_id') + return self.url_result('http://blip.tv/a/a-'+video_id, 'BlipTV') if '?' in url: cchar = '&' @@ -57,64 +60,49 @@ class BlipTVIE(InfoExtractor): request = compat_urllib_request.Request(json_url) request.add_header('User-Agent', 'iTunes/10.6.1') self.report_extraction(mobj.group(1)) - info = None + urlh = self._request_webpage(request, None, False, + u'unable to download video info webpage') + try: - urlh = compat_urllib_request.urlopen(request) - if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download - basename = url.split('/')[-1] - title,ext = os.path.splitext(basename) - title = title.decode('UTF-8') - ext = ext.replace('.', '') - self.report_direct_download(title) - info = { - 'id': title, - 'url': url, - 'uploader': None, - 'upload_date': None, - 'title': title, - 'ext': ext, - 'urlhandle': urlh - } + json_code_bytes = urlh.read() + json_code = json_code_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) - if info is None: # Regular URL - try: - json_code_bytes = urlh.read() - json_code = json_code_bytes.decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - raise ExtractorError(u'Unable to read video info webpage: %s' % compat_str(err)) - - try: - json_data = json.loads(json_code) - if 'Post' in json_data: - data = json_data['Post'] - else: - data = json_data - - upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + raise ExtractorError(u'Unable to read video 
info webpage: %s' % compat_str(err)) + + try: + json_data = json.loads(json_code) + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data + + upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') + if 'additionalMedia' in data: + formats = sorted(data['additionalMedia'], key=lambda f: int(f['media_height'])) + best_format = formats[-1] + video_url = best_format['url'] + else: video_url = data['media']['url'] - umobj = re.match(self._URL_EXT, video_url) - if umobj is None: - raise ValueError('Can not determine filename extension') - ext = umobj.group(1) - - info = { - 'id': data['item_id'], - 'url': video_url, - 'uploader': data['display_name'], - 'upload_date': upload_date, - 'title': data['title'], - 'ext': ext, - 'format': data['media']['mimeType'], - 'thumbnail': data['thumbnailUrl'], - 'description': data['description'], - 'player_url': data['embedUrl'], - 'user_agent': 'iTunes/10.6.1', - } - except (ValueError,KeyError) as err: - raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) - - return [info] + umobj = re.match(self._URL_EXT, video_url) + if umobj is None: + raise ValueError('Can not determine filename extension') + ext = umobj.group(1) + + return { + 'id': compat_str(data['item_id']), + 'url': video_url, + 'uploader': data['display_name'], + 'upload_date': upload_date, + 'title': data['title'], + 'ext': ext, + 'format': data['media']['mimeType'], + 'thumbnail': data['thumbnailUrl'], + 'description': data['description'], + 'player_url': data['embedUrl'], + 'user_agent': 'iTunes/10.6.1', + } + except (ValueError, KeyError) as err: + raise ExtractorError(u'Unable to parse video information: %s' % repr(err)) class BlipTVUserIE(InfoExtractor): @@ -173,5 +161,5 @@ class BlipTVUserIE(InfoExtractor): pagenum += 1 urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] - url_entries = [self.url_result(url, 'BlipTV') for url in urls] + url_entries = 
[self.url_result(vurl, 'BlipTV') for vurl in urls] return [self.playlist_result(url_entries, playlist_title = username)]