X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=sidebyside;f=youtube_dl%2FInfoExtractors.py;h=3a6e84ebb10d6e283668b32a5b24d122dd4144c9;hb=0be41ec241d8308378c134d803f6b67b93a6c8de;hp=1b37eb648bac41ef29f6fb4b638fc669355affa4;hpb=2b5b2cb84cb380eec0433f9e6b6ed92181108d96;p=youtube-dl

diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 1b37eb648..3a6e84ebb 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -666,7 +666,8 @@ class DailymotionIE(InfoExtractor):
         request.add_header('Cookie', 'family_filter=off')
         try:
             self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage_bytes = compat_urllib_request.urlopen(request).read()
+            webpage = webpage_bytes.decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
             return
@@ -701,7 +702,7 @@ class DailymotionIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract title')
             return
-        video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
+        video_title = unescapeHTML(mobj.group('title'))
 
         video_uploader = None
         mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
@@ -721,12 +722,12 @@ class DailymotionIE(InfoExtractor):
             video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
 
         return [{
-            'id': video_id.decode('utf-8'),
-            'url': video_url.decode('utf-8'),
-            'uploader': video_uploader.decode('utf-8'),
+            'id': video_id,
+            'url': video_url,
+            'uploader': video_uploader,
             'upload_date': video_upload_date,
             'title': video_title,
-            'ext': video_extension.decode('utf-8'),
+            'ext': video_extension,
         }]
 
 
@@ -1061,7 +1062,8 @@ class VimeoIE(InfoExtractor):
         request = compat_urllib_request.Request(url, None, std_headers)
         try:
             self.report_download_webpage(video_id)
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage_bytes = compat_urllib_request.urlopen(request).read()
+            webpage = webpage_bytes.decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
             return
@@ -1089,7 +1091,7 @@ class VimeoIE(InfoExtractor):
         video_thumbnail = config["video"]["thumbnail"]
 
         # Extract video description
-        video_description = get_element_by_id("description", webpage.decode('utf8'))
+        video_description = get_element_by_id("description", webpage)
         if video_description: video_description = clean_html(video_description)
         else: video_description = ''
 
@@ -1407,22 +1409,22 @@ class GenericIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract title')
             return
-        video_title = mobj.group(1).decode('utf-8')
+        video_title = mobj.group(1)
 
         # video uploader is domain name
         mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract title')
             return
-        video_uploader = mobj.group(1).decode('utf-8')
+        video_uploader = mobj.group(1)
 
         return [{
-            'id': video_id.decode('utf-8'),
-            'url': video_url.decode('utf-8'),
+            'id': video_id,
+            'url': video_url,
             'uploader': video_uploader,
             'upload_date': None,
             'title': video_title,
-            'ext': video_extension.decode('utf-8'),
+            'ext': video_extension,
         }]
 
 
@@ -2770,13 +2772,14 @@ class XVideosIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
             return
-        video_id = mobj.group(1).decode('utf-8')
+        video_id = mobj.group(1)
 
         self.report_webpage(video_id)
 
         request = compat_urllib_request.Request(r'http://www.xvideos.com/video' + video_id)
         try:
-            webpage = compat_urllib_request.urlopen(request).read()
+            webpage_bytes = compat_urllib_request.urlopen(request).read()
+            webpage = webpage_bytes.decode('utf-8', 'replace')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
             return
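Every hunk in this diff applies the same Python 3 pattern: compat_urllib_request.urlopen(...).read() now returns bytes, so the page is decoded once right after download, and the later .decode('utf-8') calls on regex match groups (which are already str) are dropped. A minimal standalone sketch of that pattern, with a placeholder URL and a generic title regex that are not taken from the file:

    import re
    from urllib.request import urlopen   # what compat_urllib_request resolves to on Python 3

    def fetch_title(url):
        # urlopen().read() yields bytes on Python 3; decode once, tolerating bad bytes
        # the same way the XVideos hunk does with the 'replace' error handler
        webpage_bytes = urlopen(url).read()
        webpage = webpage_bytes.decode('utf-8', 'replace')
        # re.search against a str gives str groups, so no further .decode('utf-8') is needed
        mobj = re.search(r'<title>(.*?)</title>', webpage)
        return mobj.group(1) if mobj else None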
@@ -2789,7 +2792,7 @@ class XVideosIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract video url')
             return
-        video_url = compat_urllib_parse.unquote(mobj.group(1).decode('utf-8'))
+        video_url = compat_urllib_parse.unquote(mobj.group(1))
 
 
         # Extract title
@@ -2797,7 +2800,7 @@ class XVideosIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract video title')
             return
-        video_title = mobj.group(1).decode('utf-8')
+        video_title = mobj.group(1)
 
 
         # Extract video thumbnail
@@ -2805,7 +2808,7 @@ class XVideosIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
             return
-        video_thumbnail = mobj.group(0).decode('utf-8')
+        video_thumbnail = mobj.group(0)
 
         info = {
             'id': video_id,
@@ -3348,7 +3351,8 @@ class YoukuIE(InfoExtractor):
         self.report_extraction(video_id)
 
         try:
-            config = json.loads(jsondata)
+            jsonstr = jsondata.decode('utf-8')
+            config = json.loads(jsonstr)
 
             video_title = config['data'][0]['title']
             seed = config['data'][0]['seed']
@@ -3371,15 +3375,8 @@ class YoukuIE(InfoExtractor):
 
 
             fileid = config['data'][0]['streamfileids'][format]
-            seg_number = len(config['data'][0]['segs'][format])
-
-            keys=[]
-            for i in xrange(seg_number):
-                keys.append(config['data'][0]['segs'][format][i]['k'])
-
-            #TODO check error
-            #youku only could be viewed from mainland china
-        except:
+            keys = [s['k'] for s in config['data'][0]['segs'][format]]
+        except (UnicodeDecodeError, ValueError, KeyError):
             self._downloader.trouble(u'ERROR: unable to extract info section')
             return
 
@@ -3429,13 +3426,14 @@ class XNXXIE(InfoExtractor):
         if mobj is None:
             self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
             return
-        video_id = mobj.group(1).decode('utf-8')
+        video_id = mobj.group(1)
 
         self.report_webpage(video_id)
 
         # Get webpage content
         try:
-            webpage = compat_urllib_request.urlopen(url).read()
+            webpage_bytes = compat_urllib_request.urlopen(url).read()
+            webpage = webpage_bytes.decode('utf-8')
         except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
             self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
             return
@@ -3444,19 +3442,19 @@ class XNXXIE(InfoExtractor):
         if result is None:
             self._downloader.trouble(u'ERROR: unable to extract video url')
             return
-        video_url = compat_urllib_parse.unquote(result.group(1).decode('utf-8'))
+        video_url = compat_urllib_parse.unquote(result.group(1))
 
         result = re.search(self.VIDEO_TITLE_RE, webpage)
         if result is None:
             self._downloader.trouble(u'ERROR: unable to extract video title')
             return
-        video_title = result.group(1).decode('utf-8')
+        video_title = result.group(1)
 
         result = re.search(self.VIDEO_THUMB_RE, webpage)
         if result is None:
             self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
             return
-        video_thumbnail = result.group(1).decode('utf-8')
+        video_thumbnail = result.group(1)
 
         return [{
             'id': video_id,
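The YoukuIE hunks above do a bit more than the bytes/str switch: the bare except: is narrowed to the exceptions the decode, json.loads and key lookups can actually raise, and the per-segment index loop is collapsed into a list comprehension. A rough sketch of the same logic in isolation, with a hypothetical helper name and format value:

    import json

    def extract_seg_keys(jsondata, fmt='flv'):
        # jsondata: raw bytes of Youku's JSON config; fmt: one of the stream formats
        try:
            config = json.loads(jsondata.decode('utf-8'))
            # one 'k' key per segment, gathered in a comprehension instead of an xrange loop
            return [seg['k'] for seg in config['data'][0]['segs'][fmt]]
        except (UnicodeDecodeError, ValueError, KeyError):
            # bad bytes, malformed JSON, or missing fields respectively
            return None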
@@ -3590,3 +3588,49 @@ class GooglePlusIE(InfoExtractor):
             'title': video_title.decode('utf-8'),
             'ext': video_extension.decode('utf-8'),
         }]
+
+class NBAIE(InfoExtractor):
+    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
+    IE_NAME = u'nba'
+
+    def report_extraction(self, video_id):
+        self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+
+        video_id = mobj.group(1)
+        if video_id.endswith('/index.html'):
+            video_id = video_id[:-len('/index.html')]
+
+        self.report_extraction(video_id)
+        try:
+            urlh = compat_urllib_request.urlopen(url)
+            webpage_bytes = urlh.read()
+            webpage = webpage_bytes.decode('utf-8', 'ignore')
+        except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err))
+            return
+
+        video_url = u'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
+        def _findProp(rexp, default=None):
+            m = re.search(rexp, webpage)
+            if m:
+                return unescapeHTML(m.group(1))
+            else:
+                return default
+
+        shortened_video_id = video_id.rpartition('/')[2]
+        title = _findProp(r'<meta property="og:title" content="(.*?)"', shortened_video_id).replace('NBA.com: ', '')
+        info = {
+            'id': shortened_video_id,
+            'url': video_url,
+            'ext': 'mp4',
+            'title': title,
+            'uploader_date': _findProp(r'<b>Date:</b> (.*?)</div>'),
+            'description': _findProp(r'<div class="description">(.*?)</h1>'),
+        }
+        return [info]
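For orientation, a rough sketch of how the new NBAIE turns a page URL into its video id and CDN URL, replaying the class's own steps on a made-up video path:

    import re

    _VALID_URL = r'^(?:https?://)?(?:watch\.|www\.)?nba\.com/(?:nba/)?video(/[^?]*)(\?.*)?$'
    url = 'http://www.nba.com/video/games/hypothetical/2012/12/25/example.nba/index.html'  # made-up example

    mobj = re.match(_VALID_URL, url)
    video_id = mobj.group(1)            # '/games/hypothetical/2012/12/25/example.nba/index.html'
    if video_id.endswith('/index.html'):
        video_id = video_id[:-len('/index.html')]

    video_url = 'http://ht-mobile.cdn.turner.com/nba/big' + video_id + '_nba_1280x720.mp4'
    shortened_video_id = video_id.rpartition('/')[2]   # 'example.nba', used as the info dict 'id'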