X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=3a6e84ebb10d6e283668b32a5b24d122dd4144c9;hb=0be41ec241d8308378c134d803f6b67b93a6c8de;hp=b7fedabb2efe521fb3c75830c5d7883027427fa5;hpb=4cc3d0742663c92ef5cf89927edee4300666ea69;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index b7fedabb2..3a6e84ebb 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -666,7 +666,8 @@ class DailymotionIE(InfoExtractor): request.add_header('Cookie', 'family_filter=off') try: self.report_download_webpage(video_id) - webpage = compat_urllib_request.urlopen(request).read() + webpage_bytes = compat_urllib_request.urlopen(request).read() + webpage = webpage_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return @@ -701,7 +702,7 @@ class DailymotionIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return - video_title = unescapeHTML(mobj.group('title').decode('utf-8')) + video_title = unescapeHTML(mobj.group('title')) video_uploader = None mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) @@ -721,12 +722,12 @@ class DailymotionIE(InfoExtractor): video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) return [{ - 'id': video_id.decode('utf-8'), - 'url': video_url.decode('utf-8'), - 'uploader': video_uploader.decode('utf-8'), + 'id': video_id, + 'url': video_url, + 'uploader': video_uploader, 'upload_date': video_upload_date, 'title': video_title, - 'ext': video_extension.decode('utf-8'), + 'ext': video_extension, }] @@ -1061,7 +1062,8 @@ class VimeoIE(InfoExtractor): request = compat_urllib_request.Request(url, None, std_headers) try: self.report_download_webpage(video_id) - webpage = compat_urllib_request.urlopen(request).read() + webpage_bytes = compat_urllib_request.urlopen(request).read() + webpage = webpage_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -1089,7 +1091,7 @@ class VimeoIE(InfoExtractor): video_thumbnail = config["video"]["thumbnail"] # Extract video description - video_description = get_element_by_id("description", webpage.decode('utf8')) + video_description = get_element_by_id("description", webpage) if video_description: video_description = clean_html(video_description) else: video_description = '' @@ -1407,22 +1409,22 @@ class GenericIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return - video_title = mobj.group(1).decode('utf-8') + video_title = mobj.group(1) # video uploader is domain name mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return - video_uploader = mobj.group(1).decode('utf-8') + video_uploader = mobj.group(1) return [{ - 'id': video_id.decode('utf-8'), - 'url': video_url.decode('utf-8'), + 'id': video_id, + 'url': video_url, 'uploader': video_uploader, 'upload_date': None, 'title': video_title, - 'ext': video_extension.decode('utf-8'), + 'ext': video_extension, }] @@ -2770,13 +2772,14 @@ class XVideosIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - video_id = mobj.group(1).decode('utf-8') + video_id = mobj.group(1) self.report_webpage(video_id) request = compat_urllib_request.Request(r'http://www.xvideos.com/video' + video_id) try: - webpage = compat_urllib_request.urlopen(request).read() + webpage_bytes = compat_urllib_request.urlopen(request).read() + webpage = webpage_bytes.decode('utf-8', 'replace') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return @@ -2789,7 +2792,7 @@ class XVideosIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video url') return - video_url = compat_urllib_parse.unquote(mobj.group(1).decode('utf-8')) + video_url = compat_urllib_parse.unquote(mobj.group(1)) # Extract title @@ -2797,7 +2800,7 @@ class XVideosIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = mobj.group(1).decode('utf-8') + video_title = mobj.group(1) # Extract video thumbnail @@ -2805,7 +2808,7 @@ class XVideosIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract video thumbnail') return - video_thumbnail = mobj.group(0).decode('utf-8') + video_thumbnail = mobj.group(0) info = { 'id': video_id, @@ -3348,7 +3351,8 @@ class YoukuIE(InfoExtractor): self.report_extraction(video_id) try: - config = json.loads(jsondata) + jsonstr = jsondata.decode('utf-8') + config = json.loads(jsonstr) video_title = config['data'][0]['title'] seed = config['data'][0]['seed'] @@ -3371,15 +3375,8 @@ class YoukuIE(InfoExtractor): fileid = config['data'][0]['streamfileids'][format] - seg_number = len(config['data'][0]['segs'][format]) - - keys=[] - for i in xrange(seg_number): - keys.append(config['data'][0]['segs'][format][i]['k']) - - #TODO check error - #youku only could be viewed from mainland china - except: + keys = [s['k'] for s in config['data'][0]['segs'][format]] + except (UnicodeDecodeError, ValueError, KeyError): self._downloader.trouble(u'ERROR: unable to extract info section') return @@ -3429,13 +3426,14 @@ class XNXXIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - video_id = mobj.group(1).decode('utf-8') + video_id = mobj.group(1) self.report_webpage(video_id) # Get webpage content try: - webpage = compat_urllib_request.urlopen(url).read() + webpage_bytes = compat_urllib_request.urlopen(url).read() + webpage = webpage_bytes.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err) return @@ -3444,19 +3442,19 @@ class XNXXIE(InfoExtractor): if result is None: self._downloader.trouble(u'ERROR: unable to extract video url') return - video_url = compat_urllib_parse.unquote(result.group(1).decode('utf-8')) + video_url = compat_urllib_parse.unquote(result.group(1)) result = re.search(self.VIDEO_TITLE_RE, webpage) if result is None: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = result.group(1).decode('utf-8') + video_title = result.group(1) result = re.search(self.VIDEO_THUMB_RE, webpage) if result is None: self._downloader.trouble(u'ERROR: unable to extract video thumbnail') return - video_thumbnail = result.group(1).decode('utf-8') + video_thumbnail = result.group(1) return [{ 'id': video_id,