request.add_header('Cookie', 'family_filter=off')
try:
self.report_download_webpage(video_id)
- webpage = compat_urllib_request.urlopen(request).read()
+ webpage_bytes = compat_urllib_request.urlopen(request).read()
+ webpage = webpage_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
- video_title = unescapeHTML(mobj.group('title').decode('utf-8'))
+ video_title = unescapeHTML(mobj.group('title'))
video_uploader = None
mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage)
video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
return [{
- 'id': video_id.decode('utf-8'),
- 'url': video_url.decode('utf-8'),
- 'uploader': video_uploader.decode('utf-8'),
+ 'id': video_id,
+ 'url': video_url,
+ 'uploader': video_uploader,
'upload_date': video_upload_date,
'title': video_title,
- 'ext': video_extension.decode('utf-8'),
+ 'ext': video_extension,
}]
request = compat_urllib_request.Request(url, None, std_headers)
try:
self.report_download_webpage(video_id)
- webpage = compat_urllib_request.urlopen(request).read()
+ webpage_bytes = compat_urllib_request.urlopen(request).read()
+ webpage = webpage_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err))
return
video_thumbnail = config["video"]["thumbnail"]
# Extract video description
- video_description = get_element_by_id("description", webpage.decode('utf8'))
+ video_description = get_element_by_id("description", webpage)
if video_description: video_description = clean_html(video_description)
else: video_description = ''
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
- video_title = mobj.group(1).decode('utf-8')
+ video_title = mobj.group(1)
# video uploader is domain name
mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract title')
return
- video_uploader = mobj.group(1).decode('utf-8')
+ video_uploader = mobj.group(1)
return [{
- 'id': video_id.decode('utf-8'),
- 'url': video_url.decode('utf-8'),
+ 'id': video_id,
+ 'url': video_url,
'uploader': video_uploader,
'upload_date': None,
'title': video_title,
- 'ext': video_extension.decode('utf-8'),
+ 'ext': video_extension,
}]
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
- video_id = mobj.group(1).decode('utf-8')
+ video_id = mobj.group(1)
self.report_webpage(video_id)
request = compat_urllib_request.Request(r'http://www.xvideos.com/video' + video_id)
try:
- webpage = compat_urllib_request.urlopen(request).read()
+ webpage_bytes = compat_urllib_request.urlopen(request).read()
+ webpage = webpage_bytes.decode('utf-8', 'replace')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err))
return
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = compat_urllib_parse.unquote(mobj.group(1).decode('utf-8'))
+ video_url = compat_urllib_parse.unquote(mobj.group(1))
# Extract title
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
- video_title = mobj.group(1).decode('utf-8')
+ video_title = mobj.group(1)
# Extract video thumbnail
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
return
- video_thumbnail = mobj.group(0).decode('utf-8')
+ video_thumbnail = mobj.group(0)
info = {
'id': video_id,
self.report_extraction(video_id)
try:
- config = json.loads(jsondata)
+ jsonstr = jsondata.decode('utf-8')
+ config = json.loads(jsonstr)
video_title = config['data'][0]['title']
seed = config['data'][0]['seed']
fileid = config['data'][0]['streamfileids'][format]
- seg_number = len(config['data'][0]['segs'][format])
-
- keys=[]
- for i in xrange(seg_number):
- keys.append(config['data'][0]['segs'][format][i]['k'])
-
- #TODO check error
- #youku only could be viewed from mainland china
- except:
+ keys = [s['k'] for s in config['data'][0]['segs'][format]]
+ except (UnicodeDecodeError, ValueError, KeyError):
self._downloader.trouble(u'ERROR: unable to extract info section')
return
if mobj is None:
self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
return
- video_id = mobj.group(1).decode('utf-8')
+ video_id = mobj.group(1)
self.report_webpage(video_id)
# Get webpage content
try:
- webpage = compat_urllib_request.urlopen(url).read()
+ webpage_bytes = compat_urllib_request.urlopen(url).read()
+ webpage = webpage_bytes.decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
return
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video url')
return
- video_url = compat_urllib_parse.unquote(result.group(1).decode('utf-8'))
+ video_url = compat_urllib_parse.unquote(result.group(1))
result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video title')
return
- video_title = result.group(1).decode('utf-8')
+ video_title = result.group(1)
result = re.search(self.VIDEO_THUMB_RE, webpage)
if result is None:
self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
return
- video_thumbnail = result.group(1).decode('utf-8')
+ video_thumbnail = result.group(1)
return [{
'id': video_id,