X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=f64b88d55c187c7c9dfa9ef874136e3fbb98387c;hb=a130bc6d024e9bfa3c7f8742f8bf5038b2c6e363;hp=cea30dad81fa4224a848732159aa19684c7d5dbc;hpb=1a9c655e3b1569f315d4193e877cba0b4a863c63;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index cea30dad8..f64b88d55 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2,26 +2,17 @@ # -*- coding: utf-8 -*- import datetime -import HTMLParser -import httplib import netrc import os import re import socket import time -import urllib -import urllib2 import email.utils import xml.etree.ElementTree import random import math from urlparse import parse_qs -try: - import cStringIO as StringIO -except ImportError: - import StringIO - from utils import * @@ -29,37 +20,48 @@ class InfoExtractor(object): """Information Extractor class. Information extractors are the classes that, given a URL, extract - information from the video (or videos) the URL refers to. This - information includes the real video URL, the video title and simplified - title, author and others. The information is stored in a dictionary - which is then passed to the FileDownloader. The FileDownloader - processes this information possibly downloading the video to the file - system, among other possible outcomes. The dictionaries must include - the following fields: - - id: Video identifier. - url: Final video URL. - uploader: Nickname of the video uploader. - title: Literal title. - ext: Video filename extension. - format: Video format. - player_url: SWF Player URL (may be None). - - The following fields are optional. Their primary purpose is to allow - youtube-dl to serve as the backend for a video search function, such - as the one in youtube2mp3. They are only used when their respective - forced printing functions are called: - - thumbnail: Full URL to a video thumbnail image. - description: One-line video description. + information about the video (or videos) the URL refers to. This + information includes the real video URL, the video title, author and + others. The information is stored in a dictionary which is then + passed to the FileDownloader. The FileDownloader processes this + information possibly downloading the video to the file system, among + other possible outcomes. + + The dictionaries must include the following fields: + + id: Video identifier. + url: Final video URL. + uploader: Nickname of the video uploader, unescaped. + upload_date: Video upload date (YYYYMMDD). + title: Video title, unescaped. + ext: Video filename extension. + + The following fields are optional: + + format: The video format, defaults to ext (used for --get-format) + thumbnail: Full URL to a video thumbnail image. + description: One-line video description. + player_url: SWF Player URL (used for rtmpdump). + subtitles: The .srt file contents. + urlhandle: [internal] The urlHandle to be used to download the file, + like returned by urllib.request.urlopen + + The fields should all be Unicode strings. Subclasses of this one should re-define the _real_initialize() and _real_extract() methods and define a _VALID_URL regexp. Probably, they should also be added to the list of extractors. + + _real_extract() must return a *list* of information dictionaries as + described above. + + Finally, the _WORKING attribute should be set to False for broken IEs + in order to warn the users and skip the tests. """ _ready = False _downloader = None + _WORKING = True def __init__(self, downloader=None): """Constructor. Receives an optional downloader.""" @@ -70,6 +72,10 @@ class InfoExtractor(object): """Receives a URL and returns True if suitable for this IE.""" return re.match(self._VALID_URL, url) is not None + def working(self): + """Getter method for _WORKING.""" + return self._WORKING + def initialize(self): """Initializes an instance (authentication, etc).""" if not self._ready: @@ -237,16 +243,16 @@ class YoutubeIE(InfoExtractor): password = info[2] else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) - except (IOError, netrc.NetrcParseError), err: + except (IOError, netrc.NetrcParseError) as err: self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return # Set language - request = urllib2.Request(self._LANG_URL) + request = compat_urllib_request.Request(self._LANG_URL) try: self.report_lang() - urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return @@ -262,14 +268,14 @@ class YoutubeIE(InfoExtractor): 'username': username, 'password': password, } - request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) + request = compat_urllib_request.Request(self._LOGIN_URL, compat_urllib_parse.urlencode(login_form)) try: self.report_login() - login_results = urllib2.urlopen(request).read() + login_results = compat_urllib_request.urlopen(request).read() if re.search(r'(?i)]* name="loginForm"', login_results) is not None: self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return @@ -278,11 +284,11 @@ class YoutubeIE(InfoExtractor): 'next_url': '/', 'action_confirm': 'Confirm', } - request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form)) + request = compat_urllib_request.Request(self._AGE_URL, compat_urllib_parse.urlencode(age_form)) try: self.report_age_confirmation() - age_results = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + age_results = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return @@ -290,7 +296,7 @@ class YoutubeIE(InfoExtractor): # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: - url = 'http://www.youtube.com/' + urllib.unquote(mobj.group(1)).lstrip('/') + url = 'http://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') # Extract video id from URL mobj = re.match(self._VALID_URL, url, re.VERBOSE) @@ -301,10 +307,10 @@ class YoutubeIE(InfoExtractor): # Get video webpage self.report_video_webpage_download(video_id) - request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) + request = compat_urllib_request.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id) try: - video_webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + video_webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return @@ -320,13 +326,13 @@ class YoutubeIE(InfoExtractor): for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' % (video_id, el_type)) - request = urllib2.Request(video_info_url) + request = compat_urllib_request.Request(video_info_url) try: - video_info_webpage = urllib2.urlopen(request).read() + video_info_webpage = compat_urllib_request.urlopen(request).read() video_info = parse_qs(video_info_webpage) if 'token' in video_info: break - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: @@ -348,13 +354,13 @@ class YoutubeIE(InfoExtractor): if 'author' not in video_info: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return - video_uploader = urllib.unquote_plus(video_info['author'][0]) + video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) # title if 'title' not in video_info: self._downloader.trouble(u'ERROR: unable to extract video title') return - video_title = urllib.unquote_plus(video_info['title'][0]) + video_title = compat_urllib_parse.unquote_plus(video_info['title'][0]) video_title = video_title.decode('utf-8') # thumbnail image @@ -362,10 +368,10 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'WARNING: unable to extract video thumbnail') video_thumbnail = '' else: # don't panic if we can't find it - video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0]) + video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) # upload date - upload_date = u'NA' + upload_date = None mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) @@ -386,10 +392,10 @@ class YoutubeIE(InfoExtractor): if self._downloader.params.get('writesubtitles', False): try: self.report_video_subtitles_download(video_id) - request = urllib2.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) + request = compat_urllib_request.Request('http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id) try: - srt_list = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + srt_list = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) @@ -403,10 +409,10 @@ class YoutubeIE(InfoExtractor): srt_lang = srt_lang_list.keys()[0] if not srt_lang in srt_lang_list: raise Trouble(u'WARNING: no closed captions found in the specified language') - request = urllib2.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) + request = compat_urllib_request.Request('http://www.youtube.com/api/timedtext?lang=%s&name=%s&v=%s' % (srt_lang, srt_lang_list[srt_lang], video_id)) try: - srt_xml = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + srt_xml = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) if not srt_xml: raise Trouble(u'WARNING: unable to download video subtitles') @@ -418,10 +424,10 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'WARNING: unable to extract video duration') video_duration = '' else: - video_duration = urllib.unquote_plus(video_info['length_seconds'][0]) + video_duration = compat_urllib_parse.unquote_plus(video_info['length_seconds'][0]) # token - video_token = urllib.unquote_plus(video_info['token'][0]) + video_token = compat_urllib_parse.unquote_plus(video_info['token'][0]) # Decide which formats to download req_format = self._downloader.params.get('format', None) @@ -475,6 +481,9 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') + video_format = '{} - {}'.format(format_param.decode('utf-8') if format_param else video_extension.decode('utf-8'), + self._video_dimensions.get(format_param, '???')) + results.append({ 'id': video_id.decode('utf-8'), 'url': video_real_url.decode('utf-8'), @@ -482,7 +491,7 @@ class YoutubeIE(InfoExtractor): 'upload_date': upload_date, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': (format_param is None and u'NA' or format_param.decode('utf-8')), + 'format': video_format, 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'player_url': player_url, @@ -521,11 +530,11 @@ class MetacafeIE(InfoExtractor): def _real_initialize(self): # Retrieve disclaimer - request = urllib2.Request(self._DISCLAIMER) + request = compat_urllib_request.Request(self._DISCLAIMER) try: self.report_disclaimer() - disclaimer = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + disclaimer = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) return @@ -534,11 +543,11 @@ class MetacafeIE(InfoExtractor): 'filters': '0', 'submit': "Continue - I'm over 18", } - request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form)) + request = compat_urllib_request.Request(self._FILTER_POST, compat_urllib_parse.urlencode(disclaimer_form)) try: self.report_age_confirmation() - disclaimer = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + disclaimer = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return @@ -558,11 +567,11 @@ class MetacafeIE(InfoExtractor): return # Retrieve video webpage to extract further information - request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id) + request = compat_urllib_request.Request('http://www.metacafe.com/watch/%s/' % video_id) try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return @@ -570,7 +579,7 @@ class MetacafeIE(InfoExtractor): self.report_extraction(video_id) mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) if mobj is not None: - mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = compat_urllib_parse.unquote(mobj.group(1)) video_extension = mediaURL[-3:] # Extract gdaKey if available @@ -613,11 +622,9 @@ class MetacafeIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader.decode('utf-8'), - 'upload_date': u'NA', + 'upload_date': None, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -650,12 +657,12 @@ class DailymotionIE(InfoExtractor): video_extension = 'mp4' # Retrieve video webpage to extract further information - request = urllib2.Request(url) + request = compat_urllib_request.Request(url) request.add_header('Cookie', 'family_filter=off') try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return @@ -665,7 +672,7 @@ class DailymotionIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - flashvars = urllib.unquote(mobj.group(1)) + flashvars = compat_urllib_parse.unquote(mobj.group(1)) for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']: if key in flashvars: @@ -681,7 +688,7 @@ class DailymotionIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video URL') return - video_url = urllib.unquote(mobj.group(1)).replace('\\/', '/') + video_url = compat_urllib_parse.unquote(mobj.group(1)).replace('\\/', '/') # TODO: support choosing qualities @@ -691,7 +698,7 @@ class DailymotionIE(InfoExtractor): return video_title = unescapeHTML(mobj.group('title').decode('utf-8')) - video_uploader = u'NA' + video_uploader = None mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) if mobj is None: # lookin for official user @@ -703,7 +710,7 @@ class DailymotionIE(InfoExtractor): else: video_uploader = mobj.group(1) - video_upload_date = u'NA' + video_upload_date = None mobj = re.search(r'
([0-9]{2})-([0-9]{2})-([0-9]{4})
', webpage) if mobj is not None: video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1) @@ -715,8 +722,6 @@ class DailymotionIE(InfoExtractor): 'upload_date': video_upload_date, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -749,11 +754,11 @@ class GoogleIE(InfoExtractor): video_extension = 'mp4' # Retrieve video webpage to extract further information - request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) + request = compat_urllib_request.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id) try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -766,7 +771,7 @@ class GoogleIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = compat_urllib_parse.unquote(mobj.group(1)) mediaURL = mediaURL.replace('\\x3d', '\x3d') mediaURL = mediaURL.replace('\\x26', '\x26') @@ -789,10 +794,10 @@ class GoogleIE(InfoExtractor): # Extract video thumbnail if self._downloader.params.get('forcethumbnail', False): - request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id))) + request = compat_urllib_request.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id))) try: - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'', webpage) @@ -806,12 +811,10 @@ class GoogleIE(InfoExtractor): return [{ 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), - 'uploader': u'NA', - 'upload_date': u'NA', + 'uploader': None, + 'upload_date': None, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -844,11 +847,11 @@ class PhotobucketIE(InfoExtractor): video_extension = 'flv' # Retrieve video webpage to extract further information - request = urllib2.Request(url) + request = compat_urllib_request.Request(url) try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -858,7 +861,7 @@ class PhotobucketIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) + mediaURL = compat_urllib_parse.unquote(mobj.group(1)) video_url = mediaURL @@ -874,11 +877,9 @@ class PhotobucketIE(InfoExtractor): 'id': video_id.decode('utf-8'), 'url': video_url.decode('utf-8'), 'uploader': video_uploader, - 'upload_date': u'NA', + 'upload_date': None, 'title': video_title, 'ext': video_extension.decode('utf-8'), - 'format': u'NA', - 'player_url': None, }] @@ -915,10 +916,10 @@ class YahooIE(InfoExtractor): # Rewrite valid but non-extractable URLs as # extractable English language /watch/ URLs if re.match(self._VPAGE_URL, url) is None: - request = urllib2.Request(url) + request = compat_urllib_request.Request(url) try: - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -938,11 +939,11 @@ class YahooIE(InfoExtractor): return self._real_extract(url, new_video=False) # Retrieve video webpage to extract further information - request = urllib2.Request(url) + request = compat_urllib_request.Request(url) try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -994,13 +995,13 @@ class YahooIE(InfoExtractor): # seem to need most of them, otherwise the server sends a 401. yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded - request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + + request = compat_urllib_request.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -1009,20 +1010,18 @@ class YahooIE(InfoExtractor): if mobj is None: self._downloader.trouble(u'ERROR: Unable to extract media URL') return - video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8') + video_url = compat_urllib_parse.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8') video_url = unescapeHTML(video_url) return [{ 'id': video_id.decode('utf-8'), 'url': video_url, 'uploader': video_uploader, - 'upload_date': u'NA', + 'upload_date': None, 'title': video_title, 'ext': video_extension.decode('utf-8'), 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, - 'thumbnail': video_thumbnail, - 'player_url': None, }] @@ -1054,11 +1053,11 @@ class VimeoIE(InfoExtractor): video_id = mobj.group(1) # Retrieve video webpage to extract further information - request = urllib2.Request(url, None, std_headers) + request = compat_urllib_request.Request(url, None, std_headers) try: self.report_download_webpage(video_id) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return @@ -1090,7 +1089,7 @@ class VimeoIE(InfoExtractor): else: video_description = '' # Extract upload date - video_upload_date = u'NA' + video_upload_date = None mobj = re.search(r'', webpage) if mobj is not None: video_upload_date = mobj.group(1) @@ -1136,7 +1135,6 @@ class VimeoIE(InfoExtractor): 'ext': video_extension, 'thumbnail': video_thumbnail, 'description': video_description, - 'player_url': None, }] @@ -1161,14 +1159,14 @@ class ArteTvIE(InfoExtractor): def fetch_webpage(self, url): self._downloader.increment_downloads() - request = urllib2.Request(url) + request = compat_urllib_request.Request(url) try: self.report_download_webpage(url) - webpage = urllib2.urlopen(request).read() - except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + webpage = compat_urllib_request.urlopen(request).read() + except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return - except ValueError, err: + except ValueError as err: self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) return return webpage @@ -1202,7 +1200,7 @@ class ArteTvIE(InfoExtractor): ] ) http_host = url.split('/')[2] - next_url = 'http://%s%s' % (http_host, urllib.unquote(info.get('url'))) + next_url = 'http://%s%s' % (http_host, compat_urllib_parse.unquote(info.get('url'))) info = self.grep_webpage( next_url, r'(s_artestras_scst_geoFRDE_' + video_lang + '.*?)\'.*?' + @@ -1227,7 +1225,7 @@ class ArteTvIE(InfoExtractor): (1, 'url', u'ERROR: Invalid URL: %s' % url) ] ) - next_url = urllib.unquote(info.get('url')) + next_url = compat_urllib_parse.unquote(info.get('url')) info = self.grep_webpage( next_url, r'