X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;ds=inline;f=youtube_dl%2FInfoExtractors.py;h=d71e1a90ee9b32cb1c4bc5a4cca5741a819edc64;hb=320e26a0af46dcefd53ba06656c986def933bc6b;hp=83cb32196880f58c742a271279034c044e4153e9;hpb=1f46c152628bdd6b97212ced758b9f83063b5820;p=youtube-dl
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 83cb32196..d71e1a90e 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -115,7 +115,8 @@ class InfoExtractor(object):
""" Returns the response handle """
if note is None:
note = u'Downloading video webpage'
- self._downloader.to_screen(u'[%s] %s: %s' % (self.IE_NAME, video_id, note))
+ if note is not False:
+ self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
@@ -133,8 +134,55 @@ class InfoExtractor(object):
else:
encoding = 'utf-8'
webpage_bytes = urlh.read()
+ if self._downloader.params.get('dump_intermediate_pages', False):
+ try:
+ url = url_or_request.get_full_url()
+ except AttributeError:
+ url = url_or_request
+ self.to_screen(u'Dumping request to ' + url)
+ dump = base64.b64encode(webpage_bytes).decode('ascii')
+ self._downloader.to_screen(dump)
return webpage_bytes.decode(encoding, 'replace')
+ def to_screen(self, msg):
+ """Print msg to screen, prefixing it with '[ie_name]'"""
+ self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
+
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen(u'%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen(u'%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen(u'Confirming age')
+
+ #Methods for following #608
+ #They set the correct value of the '_type' key
+ def video_result(self, video_info):
+ """Returns a video"""
+ video_info['_type'] = 'video'
+ return video_info
+ def url_result(self, url, ie=None):
+ """Returns a url that points to a page that should be processed"""
+ #TODO: ie should be the class used for getting the info
+ video_info = {'_type': 'url',
+ 'url': url,
+ 'ie_key': ie}
+ return video_info
+ def playlist_result(self, entries, playlist_id=None, playlist_title=None):
+ """Returns a playlist"""
+ video_info = {'_type': 'playlist',
+ 'entries': entries}
+ if playlist_id:
+ video_info['id'] = playlist_id
+ if playlist_title:
+ video_info['title'] = playlist_title
+ return video_info
+
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
@@ -204,48 +252,44 @@ class YoutubeIE(InfoExtractor):
def report_lang(self):
"""Report attempt to set language."""
- self._downloader.to_screen(u'[youtube] Setting language')
+ self.to_screen(u'Setting language')
def report_login(self):
"""Report attempt to log in."""
- self._downloader.to_screen(u'[youtube] Logging in')
-
- def report_age_confirmation(self):
- """Report attempt to confirm age."""
- self._downloader.to_screen(u'[youtube] Confirming age')
+ self.to_screen(u'Logging in')
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
- self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
+ self.to_screen(u'%s: Downloading video webpage' % video_id)
def report_video_info_webpage_download(self, video_id):
"""Report attempt to download video info webpage."""
- self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
+ self.to_screen(u'%s: Downloading video info webpage' % video_id)
def report_video_subtitles_download(self, video_id):
"""Report attempt to download video info webpage."""
- self._downloader.to_screen(u'[youtube] %s: Checking available subtitles' % video_id)
+ self.to_screen(u'%s: Checking available subtitles' % video_id)
def report_video_subtitles_request(self, video_id, sub_lang, format):
"""Report attempt to download video info webpage."""
- self._downloader.to_screen(u'[youtube] %s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
+ self.to_screen(u'%s: Downloading video subtitles for %s.%s' % (video_id, sub_lang, format))
def report_video_subtitles_available(self, video_id, sub_lang_list):
"""Report available subtitles."""
sub_lang = ",".join(list(sub_lang_list.keys()))
- self._downloader.to_screen(u'[youtube] %s: Available subtitles for video: %s' % (video_id, sub_lang))
+ self.to_screen(u'%s: Available subtitles for video: %s' % (video_id, sub_lang))
def report_information_extraction(self, video_id):
"""Report attempt to extract video information."""
- self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
+ self.to_screen(u'%s: Extracting video information' % video_id)
def report_unavailable_format(self, video_id, format):
"""Report extracted video URL."""
- self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
+ self.to_screen(u'%s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
"""Indicate the download will use the RTMP protocol."""
- self._downloader.to_screen(u'[youtube] RTMP download detected')
+ self.to_screen(u'RTMP download detected')
def _get_available_subtitles(self, video_id):
self.report_video_subtitles_download(video_id)
@@ -253,11 +297,11 @@ class YoutubeIE(InfoExtractor):
try:
sub_list = compat_urllib_request.urlopen(request).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+ return (u'unable to download video subtitles: %s' % compat_str(err), None)
sub_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
sub_lang_list = dict((l[1], l[0]) for l in sub_lang_list)
if not sub_lang_list:
- return (u'WARNING: video doesn\'t have subtitles', None)
+ return (u'video doesn\'t have subtitles', None)
return sub_lang_list
def _list_available_subtitles(self, video_id):
@@ -265,6 +309,10 @@ class YoutubeIE(InfoExtractor):
self.report_video_subtitles_available(video_id, sub_lang_list)
def _request_subtitle(self, sub_lang, sub_name, video_id, format):
+ """
+ Return tuple:
+ (error_message, sub_lang, sub)
+ """
self.report_video_subtitles_request(video_id, sub_lang, format)
params = compat_urllib_parse.urlencode({
'lang': sub_lang,
@@ -276,14 +324,20 @@ class YoutubeIE(InfoExtractor):
try:
sub = compat_urllib_request.urlopen(url).read().decode('utf-8')
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- return (u'WARNING: unable to download video subtitles: %s' % compat_str(err), None)
+ return (u'unable to download video subtitles: %s' % compat_str(err), None, None)
if not sub:
- return (u'WARNING: Did not fetch video subtitles', None)
+ return (u'Did not fetch video subtitles', None, None)
return (None, sub_lang, sub)
def _extract_subtitle(self, video_id):
+ """
+ Return a list with a tuple:
+ [(error_message, sub_lang, sub)]
+ """
sub_lang_list = self._get_available_subtitles(video_id)
sub_format = self._downloader.params.get('subtitlesformat')
+ if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
+ return [(sub_lang_list[0], None, None)]
if self._downloader.params.get('subtitleslang', False):
sub_lang = self._downloader.params.get('subtitleslang')
elif 'en' in sub_lang_list:
@@ -291,7 +345,7 @@ class YoutubeIE(InfoExtractor):
else:
sub_lang = list(sub_lang_list.keys())[0]
if not sub_lang in sub_lang_list:
- return (u'WARNING: no closed captions found in the specified language "%s"' % sub_lang, None)
+ return [(u'no closed captions found in the specified language "%s"' % sub_lang, None, None)]
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
return [subtitle]
@@ -299,6 +353,8 @@ class YoutubeIE(InfoExtractor):
def _extract_all_subtitles(self, video_id):
sub_lang_list = self._get_available_subtitles(video_id)
sub_format = self._downloader.params.get('subtitlesformat')
+ if isinstance(sub_lang_list,tuple): #There was some error, it didn't get the available subtitles
+ return [(sub_lang_list[0], None, None)]
subtitles = []
for sub_lang in sub_lang_list:
subtitle = self._request_subtitle(sub_lang, sub_lang_list[sub_lang].encode('utf-8'), video_id, sub_format)
@@ -451,18 +507,14 @@ class YoutubeIE(InfoExtractor):
# Get video info
self.report_video_info_webpage_download(video_id)
for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
- video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
+ video_info_url = ('https://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
% (video_id, el_type))
- request = compat_urllib_request.Request(video_info_url)
- try:
- video_info_webpage_bytes = compat_urllib_request.urlopen(request).read()
- video_info_webpage = video_info_webpage_bytes.decode('utf-8', 'ignore')
- video_info = compat_parse_qs(video_info_webpage)
- if 'token' in video_info:
- break
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_error(u'unable to download video info webpage: %s' % compat_str(err))
- return
+ video_info_webpage = self._download_webpage(video_info_url, video_id,
+ note=False,
+ errnote='unable to download video info webpage')
+ video_info = compat_parse_qs(video_info_webpage)
+ if 'token' in video_info:
+ break
if 'token' not in video_info:
if 'reason' in video_info:
self._downloader.report_error(u'YouTube said: %s' % video_info['reason'][0])
@@ -522,7 +574,11 @@ class YoutubeIE(InfoExtractor):
if video_description:
video_description = clean_html(video_description)
else:
- video_description = ''
+ fd_mobj = re.search(r'http.*?)",(.*?)"key":"(?P.*?)"', vardict['mediaData'][0])
if mobj is None:
self._downloader.report_error(u'unable to extract media URL')
return
- mediaURL = mobj.group(1).replace('\\/', '/')
+ mediaURL = mobj.group('mediaURL').replace('\\/', '/')
video_extension = mediaURL[-3:]
- video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
+ video_url = '%s?__gda__=%s' % (mediaURL, mobj.group('key'))
mobj = re.search(r'(?im)(.*) - Video', webpage)
if mobj is None:
@@ -764,10 +798,6 @@ class DailymotionIE(InfoExtractor):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
- def report_extraction(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
-
def _real_extract(self, url):
# Extract id and simplified title from URL
mobj = re.match(self._VALID_URL, url)
@@ -795,7 +825,7 @@ class DailymotionIE(InfoExtractor):
for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
if key in flashvars:
max_quality = key
- self._downloader.to_screen(u'[dailymotion] Using %s' % key)
+ self.to_screen(u'Using %s' % key)
break
else:
self._downloader.report_error(u'unable to extract video URL')
@@ -852,14 +882,6 @@ class PhotobucketIE(InfoExtractor):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
-
def _real_extract(self, url):
# Extract id from URL
mobj = re.match(self._VALID_URL, url)
@@ -921,14 +943,6 @@ class YahooIE(InfoExtractor):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
-
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
@@ -1061,14 +1075,6 @@ class VimeoIE(InfoExtractor):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
-
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
@@ -1118,7 +1124,7 @@ class VimeoIE(InfoExtractor):
# Extract video description
video_description = get_element_by_attribute("itemprop", "description", webpage)
if video_description: video_description = clean_html(video_description)
- else: video_description = ''
+ else: video_description = u''
# Extract upload date
video_upload_date = None
@@ -1149,7 +1155,7 @@ class VimeoIE(InfoExtractor):
video_quality = files[quality][0][2]
video_codec = files[quality][0][0]
video_extension = files[quality][0][1]
- self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
+ self.to_screen(u'%s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality))
break
else:
self._downloader.report_error(u'no known codec found')
@@ -1182,14 +1188,6 @@ class ArteTvIE(InfoExtractor):
def __init__(self, downloader=None):
InfoExtractor.__init__(self, downloader)
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self._downloader.to_screen(u'[arte.tv] %s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self._downloader.to_screen(u'[arte.tv] %s: Extracting information' % video_id)
-
def fetch_webpage(self, url):
request = compat_urllib_request.Request(url)
try:
@@ -1214,7 +1212,7 @@ class ArteTvIE(InfoExtractor):
for (i, key, err) in matchTuples:
if mobj.group(i) is None:
- self._downloader.trouble(err)
+ self._downloader.report_error(err)
return
else:
info[key] = mobj.group(i)
@@ -1228,7 +1226,7 @@ class ArteTvIE(InfoExtractor):
r'src="(.*?/videothek_js.*?\.js)',
0,
[
- (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ (1, 'url', u'Invalid URL: %s' % url)
]
)
http_host = url.split('/')[2]
@@ -1240,9 +1238,9 @@ class ArteTvIE(InfoExtractor):
'(rtmp://.*?)\'',
re.DOTALL,
[
- (1, 'path', u'ERROR: could not extract video path: %s' % url),
- (2, 'player', u'ERROR: could not extract video player: %s' % url),
- (3, 'url', u'ERROR: could not extract video url: %s' % url)
+ (1, 'path', u'could not extract video path: %s' % url),
+ (2, 'player', u'could not extract video player: %s' % url),
+ (3, 'url', u'could not extract video url: %s' % url)
]
)
video_url = u'%s/%s' % (info.get('url'), info.get('path'))
@@ -1254,7 +1252,7 @@ class ArteTvIE(InfoExtractor):
r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
0,
[
- (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ (1, 'url', u'Invalid URL: %s' % url)
]
)
next_url = compat_urllib_parse.unquote(info.get('url'))
@@ -1263,7 +1261,7 @@ class ArteTvIE(InfoExtractor):
r'