X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=33dbaa3de4fa0de4c9e6350970c2f4fa31a95589;hb=2e2038dc352de8a10525eb103bd9f6a6ae5b43f9;hp=7c9f09f77b6d093e62e91735cf8ee8bb01db7408;hpb=41a6eb949a625983c55cdae0c06410392f3a3110;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 7c9f09f77..33dbaa3de 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -114,8 +114,8 @@ class InfoExtractor(object): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the response handle """ if note is None: - note = u'Downloading video webpage' - if note is not False: + self.report_download_webpage(video_id) + elif note is not False: self.to_screen(u'%s: %s' % (video_id, note)) try: return compat_urllib_request.urlopen(url_or_request) @@ -152,6 +152,14 @@ class InfoExtractor(object): """Report information extraction.""" self.to_screen(u'%s: Extracting information' % id_or_name) + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + + def report_age_confirmation(self): + """Report attempt to confirm age.""" + self.to_screen(u'Confirming age') + #Methods for following #608 #They set the correct value of the '_type' key def video_result(self, video_info): @@ -250,10 +258,6 @@ class YoutubeIE(InfoExtractor): """Report attempt to log in.""" self.to_screen(u'Logging in') - def report_age_confirmation(self): - """Report attempt to confirm age.""" - self.to_screen(u'Confirming age') - def report_video_webpage_download(self, video_id): """Report attempt to download video webpage.""" self.to_screen(u'%s: Downloading video webpage' % video_id) @@ -558,19 +562,18 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] - for expression in format_expressions: - try: - upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') - except: - pass + upload_date = unified_strdate(upload_date) # description video_description = get_element_by_id("eow-description", video_webpage) if video_description: video_description = clean_html(video_description) else: - video_description = '' + fd_mobj = re.search(r'https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)' IE_NAME = u'vimeo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1105,13 +1069,7 @@ class VimeoIE(InfoExtractor): # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, std_headers) - try: - self.report_download_webpage(video_id) - webpage_bytes = compat_urllib_request.urlopen(request).read() - webpage = webpage_bytes.decode('utf-8') - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err)) - return + webpage = self._download_webpage(request, video_id) # Now we begin extracting as much information as we can from what we # retrieved. First we extract the information common to all extractors, @@ -1123,7 +1081,10 @@ class VimeoIE(InfoExtractor): config = webpage.split(' = {config:')[1].split(',assets:')[0] config = json.loads(config) except: - self._downloader.report_error(u'unable to extract info section') + if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): + self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option') + else: + self._downloader.report_error(u'unable to extract info section') return # Extract title @@ -1200,13 +1161,6 @@ class ArteTvIE(InfoExtractor): IE_NAME = u'arte.tv' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def fetch_webpage(self, url): request = compat_urllib_request.Request(url) try: @@ -1231,7 +1185,7 @@ class ArteTvIE(InfoExtractor): for (i, key, err) in matchTuples: if mobj.group(i) is None: - self._downloader.trouble(err) + self._downloader.report_error(err) return else: info[key] = mobj.group(i) @@ -1245,7 +1199,7 @@ class ArteTvIE(InfoExtractor): r'src="(.*?/videothek_js.*?\.js)', 0, [ - (1, 'url', u'ERROR: Invalid URL: %s' % url) + (1, 'url', u'Invalid URL: %s' % url) ] ) http_host = url.split('/')[2] @@ -1257,9 +1211,9 @@ class ArteTvIE(InfoExtractor): '(rtmp://.*?)\'', re.DOTALL, [ - (1, 'path', u'ERROR: could not extract video path: %s' % url), - (2, 'player', u'ERROR: could not extract video player: %s' % url), - (3, 'url', u'ERROR: could not extract video url: %s' % url) + (1, 'path', u'could not extract video path: %s' % url), + (2, 'player', u'could not extract video player: %s' % url), + (3, 'url', u'could not extract video url: %s' % url) ] ) video_url = u'%s/%s' % (info.get('url'), info.get('path')) @@ -1271,7 +1225,7 @@ class ArteTvIE(InfoExtractor): r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)', 0, [ - (1, 'url', u'ERROR: Invalid URL: %s' % url) + (1, 'url', u'Invalid URL: %s' % url) ] ) next_url = compat_urllib_parse.unquote(info.get('url')) @@ -1280,7 +1234,7 @@ class ArteTvIE(InfoExtractor): r'