X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=8b2442bacc21e86db7ffdc920116b51e79fd3820;hb=5e34d2ebbf9906bded4201d7bd8bb82e9353de9f;hp=d444e21d9c9acf32706b11c5be773ce6f6a60180;hpb=f17ce13a9260919c4d8cb652467023373c783bd2;p=youtube-dl
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index d444e21d9..8b2442bac 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -114,8 +114,8 @@ class InfoExtractor(object):
def _request_webpage(self, url_or_request, video_id, note=None, errnote=None):
""" Returns the response handle """
if note is None:
- note = u'Downloading video webpage'
- if note is not False:
+ self.report_download_webpage(video_id)
+ elif note is not False:
self.to_screen(u'%s: %s' % (video_id, note))
try:
return compat_urllib_request.urlopen(url_or_request)
@@ -148,6 +148,18 @@ class InfoExtractor(object):
"""Print msg to screen, prefixing it with '[ie_name]'"""
self._downloader.to_screen(u'[%s] %s' % (self.IE_NAME, msg))
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen(u'%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen(u'%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen(u'Confirming age')
+
#Methods for following #608
#They set the correct value of the '_type' key
def video_result(self, video_info):
@@ -246,10 +258,6 @@ class YoutubeIE(InfoExtractor):
"""Report attempt to log in."""
self.to_screen(u'Logging in')
- def report_age_confirmation(self):
- """Report attempt to confirm age."""
- self.to_screen(u'Confirming age')
-
def report_video_webpage_download(self, video_id):
"""Report attempt to download video webpage."""
self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -554,19 +562,18 @@ class YoutubeIE(InfoExtractor):
mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
if mobj is not None:
upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
- format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
- for expression in format_expressions:
- try:
- upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
- except:
- pass
+ upload_date = unified_strdate(upload_date)
# description
video_description = get_element_by_id("eow-description", video_webpage)
if video_description:
video_description = clean_html(video_description)
else:
- video_description = ''
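+ fd_mobj = re.search(r'<meta name="description" content="([^"]+)"', video_webpage)
[... intervening diff content lost in extraction ...]
_VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)'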
IE_NAME = u'vimeo'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self.to_screen(u'%s: Extracting information' % video_id)
-
def _real_extract(self, url, new_video=True):
# Extract ID from URL
mobj = re.match(self._VALID_URL, url)
@@ -1121,13 +1068,7 @@ class VimeoIE(InfoExtractor):
# Retrieve video webpage to extract further information
request = compat_urllib_request.Request(url, None, std_headers)
- try:
- self.report_download_webpage(video_id)
- webpage_bytes = compat_urllib_request.urlopen(request).read()
- webpage = webpage_bytes.decode('utf-8')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_error(u'Unable to retrieve video webpage: %s' % compat_str(err))
- return
+ webpage = self._download_webpage(request, video_id)
# Now we begin extracting as much information as we can from what we
# retrieved. First we extract the information common to all extractors,
@@ -1139,7 +1080,10 @@ class VimeoIE(InfoExtractor):
config = webpage.split(' = {config:')[1].split(',assets:')[0]
config = json.loads(config)
except:
- self._downloader.report_error(u'unable to extract info section')
+ if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+ self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option')
+ else:
+ self._downloader.report_error(u'unable to extract info section')
return
# Extract title
@@ -1216,17 +1160,6 @@ class ArteTvIE(InfoExtractor):
IE_NAME = u'arte.tv'
- def __init__(self, downloader=None):
- InfoExtractor.__init__(self, downloader)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen(u'%s: Downloading webpage' % video_id)
-
- def report_extraction(self, video_id):
- """Report information extraction."""
- self.to_screen(u'%s: Extracting information' % video_id)
-
def fetch_webpage(self, url):
request = compat_urllib_request.Request(url)
try:
@@ -1251,7 +1184,7 @@ class ArteTvIE(InfoExtractor):
for (i, key, err) in matchTuples:
if mobj.group(i) is None:
- self._downloader.trouble(err)
+ self._downloader.report_error(err)
return
else:
info[key] = mobj.group(i)
@@ -1265,7 +1198,7 @@ class ArteTvIE(InfoExtractor):
r'src="(.*?/videothek_js.*?\.js)',
0,
[
- (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ (1, 'url', u'Invalid URL: %s' % url)
]
)
http_host = url.split('/')[2]
@@ -1277,9 +1210,9 @@ class ArteTvIE(InfoExtractor):
'(rtmp://.*?)\'',
re.DOTALL,
[
- (1, 'path', u'ERROR: could not extract video path: %s' % url),
- (2, 'player', u'ERROR: could not extract video player: %s' % url),
- (3, 'url', u'ERROR: could not extract video url: %s' % url)
+ (1, 'path', u'could not extract video path: %s' % url),
+ (2, 'player', u'could not extract video player: %s' % url),
+ (3, 'url', u'could not extract video url: %s' % url)
]
)
video_url = u'%s/%s' % (info.get('url'), info.get('path'))
@@ -1291,7 +1224,7 @@ class ArteTvIE(InfoExtractor):
r'param name="movie".*?videorefFileUrl=(http[^\'"&]*)',
0,
[
- (1, 'url', u'ERROR: Invalid URL: %s' % url)
+ (1, 'url', u'Invalid URL: %s' % url)
]
)
next_url = compat_urllib_parse.unquote(info.get('url'))
@@ -1300,7 +1233,7 @@ class ArteTvIE(InfoExtractor):
r'