X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube_dl%2FInfoExtractors.py;h=6d8e7be109253306b757d7def8ad142ec7076c01;hb=6ce533a2205eec9cd94e12a6a8d281ed852b5091;hp=53d65c02eea034f8ea7d2ffe270c9586910aa504;hpb=a3d689cfb3b8d6320104bb70725679bc2d336f77;p=youtube-dl diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 53d65c02e..6d8e7be10 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -114,8 +114,8 @@ class InfoExtractor(object): def _request_webpage(self, url_or_request, video_id, note=None, errnote=None): """ Returns the response handle """ if note is None: - note = u'Downloading video webpage' - if note is not False: + self.report_download_webpage(video_id) + elif note is not False: self.to_screen(u'%s: %s' % (video_id, note)) try: return compat_urllib_request.urlopen(url_or_request) @@ -152,6 +152,10 @@ class InfoExtractor(object): """Report information extraction.""" self.to_screen(u'%s: Extracting information' % id_or_name) + def report_download_webpage(self, video_id): + """Report webpage download.""" + self.to_screen(u'%s: Downloading webpage' % video_id) + def report_age_confirmation(self): """Report attempt to confirm age.""" self.to_screen(u'Confirming age') @@ -558,12 +562,7 @@ class YoutubeIE(InfoExtractor): mobj = re.search(r'id="eow-date.*?>(.*?)', video_webpage, re.DOTALL) if mobj is not None: upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split()) - format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y'] - for expression in format_expressions: - try: - upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d') - except: - pass + upload_date = unified_strdate(upload_date) # description video_description = get_element_by_id("eow-description", video_webpage) @@ -684,17 +683,10 @@ class MetacafeIE(InfoExtractor): _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' IE_NAME = u'metacafe' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_disclaimer(self): """Report disclaimer retrieval.""" self.to_screen(u'Retrieving disclaimer') - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_initialize(self): # Retrieve disclaimer request = compat_urllib_request.Request(self._DISCLAIMER) @@ -793,10 +785,6 @@ class DailymotionIE(InfoExtractor): _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^/]+)' IE_NAME = u'dailymotion' - _WORKING = False - - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) def _real_extract(self, url): # Extract id and simplified title from URL @@ -879,13 +867,6 @@ class PhotobucketIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' IE_NAME = u'photobucket' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url): # Extract id from URL mobj = re.match(self._VALID_URL, url) @@ -944,13 +925,6 @@ class YahooIE(InfoExtractor): _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' IE_NAME = u'video.yahoo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1080,13 +1054,6 @@ class VimeoIE(InfoExtractor): _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)' IE_NAME = u'vimeo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def _real_extract(self, url, new_video=True): # Extract ID from URL mobj = re.match(self._VALID_URL, url) @@ -1120,7 +1087,10 @@ class VimeoIE(InfoExtractor): config = webpage.split(' = {config:')[1].split(',assets:')[0] config = json.loads(config) except: - self._downloader.report_error(u'unable to extract info section') + if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): + self._downloader.report_error(u'The author has restricted the access to this video, try with the "--referer" option') + else: + self._downloader.report_error(u'unable to extract info section') return # Extract title @@ -1197,13 +1167,6 @@ class ArteTvIE(InfoExtractor): IE_NAME = u'arte.tv' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - - def report_download_webpage(self, video_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % video_id) - def fetch_webpage(self, url): request = compat_urllib_request.Request(url) try: @@ -1327,14 +1290,11 @@ class GenericIE(InfoExtractor): _VALID_URL = r'.*' IE_NAME = u'generic' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_webpage(self, video_id): """Report webpage download.""" if not self._downloader.params.get('test', False): self._downloader.report_warning(u'Falling back on generic information extractor.') - self.to_screen(u'%s: Downloading webpage' % video_id) + super(GenericIE, self).report_download_webpage(video_id) def report_following_redirect(self, new_url): """Report information extraction.""" @@ -1469,9 +1429,6 @@ class YoutubeSearchIE(InfoExtractor): _max_youtube_results = 1000 IE_NAME = u'youtube:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download search page with given number.""" query = query.decode(preferredencoding()) @@ -1546,9 +1503,6 @@ class GoogleSearchIE(InfoExtractor): _max_google_results = 1000 IE_NAME = u'video.google:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1630,9 +1584,6 @@ class YahooSearchIE(InfoExtractor): _max_yahoo_results = 1000 IE_NAME = u'video.yahoo:search' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, query, pagenum): """Report attempt to download playlist page with given number.""" query = query.decode(preferredencoding()) @@ -1726,9 +1677,6 @@ class YoutubePlaylistIE(InfoExtractor): _MAX_RESULTS = 50 IE_NAME = u'youtube:playlist' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" @@ -1769,12 +1717,11 @@ class YoutubePlaylistIE(InfoExtractor): if 'feed' not in response: self._downloader.report_error(u'Got a malformed response from YouTube API') return + playlist_title = response['feed']['title']['$t'] if 'entry' not in response['feed']: # Number of videos is a multiple of self._MAX_RESULTS break - playlist_title = response['feed']['title']['$t'] - videos += [ (entry['yt$position']['$t'], entry['content']['src']) for entry in response['feed']['entry'] if 'content' in entry ] @@ -1873,9 +1820,6 @@ class YoutubeUserIE(InfoExtractor): _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' IE_NAME = u'youtube:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % @@ -1942,9 +1886,6 @@ class BlipTVUserIE(InfoExtractor): _PAGE_SIZE = 12 IE_NAME = u'blip.tv:user' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_page(self, username, pagenum): """Report attempt to download user page.""" self.to_screen(u'user %s: Downloading video ids from page %d' % @@ -2020,10 +1961,6 @@ class DepositFilesIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' - def report_download_webpage(self, file_id): - """Report webpage download.""" - self.to_screen(u'%s: Downloading webpage' % file_id) - def _real_extract(self, url): file_id = url.split('/')[-1] # Rebuild url in english locale @@ -2274,9 +2211,6 @@ class MyVideoIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' IE_NAME = u'myvideo' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2445,7 +2379,7 @@ class ComedyCentralIE(InfoExtractor): shortMediaId = mediaId.split(':')[-1] showId = mediaId.split(':')[-2].replace('.com', '') officialTitle = itemEl.findall('./title')[0].text - officialDate = itemEl.findall('./pubDate')[0].text + officialDate = unified_strdate(itemEl.findall('./pubDate')[0].text) configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + compat_urllib_parse.urlencode({'uri': mediaId})) @@ -2712,9 +2646,6 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'soundcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_resolve(self, video_id): """Report information extraction.""" self.to_screen(u'%s: Resolving id' % video_id) @@ -2758,12 +2689,13 @@ class SoundcloudIE(InfoExtractor): streams = json.loads(stream_json) mediaURL = streams['http_mp3_128_url'] + upload_date = unified_strdate(info['created_at']) return [{ 'id': info['id'], 'url': mediaURL, 'uploader': info['user']['username'], - 'upload_date': info['created_at'], + 'upload_date': upload_date, 'title': info['title'], 'ext': u'mp3', 'description': info['description'], @@ -2779,10 +2711,7 @@ class SoundcloudSetIE(InfoExtractor): """ _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/sets/([\w\d-]+)' - IE_NAME = u'soundcloud' - - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) + IE_NAME = u'soundcloud:set' def report_resolve(self, video_id): """Report information extraction.""" @@ -2904,9 +2833,6 @@ class MixcloudIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)' IE_NAME = u'mixcloud' - def __init__(self, downloader=None): - InfoExtractor.__init__(self, downloader) - def report_download_json(self, file_id): """Report JSON download.""" self.to_screen(u'Downloading json') @@ -3014,10 +2940,6 @@ class StanfordOpenClassroomIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P[^&]+)(&video=(?P