X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube-dl;h=c3d9271c2cacef4dc9b1c29aef3185fdf87a8524;hb=ab1f697827c8cb1a4d09c03e843ebae123ce35f5;hp=99d07eeec0e576e245ed2a9f99f877b33757f733;hpb=0c8beb43f22c20cf0c217cf37c410fb76270bba3;p=youtube-dl

diff --git a/youtube-dl b/youtube-dl
index 99d07eeec..c3d9271c2 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -27,6 +27,22 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+def preferredencoding():
+	"""Get preferred encoding.
+
+	Returns the best encoding scheme for the system, based on
+	locale.getpreferredencoding() and some further tweaks.
+	"""
+	def yield_preferredencoding():
+		try:
+			pref = locale.getpreferredencoding()
+			u'TEST'.encode(pref)
+		except:
+			pref = 'UTF-8'
+		while True:
+			yield pref
+	return yield_preferredencoding().next()
+
 class DownloadError(Exception):
 	"""Download Error exception.
 
@@ -205,11 +221,13 @@ class FileDownloader(object):
 
 	@staticmethod
 	def verify_url(url):
-		"""Verify a URL is valid and data could be downloaded."""
+		"""Verify a URL is valid and data could be downloaded. Return real data URL."""
 		request = urllib2.Request(url, None, std_headers)
 		data = urllib2.urlopen(request)
 		data.read(1)
+		url = data.geturl()
 		data.close()
+		return url
 
 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
@@ -224,12 +242,12 @@ class FileDownloader(object):
 	def to_stdout(self, message, skip_eol=False):
 		"""Print message to stdout if not in quiet mode."""
 		if not self.params.get('quiet', False):
-			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
+			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 			sys.stdout.flush()
 
 	def to_stderr(self, message):
 		"""Print message to stderr."""
-		print >>sys.stderr, message.encode(locale.getpreferredencoding())
+		print >>sys.stderr, message.encode(preferredencoding())
 
 	def fixed_template(self):
 		"""Checks if the output template is fixed."""
@@ -291,15 +309,15 @@ class FileDownloader(object):
 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
 			try:
-				self.verify_url(info_dict['url'])
+				info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 				raise UnavailableFormatError
 
 			# Forced printings
 			if self.params.get('forcetitle', False):
-				print info_dict['title'].encode(locale.getpreferredencoding())
+				print info_dict['title'].encode(preferredencoding())
 			if self.params.get('forceurl', False):
-				print info_dict['url'].encode(locale.getpreferredencoding())
+				print info_dict['url'].encode(preferredencoding())
 
 			return
 
@@ -309,7 +327,7 @@ class FileDownloader(object):
 			filename = self.params['outtmpl'] % template_dict
 		except (ValueError, KeyError), err:
 			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-		if self.params['nooverwrites'] and os.path.exists(filename):
+		if self.params.get('nooverwrites', False) and os.path.exists(filename):
 			self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 			return
 
@@ -320,7 +338,7 @@ class FileDownloader(object):
 			return
 
 		try:
-			success = self._do_download(filename, info_dict['url'])
+			success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 		except (OSError, IOError), err:
 			raise UnavailableFormatError
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -384,7 +402,7 @@ class FileDownloader(object):
 			resume_len = os.path.getsize(filename)
 		else:
 			resume_len = 0
-		if self.params['continuedl'] and resume_len != 0:
+		if self.params.get('continuedl', False) and resume_len != 0:
 			self.report_resuming_byte(resume_len)
 			request.add_header('Range','bytes=%d-' % resume_len)
 
@@ -512,7 +530,7 @@ class YoutubeIE(InfoExtractor):
 	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 	_NETRC_MACHINE = 'youtube'
-	_available_formats = ['22', '35', '18', '5', '17', '13'] # listed in order of priority for -b flag
+	_available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
 	_video_extensions = {
 		'13': '3gp',
 		'17': 'mp4',
@@ -567,10 +585,6 @@ class YoutubeIE(InfoExtractor):
 		"""Report attempt to extract video information."""
 		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 
-	def report_video_url(self, video_id, video_real_url):
-		"""Report extracted video URL."""
-		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-
 	def report_unavailable_format(self, video_id, format):
 		"""Report extracted video URL."""
 		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
@@ -696,7 +710,6 @@ class YoutubeIE(InfoExtractor):
 			video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 			if format_param is not None:
 				video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-			self.report_video_url(video_id, video_real_url)
 
 			# uploader
 			mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
@@ -706,11 +719,11 @@ class YoutubeIE(InfoExtractor):
 			video_uploader = urllib.unquote(mobj.group(1))
 
 			# title
-			mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
+			mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
 			if mobj is None:
 				self._downloader.trouble(u'ERROR: unable to extract video title')
 				return
-			video_title = urllib.unquote(mobj.group(1))
+			video_title = urllib.unquote_plus(mobj.group(1))
 			video_title = video_title.decode('utf-8')
 			video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
 			video_title = video_title.replace(os.sep, u'%')
@@ -879,7 +892,7 @@ class YoutubeSearchIE(InfoExtractor):
 	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
-	_MORE_PAGES_INDICATOR = r'>Next</a>'
+	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 	_youtube_ie = None
 	_max_youtube_results = 1000
 
@@ -956,7 +969,7 @@ class YoutubeSearchIE(InfoExtractor):
 							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 						return
 
-			if self._MORE_PAGES_INDICATOR not in page:
+			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 				for id in video_ids:
 					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 				return
@@ -966,7 +979,7 @@ class YoutubeSearchIE(InfoExtractor):
 
 class YoutubePlaylistIE(InfoExtractor):
 	"""Information Extractor for YouTube playlists."""
-	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
+	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
 	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
 	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
@@ -1084,7 +1097,7 @@ if __name__ == '__main__':
 		# Parse command line
 		parser = optparse.OptionParser(
 			usage='Usage: %prog [options] url...',
-			version='2009.08.08',
+			version='INTERNAL',
 			conflict_handler='resolve',
 		)
@@ -1191,7 +1204,7 @@ if __name__ == '__main__':
 			'forcetitle': opts.gettitle,
 			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
 			'format': opts.format,
-			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
+			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),