X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube-dl;h=52bda1b2533e9b8b5c63274074f285a8cccced95;hb=d9bc015b3ca1071c008783e8ae8e36f4ca8e4edf;hp=50b9197f2f9ba8d1db53226ccc5830e937f62fcc;hpb=d89977437718619a057028566b1e901ea963193e;p=youtube-dl

diff --git a/youtube-dl b/youtube-dl
index 50b9197f2..52bda1b25 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -27,6 +27,22 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+def preferredencoding():
+	"""Get preferred encoding.
+
+	Returns the best encoding scheme for the system, based on
+	locale.getpreferredencoding() and some further tweaks.
+	"""
+	def yield_preferredencoding():
+		try:
+			pref = locale.getpreferredencoding()
+			u'TEST'.encode(pref)
+		except:
+			pref = 'UTF-8'
+		while True:
+			yield pref
+	return yield_preferredencoding().next()
+
 class DownloadError(Exception):
 	"""Download Error exception.
 
@@ -205,11 +221,13 @@ class FileDownloader(object):
 
 	@staticmethod
 	def verify_url(url):
-		"""Verify a URL is valid and data could be downloaded."""
+		"""Verify a URL is valid and data could be downloaded. Return real data URL."""
 		request = urllib2.Request(url, None, std_headers)
 		data = urllib2.urlopen(request)
 		data.read(1)
+		url = data.geturl()
 		data.close()
+		return url
 
 	def add_info_extractor(self, ie):
 		"""Add an InfoExtractor object to the end of the list."""
@@ -224,12 +242,12 @@ class FileDownloader(object):
 	def to_stdout(self, message, skip_eol=False):
 		"""Print message to stdout if not in quiet mode."""
 		if not self.params.get('quiet', False):
-			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()),
+			print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 			sys.stdout.flush()
 
 	def to_stderr(self, message):
 		"""Print message to stderr."""
-		print >>sys.stderr, message.encode(locale.getpreferredencoding())
+		print >>sys.stderr, message.encode(preferredencoding())
 
 	def fixed_template(self):
 		"""Checks if the output template is fixed."""
@@ -291,15 +309,15 @@ class FileDownloader(object):
 		# Do nothing else if in simulate mode
 		if self.params.get('simulate', False):
 			try:
-				self.verify_url(info_dict['url'])
+				info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 			except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 				raise UnavailableFormatError
 
 			# Forced printings
 			if self.params.get('forcetitle', False):
-				print info_dict['title'].encode(locale.getpreferredencoding())
+				print info_dict['title'].encode(preferredencoding())
 			if self.params.get('forceurl', False):
-				print info_dict['url'].encode(locale.getpreferredencoding())
+				print info_dict['url'].encode(preferredencoding())
 
 			return
 
@@ -309,7 +327,7 @@ class FileDownloader(object):
 			filename = self.params['outtmpl'] % template_dict
 		except (ValueError, KeyError), err:
 			self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-		if self.params['nooverwrites'] and os.path.exists(filename):
+		if self.params.get('nooverwrites', False) and os.path.exists(filename):
 			self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 			return
 
@@ -320,7 +338,7 @@ class FileDownloader(object):
 			return
 
 		try:
-			success = self._do_download(filename, info_dict['url'])
+			success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 		except (OSError, IOError), err:
 			raise UnavailableFormatError
 		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -384,7 +402,7 @@ class FileDownloader(object):
 			resume_len = os.path.getsize(filename)
 		else:
 			resume_len = 0
-		if self.params['continuedl'] and resume_len != 0:
+		if self.params.get('continuedl', False) and resume_len != 0:
 			self.report_resuming_byte(resume_len)
 			request.add_header('Range','bytes=%d-' % resume_len)
 
@@ -512,12 +530,13 @@ class YoutubeIE(InfoExtractor):
 	_LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 	_NETRC_MACHINE = 'youtube'
-	_available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
+	_available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
 	_video_extensions = {
 		'13': '3gp',
 		'17': 'mp4',
 		'18': 'mp4',
 		'22': 'mp4',
+		'37': 'mp4',
 	}
 
 	@staticmethod
@@ -567,10 +586,6 @@ class YoutubeIE(InfoExtractor):
 		"""Report attempt to extract video information."""
 		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 
-	def report_video_url(self, video_id, video_real_url):
-		"""Report extracted video URL."""
-		self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
-
 	def report_unavailable_format(self, video_id, format):
 		"""Report extracted video URL."""
 		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
@@ -696,7 +711,6 @@ class YoutubeIE(InfoExtractor):
 		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 		if format_param is not None:
 			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-		self.report_video_url(video_id, video_real_url)
 
 		# uploader
 		mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
@@ -706,11 +720,11 @@ class YoutubeIE(InfoExtractor):
 		video_uploader = urllib.unquote(mobj.group(1))
 
 		# title
-		mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
+		mobj = re.search(r'(?m)&title=([^&]*)(?:&|$)', video_info_webpage)
 		if mobj is None:
 			self._downloader.trouble(u'ERROR: unable to extract video title')
 			return
-		video_title = urllib.unquote(mobj.group(1))
+		video_title = urllib.unquote_plus(mobj.group(1))
 		video_title = video_title.decode('utf-8')
 		video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
 		video_title = video_title.replace(os.sep, u'%')
@@ -879,7 +893,7 @@ class YoutubeSearchIE(InfoExtractor):
 	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
-	_MORE_PAGES_INDICATOR = r'>Next</a>'
+	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 	_youtube_ie = None
 	_max_youtube_results = 1000
 
@@ -956,7 +970,7 @@ class YoutubeSearchIE(InfoExtractor):
 							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 						return
 
-			if self._MORE_PAGES_INDICATOR not in page:
+			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
 				for id in video_ids:
 					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
 				return
@@ -1076,6 +1090,22 @@ if __name__ == '__main__':
 		import getpass
 		import optparse
 
+		# Function to update the program file with the latest version from bitbucket.org
+		def update_self(downloader, filename):
+			# Note: downloader only used for options
+			if not os.access (filename, os.W_OK):
+				sys.exit('ERROR: no write permissions on %s' % filename)
+
+			downloader.to_stdout('Updating to latest stable version...')
+			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
+			latest_version = urllib.urlopen(latest_url).read().strip()
+			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+			newcontent = urllib.urlopen(prog_url).read()
+			stream = open(filename, 'w')
+			stream.write(newcontent)
+			stream.close()
+			downloader.to_stdout('Updated to version %s' % latest_version)
+
 		# General configuration
 		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
 		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
@@ -1092,6 +1122,8 @@ if __name__ == '__main__':
 				action='help', help='print this help text and exit')
 		parser.add_option('-v', '--version',
 				action='version', help='print program version and exit')
+		parser.add_option('-U', '--update',
+				action='store_true', dest='update_self', help='update this program to latest stable version')
 		parser.add_option('-i', '--ignore-errors',
 				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
 		parser.add_option('-r', '--rate-limit',
@@ -1144,7 +1176,7 @@ if __name__ == '__main__':
 		parser.add_option_group(filesystem)
 
 		(opts, args) = parser.parse_args()
-	
+
 		# Batch file verification
 		batchurls = []
 		if opts.batchfile is not None:
@@ -1157,8 +1189,6 @@ if __name__ == '__main__':
 		all_urls = batchurls + args
 
 		# Conflicting, missing and erroneous options
-		if len(all_urls) < 1:
-			parser.error(u'you must provide at least one URL')
 		if opts.usenetrc and (opts.username is not None or opts.password is not None):
 			parser.error(u'using .netrc conflicts with giving username/password')
 		if opts.password is not None and opts.username is None:
@@ -1191,7 +1221,7 @@ if __name__ == '__main__':
 			'forcetitle': opts.gettitle,
 			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
 			'format': opts.format,
-			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(locale.getpreferredencoding()))
+			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
 				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
 				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
 				or u'%(id)s.%(ext)s'),
@@ -1204,6 +1234,17 @@ if __name__ == '__main__':
 		fd.add_info_extractor(youtube_pl_ie)
 		fd.add_info_extractor(metacafe_ie)
 		fd.add_info_extractor(youtube_ie)
+
+		# Update version
+		if opts.update_self:
+			update_self(fd, sys.argv[0])
+
+		# Maybe do nothing
+		if len(all_urls) < 1:
+			if not opts.update_self:
+				parser.error(u'you must provide at least one URL')
+			else:
+				sys.exit()
 		retcode = fd.download(all_urls)
 		sys.exit(retcode)
 
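A note on the new preferredencoding() helper in the first hunk: it probes whether the encoding reported by locale.getpreferredencoding() can actually encode text before the downloader uses it for console output, and falls back to UTF-8 otherwise. A minimal standalone sketch of that probe-and-fallback idea, assuming Python 2 as used by the script (the function name below is illustrative and not part of the patch; the patch itself additionally wraps the probe in a small generator):

    import locale

    def safe_console_encoding():
        # Ask the locale for its preferred encoding, then verify it is usable;
        # broken or missing locales fall back to UTF-8. (Illustrative only.)
        enc = locale.getpreferredencoding()
        try:
            u'TEST'.encode(enc)
        except (LookupError, TypeError, UnicodeError):
            enc = 'UTF-8'
        return enc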
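The verify_url() change relies on urllib2's urlopen() following HTTP redirects and on geturl() reporting the URL the data was actually served from; the simulate path then stores that resolved URL back into info_dict['url']. A standalone sketch of the same idea, again assuming Python 2's urllib2 as in the script (the helper name is made up for illustration):

    import urllib2

    def resolve_data_url(url):
        # Open the URL, read one byte to prove data is downloadable, and
        # return the redirect-resolved URL reported by geturl().
        data = urllib2.urlopen(urllib2.Request(url))
        data.read(1)
        real_url = data.geturl()
        data.close()
        return real_url

Handing the resolved URL back to the caller means a later download request can go straight to the final location instead of re-walking the redirect chain.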