X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube-dl;h=f6e472445d8e0d9512805487a04afc88bcc68662;hb=781daeabdb6ffa2b63bf8f7dec715ac8835c558b;hp=c526071e5c6f110a5222cec60a236d5865e0c7d0;hpb=c8619e01637ae33ff6ed2a770a6222d792cf0771;p=youtube-dl diff --git a/youtube-dl b/youtube-dl index c526071e5..f6e472445 100755 --- a/youtube-dl +++ b/youtube-dl @@ -19,7 +19,7 @@ import urllib import urllib2 std_headers = { - 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.5) Gecko/2008120122 Firefox/3.0.5', + 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 'Accept-Language': 'en-us,en;q=0.5', @@ -52,6 +52,29 @@ class PostProcessingError(Exception): """ pass +class UnavailableFormatError(Exception): + """Unavailable Format exception. + + This exception will be thrown when a video is requested + in a format that is not available for that video. + """ + pass + +class ContentTooShortError(Exception): + """Content Too Short exception. + + This exception may be raised by FileDownloader objects when a file they + download is too small for what the server announced first, indicating + the connection was probably interrupted. + """ + # Both in bytes + downloaded = None + expected = None + + def __init__(self, downloaded, expected): + self.downloaded = downloaded + self.expected = expected + class FileDownloader(object): """File Downloader class. @@ -65,9 +88,10 @@ class FileDownloader(object): For this, file downloader objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the file downloader handles it to the first InfoExtractor it - finds that reports being able to handle it. The InfoExtractor returns - all the information to the FileDownloader and the latter downloads the - file or does whatever it's instructed to do. + finds that reports being able to handle it. The InfoExtractor extracts + all the information about the video or videos the URL refers to, and + asks the FileDownloader to process the video information, possibly + downloading the video. File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of @@ -95,11 +119,13 @@ class FileDownloader(object): params = None _ies = [] _pps = [] + _download_retcode = None def __init__(self, params): """Create a FileDownloader object with the given options.""" self._ies = [] self._pps = [] + self._download_retcode = 0 self.params = params @staticmethod @@ -187,7 +213,7 @@ class FileDownloader(object): def to_stdout(self, message, skip_eol=False): """Print message to stdout if not in quiet mode.""" if not self.params.get('quiet', False): - print u'%s%s' % (message, [u'\n', u''][skip_eol]), + print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(locale.getpreferredencoding()), sys.stdout.flush() def to_stderr(self, message): @@ -203,15 +229,13 @@ class FileDownloader(object): Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or - not when errors are found, after printing the message. If it - doesn't raise, it returns an error code suitable to be returned - later as a program exit code to indicate error. + not when errors are found, after printing the message. """ if message is not None: self.to_stderr(message) if not self.params.get('ignoreerrors', False): raise DownloadError(message) - return 1 + self._download_retcode = 1 def slow_down(self, start_time, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -243,47 +267,59 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'] + print info_dict['title'].encode(locale.getpreferredencoding()) if self.params.get('forceurl', False): - print info_dict['url'] + print info_dict['url'].encode(locale.getpreferredencoding()) # Do nothing else if in simulate mode if self.params.get('simulate', False): - return 0 + return try: - filename = self.params['outtmpl'] % info_dict + template_dict = dict(info_dict) + template_dict['epoch'] = unicode(long(time.time())) + filename = self.params['outtmpl'] % template_dict self.report_destination(filename) except (ValueError, KeyError), err: - return self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) + self.trouble('ERROR: invalid output template or system charset: %s' % str(err)) if self.params['nooverwrites'] and os.path.exists(filename): self.to_stderr('WARNING: file exists: %s; skipping' % filename) - return 0 + return + try: self.pmkdir(filename) except (OSError, IOError), err: - return self.trouble('ERROR: unable to create directories: %s' % str(err)) + self.trouble('ERROR: unable to create directories: %s' % str(err)) + return + try: outstream = open(filename, 'wb') except (OSError, IOError), err: - return self.trouble('ERROR: unable to open for writing: %s' % str(err)) + self.trouble('ERROR: unable to open for writing: %s' % str(err)) + return + try: self._do_download(outstream, info_dict['url']) outstream.close() except (OSError, IOError), err: - return self.trouble('ERROR: unable to write video data: %s' % str(err)) + outstream.close() + os.remove(filename) + raise UnavailableFormatError except (urllib2.URLError, httplib.HTTPException, socket.error), err: - return self.trouble('ERROR: unable to download video data: %s' % str(err)) + self.trouble('ERROR: unable to download video data: %s' % str(err)) + return + except (ContentTooShortError, ), err: + self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) + return + try: self.post_process(filename, info_dict) except (PostProcessingError), err: - return self.trouble('ERROR: postprocessing: %s' % str(err)) - - return 0 + self.trouble('ERROR: postprocessing: %s' % str(err)) + return def download(self, url_list): """Download a given list of URLs.""" - retcode = 0 if len(url_list) > 1 and self.fixed_template(): raise SameFileError(self.params['outtmpl']) @@ -297,33 +333,16 @@ class FileDownloader(object): # Suitable InfoExtractor found suitable_found = True - # Extract information from URL - all_results = ie.extract(url) - results = [x for x in all_results if x is not None] - - # See if there were problems extracting any information - if len(results) != len(all_results): - retcode = self.trouble() - - # Two results could go to the same file - if len(results) > 1 and self.fixed_template(): - raise SameFileError(self.params['outtmpl']) - - # Process each result - for result in results: - result = self.process_info(result) - - # Do not overwrite an error code with a success code - if result != 0: - retcode = result + # Extract information from URL and process it + ie.extract(url) # Suitable InfoExtractor had been found; go to next URL break if not suitable_found: - retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url) + self.trouble('ERROR: no suitable InfoExtractor: %s' % url) - return retcode + return self._download_retcode def post_process(self, filename, ie_info): """Run the postprocessing chain on the given file.""" @@ -365,7 +384,7 @@ class FileDownloader(object): self.report_finish() if data_len is not None and str(byte_counter) != data_len: - raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len)) + raise ContentTooShortError(byte_counter, long(data_len)) class InfoExtractor(object): """Information Extractor class. @@ -373,9 +392,10 @@ class InfoExtractor(object): Information extractors are the classes that, given a URL, extract information from the video (or videos) the URL refers to. This information includes the real video URL, the video title and simplified - title, author and others. It is returned in a list of dictionaries when - calling its extract() method. It is a list because a URL can refer to - more than one video (think of playlists). The dictionaries must include + title, author and others. The information is stored in a dictionary + which is then passed to the FileDownloader. The FileDownloader + processes this information possibly downloading the video to the file + system, among other possible outcomes. The dictionaries must include the following fields: id: Video identifier. @@ -419,15 +439,6 @@ class InfoExtractor(object): """Sets the downloader for this IE.""" self._downloader = downloader - def to_stdout(self, message): - """Print message to stdout if downloader is not in quiet mode.""" - if self._downloader is None or not self._downloader.params.get('quiet', False): - print message - - def to_stderr(self, message): - """Print message to stderr.""" - print >>sys.stderr, message - def _real_initialize(self): """Real initialization process. Redefine in subclasses.""" pass @@ -444,6 +455,13 @@ class YoutubeIE(InfoExtractor): _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' + _available_formats = ['22', '35', '18', '17', '13'] # listed in order of priority for -b flag + _video_extensions = { + '13': '3gp', + '17': 'mp4', + '18': 'mp4', + '22': 'mp4', + } @staticmethod def suitable(url): @@ -474,27 +492,31 @@ class YoutubeIE(InfoExtractor): def report_lang(self): """Report attempt to set language.""" - self.to_stdout(u'[youtube] Setting language') + self._downloader.to_stdout(u'[youtube] Setting language') def report_login(self): """Report attempt to log in.""" - self.to_stdout(u'[youtube] Logging in') + self._downloader.to_stdout(u'[youtube] Logging in') def report_age_confirmation(self): """Report attempt to confirm age.""" - self.to_stdout(u'[youtube] Confirming age') + self._downloader.to_stdout(u'[youtube] Confirming age') def report_webpage_download(self, video_id): """Report attempt to download webpage.""" - self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) + self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id) def report_information_extraction(self, video_id): """Report attempt to extract video information.""" - self.to_stdout(u'[youtube] %s: Extracting video information' % video_id) + self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id) def report_video_url(self, video_id, video_real_url): """Report extracted video URL.""" - self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) + self._downloader.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url)) + + def report_unavailable_format(self, video_id, format): + """Report extracted video URL.""" + self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format)) def _real_initialize(self): if self._downloader is None: @@ -517,7 +539,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) return # Set language @@ -526,7 +548,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) return # No authentication to be performed @@ -546,10 +568,10 @@ class YoutubeIE(InfoExtractor): self.report_login() login_results = urllib2.urlopen(request).read() if re.search(r'(?i)