Add script to regenerate index.html
[youtube-dl] / youtube-dl
index 1e6b876e162302342337aa3c758a12111654d9db..135d14809554a592101ef6b9100dd41bb3dcce7f 100755 (executable)
@@ -25,6 +25,23 @@ std_headers = {
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
 
 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
 
+class DownloadError(Exception):
+       """Download Error exception.
+       
+       This exception may be raised by FileDownloader objects if they are not
+       configured to continue on errors. It will contain the appropriate
+       error message.
+       """
+       pass
+
+class SameFileError(Exception):
+       """Same File exception.
+
+       This exception will be raised by FileDownloader objects if they detect
+       multiple files would have to be downloaded to the same file on disk.
+       """
+       pass
+
 class FileDownloader(object):
        """File Downloader class.
 
 class FileDownloader(object):
        """File Downloader class.
 
@@ -67,6 +84,7 @@ class FileDownloader(object):
        _ies = []
 
        def __init__(self, params):
        _ies = []
 
        def __init__(self, params):
+               """Create a FileDownloader object with the given options."""
                self._ies = []
                self.set_params(params)
        
                self._ies = []
                self.set_params(params)
        
@@ -164,22 +182,35 @@ class FileDownloader(object):
                """Determine action to take when a download problem appears.
 
                Depending on if the downloader has been configured to ignore
                """Determine action to take when a download problem appears.
 
                Depending on if the downloader has been configured to ignore
-               download errors or not, this method may exit the program or
+               download errors or not, this method may raise an exception or
                not when errors are found, after printing the message. If it
                not when errors are found, after printing the message. If it
-               doesn't exit, it returns an error code suitable to be returned
+               doesn't raise, it returns an error code suitable to be returned
                later as a program exit code to indicate error.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self._params.get('ignoreerrors', False):
                later as a program exit code to indicate error.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self._params.get('ignoreerrors', False):
-                       sys.exit(1)
+                       raise DownloadError(message)
                return 1
 
                return 1
 
+       def report_destination(self, filename):
+               """Report destination filename."""
+               self.to_stdout('[download] Destination: %s' % filename)
+       
+       def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+               """Report download progress."""
+               self.to_stdout('\r[download] %s of %s at %s ETA %s' %
+                               (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+       
+       def report_finish(self):
+               """Report download finished."""
+               self.to_stdout('')
+
        def download(self, url_list):
                """Download a given list of URLs."""
                retcode = 0
                if len(url_list) > 1 and self.fixed_template():
        def download(self, url_list):
                """Download a given list of URLs."""
                retcode = 0
                if len(url_list) > 1 and self.fixed_template():
-                       sys.exit('ERROR: fixed output name but more than one file to download')
+                       raise SameFileError(self._params['outtmpl'])
 
                for url in url_list:
                        suitable_found = False
 
                for url in url_list:
                        suitable_found = False
@@ -194,7 +225,7 @@ class FileDownloader(object):
                                        retcode = self.trouble()
 
                                if len(results) > 1 and self.fixed_template():
                                        retcode = self.trouble()
 
                                if len(results) > 1 and self.fixed_template():
-                                       sys.exit('ERROR: fixed output name but more than one file to download')
+                                       raise SameFileError(self._params['outtmpl'])
 
                                for result in results:
 
 
                                for result in results:
 
@@ -210,6 +241,7 @@ class FileDownloader(object):
 
                                        try:
                                                filename = self._params['outtmpl'] % result
 
                                        try:
                                                filename = self._params['outtmpl'] % result
+                                               self.report_destination(filename)
                                        except (ValueError, KeyError), err:
                                                retcode = self.trouble('ERROR: invalid output template: %s' % str(err))
                                                continue
                                        except (ValueError, KeyError), err:
                                                retcode = self.trouble('ERROR: invalid output template: %s' % str(err))
                                                continue
@@ -247,12 +279,13 @@ class FileDownloader(object):
                block_size = 1024
                start = time.time()
                while True:
                block_size = 1024
                start = time.time()
                while True:
+                       # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
-                       self.to_stdout('\r[download] %s of %s at %s ETA %s' %
-                                       (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+                       self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 
 
+                       # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
@@ -263,7 +296,7 @@ class FileDownloader(object):
                        stream.write(data_block)
                        block_size = self.best_block_size(after - before, data_block_len)
 
                        stream.write(data_block)
                        block_size = self.best_block_size(after - before, data_block_len)
 
-               self.to_stdout('')
+               self.report_finish()
                if data_len is not None and str(byte_counter) != data_len:
                        raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
 
                if data_len is not None and str(byte_counter) != data_len:
                        raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
 
@@ -305,7 +338,7 @@ class InfoExtractor(object):
                return True
 
        def initialize(self):
                return True
 
        def initialize(self):
-               """Initializes an instance (login, etc)."""
+               """Initializes an instance (authentication, etc)."""
                if not self._ready:
                        self._real_initialize()
                        self._ready = True
                if not self._ready:
                        self._real_initialize()
                        self._ready = True
@@ -320,10 +353,12 @@ class InfoExtractor(object):
                self._downloader = downloader
        
        def to_stdout(self, message):
                self._downloader = downloader
        
        def to_stdout(self, message):
+               """Print message to stdout if downloader is not in quiet mode."""
                if self._downloader is None or not self._downloader.get_params().get('quiet', False):
                        print message
        
        def to_stderr(self, message):
                if self._downloader is None or not self._downloader.get_params().get('quiet', False):
                        print message
        
        def to_stderr(self, message):
+               """Print message to stderr."""
                sys.stderr.write('%s\n' % message)
 
        def _real_initialize(self):
                sys.stderr.write('%s\n' % message)
 
        def _real_initialize(self):
@@ -341,6 +376,26 @@ class YoutubeIE(InfoExtractor):
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/'
        _NETRC_MACHINE = 'youtube'
 
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/'
        _NETRC_MACHINE = 'youtube'
 
+       def report_login(self):
+               """Report attempt to log in."""
+               self.to_stdout('[youtube] Logging in')
+       
+       def report_age_confirmation(self):
+               """Report attempt to confirm age."""
+               self.to_stdout('[youtube] Confirming age')
+       
+       def report_webpage_download(self, video_id):
+               """Report attempt to download webpage."""
+               self.to_stdout('[youtube] %s: Downloading video webpage' % video_id)
+       
+       def report_information_extraction(self, video_id):
+               """Report attempt to extract video information."""
+               self.to_stdout('[youtube] %s: Extracting video information' % video_id)
+       
+       def report_video_url(self, video_id, video_real_url):
+               """Report extracted video URL."""
+               self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url))
+
        def _real_initialize(self):
                if self._downloader is None:
                        return
        def _real_initialize(self):
                if self._downloader is None:
                        return
@@ -365,6 +420,7 @@ class YoutubeIE(InfoExtractor):
                                self.to_stderr('WARNING: parsing .netrc: %s' % str(err))
                                return
 
                                self.to_stderr('WARNING: parsing .netrc: %s' % str(err))
                                return
 
+               # No authentication to be performed
                if username is None:
                        return
 
                if username is None:
                        return
 
@@ -378,7 +434,7 @@ class YoutubeIE(InfoExtractor):
                                }
                request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
                try:
                                }
                request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
                try:
-                       self.to_stdout('[youtube] Logging in')
+                       self.report_login()
                        login_results = urllib2.urlopen(request).read()
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                                self.to_stderr('WARNING: unable to log in: bad username or password')
                        login_results = urllib2.urlopen(request).read()
                        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                                self.to_stderr('WARNING: unable to log in: bad username or password')
@@ -394,10 +450,11 @@ class YoutubeIE(InfoExtractor):
                                }
                request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
                try:
                                }
                request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
                try:
-                       self.to_stdout('[youtube] Confirming age')
+                       self.report_age_confirmation()
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        age_results = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       sys.exit('ERROR: unable to confirm age: %s' % str(err))
+                       self.to_stderr('ERROR: unable to confirm age: %s' % str(err))
+                       return
 
        def _real_extract(self, url):
                # Extract video id from URL
 
        def _real_extract(self, url):
                # Extract video id from URL
@@ -414,7 +471,7 @@ class YoutubeIE(InfoExtractor):
                        format_param = params.get('format', None)
 
                # Extension
                        format_param = params.get('format', None)
 
                # Extension
-               video_extension = {'18': 'mp4'}.get(format_param, 'flv')
+               video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv')
 
                # Normalize URL, including format
                normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id
 
                # Normalize URL, including format
                normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id
@@ -422,11 +479,12 @@ class YoutubeIE(InfoExtractor):
                        normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
                request = urllib2.Request(normalized_url, None, std_headers)
                try:
                        normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
                request = urllib2.Request(normalized_url, None, std_headers)
                try:
-                       self.to_stdout('[youtube] %s: Downloading video webpage' % video_id)
+                       self.report_webpage_download(video_id)
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        video_webpage = urllib2.urlopen(request).read()
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
-                       sys.exit('ERROR: unable to download video: %s' % str(err))
-               self.to_stdout('[youtube] %s: Extracting video information' % video_id)
+                       self.to_stderr('ERROR: unable to download video webpage: %s' % str(err))
+                       return [None]
+               self.report_information_extraction(video_id)
                
                # "t" param
                mobj = re.search(r', "t": "([^"]+)"', video_webpage)
                
                # "t" param
                mobj = re.search(r', "t": "([^"]+)"', video_webpage)
@@ -436,7 +494,7 @@ class YoutubeIE(InfoExtractor):
                video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
                if format_param is not None:
                        video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
                video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
                if format_param is not None:
                        video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
-               self.to_stdout('[youtube] %s: URL: %s' % (video_id, video_real_url))
+               self.report_video_url(video_id, video_real_url)
 
                # uploader
                mobj = re.search(r'More From: ([^<]*)<', video_webpage)
 
                # uploader
                mobj = re.search(r'More From: ([^<]*)<', video_webpage)
@@ -482,7 +540,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                                usage='Usage: %prog [options] url...',
                # Parse command line
                parser = optparse.OptionParser(
                                usage='Usage: %prog [options] url...',
-                               version='INTERNAL',
+                               version='2008.07.22',
                                conflict_handler='resolve',
                                )
                parser.add_option('-h', '--help',
                                conflict_handler='resolve',
                                )
                parser.add_option('-h', '--help',
@@ -512,7 +570,9 @@ if __name__ == '__main__':
                parser.add_option('-f', '--format',
                                dest='format', metavar='FMT', help='video format code')
                parser.add_option('-b', '--best-quality',
                parser.add_option('-f', '--format',
                                dest='format', metavar='FMT', help='video format code')
                parser.add_option('-b', '--best-quality',
-                               action='store_const', dest='video_format', help='alias for -f 18', const='18')
+                               action='store_const', dest='format', help='alias for -f 18', const='18')
+               parser.add_option('-m', '--mobile-version',
+                               action='store_const', dest='format', help='alias for -f 17', const='17')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                (opts, args) = parser.parse_args()
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                (opts, args) = parser.parse_args()
@@ -554,5 +614,9 @@ if __name__ == '__main__':
                retcode = fd.download(args)
                sys.exit(retcode)
 
                retcode = fd.download(args)
                sys.exit(retcode)
 
+       except DownloadError:
+               sys.exit(1)
+       except SameFileError:
+               sys.exit('ERROR: fixed output name but more than one file to download')
        except KeyboardInterrupt:
                sys.exit('\nERROR: Interrupted by user')
        except KeyboardInterrupt:
                sys.exit('\nERROR: Interrupted by user')