Bump version number
[youtube-dl] / youtube-dl
index b1f2717936c93cd0503bcf78deb0f6ccf54e47be..640ed4bdc801855e71a2fd6809dd65fc0724fb87 100755 (executable)
@@ -13,11 +13,18 @@ import os.path
 import re
 import socket
 import string
 import re
 import socket
 import string
+import subprocess
 import sys
 import time
 import urllib
 import urllib2
 
 import sys
 import time
 import urllib
 import urllib2
 
+# parse_qs was moved from the cgi module to the urlparse module recently.
+try:
+       from urlparse import parse_qs
+except ImportError:
+       from cgi import parse_qs
+
 std_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 std_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
@@ -33,15 +40,15 @@ def preferredencoding():
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
-       try:
-               pref = locale.getpreferredencoding()
-               # Mac OSX systems have this problem sometimes
-               if pref == '':
-                       return 'UTF-8'
-               return pref
-       except:
-               sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n')
-               return 'UTF-8'
+       def yield_preferredencoding():
+               try:
+                       pref = locale.getpreferredencoding()
+                       u'TEST'.encode(pref)
+               except:
+                       pref = 'UTF-8'
+               while True:
+                       yield pref
+       return yield_preferredencoding().next()
 
 class DownloadError(Exception):
        """Download Error exception.
 
 class DownloadError(Exception):
        """Download Error exception.
@@ -309,7 +316,7 @@ class FileDownloader(object):
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        try:
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        try:
-                               info_dict['url'] = self.verify_url(info_dict['url'])
+                               info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
                        except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
                                raise UnavailableFormatError
 
                        except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
                                raise UnavailableFormatError
 
@@ -327,7 +334,7 @@ class FileDownloader(object):
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-               if self.params['nooverwrites'] and os.path.exists(filename):
+               if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return
 
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return
 
@@ -338,7 +345,7 @@ class FileDownloader(object):
                        return
 
                try:
                        return
 
                try:
-                       success = self._do_download(filename, info_dict['url'])
+                       success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                except (OSError, IOError), err:
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                except (OSError, IOError), err:
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -390,21 +397,52 @@ class FileDownloader(object):
                        if info is None:
                                break
        
                        if info is None:
                                break
        
+       def _download_with_rtmpdump(self, filename, url):
+               self.report_destination(filename)
+
+               # Check for rtmpdump first
+               try:
+                       subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+               except (OSError, IOError):
+                       self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
+                       return False
+
+               # Download using rtmpdump. rtmpdump returns exit code 2 when
+               # the connection was interrumpted and resuming appears to be
+               # possible. This is part of rtmpdump's normal usage, AFAIK.
+               retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)])
+               while retval == 2:
+                       self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
+                       time.sleep(2.0) # This seems to be needed
+                       retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename])
+               if retval == 0:
+                       self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
+                       return True
+               else:
+                       self.trouble('ERROR: rtmpdump exited with code %d' % retval)
+                       return False
+
        def _do_download(self, filename, url):
        def _do_download(self, filename, url):
-               stream = None
-               open_mode = 'ab'
+               # Attempt to download using rtmpdump
+               if url.startswith('rtmp'):
+                       return self._download_with_rtmpdump(filename, url)
 
 
+               stream = None
+               open_mode = 'wb'
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)
 
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)
 
-               # Attempt to resume download with "continuedl" option
+               # Establish possible resume length
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0
-               if self.params['continuedl'] and resume_len != 0:
+
+               # Request parameters in case of being able to resume
+               if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
+                       open_mode = 'ab'
 
                # Establish connection
                try:
 
                # Establish connection
                try:
@@ -412,12 +450,16 @@ class FileDownloader(object):
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
+                       # Unable to resume
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
+
                        if content_length is not None and long(content_length) == resume_len:
                        if content_length is not None and long(content_length) == resume_len:
+                               # Because the file had already been fully downloaded
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
+                               # Because the server didn't let us
                                self.report_unable_to_resume()
                                open_mode = 'wb'
 
                                self.report_unable_to_resume()
                                open_mode = 'wb'
 
@@ -530,12 +572,13 @@ class YoutubeIE(InfoExtractor):
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
-       _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
+       _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
+               '37': 'mp4',
        }
 
        @staticmethod
        }
 
        @staticmethod
@@ -589,6 +632,10 @@ class YoutubeIE(InfoExtractor):
                """Report extracted video URL."""
                self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
        
                """Report extracted video URL."""
                self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
        
+       def report_rtmp_download(self):
+               """Indicate the download will use the RTMP protocol."""
+               self._downloader.to_stdout(u'[youtube] RTMP download detected')
+       
        def _real_initialize(self):
                if self._downloader is None:
                        return
        def _real_initialize(self):
                if self._downloader is None:
                        return
@@ -687,43 +734,45 @@ class YoutubeIE(InfoExtractor):
                        try:
                                self.report_video_info_webpage_download(video_id)
                                video_info_webpage = urllib2.urlopen(request).read()
                        try:
                                self.report_video_info_webpage_download(video_id)
                                video_info_webpage = urllib2.urlopen(request).read()
+                               video_info = parse_qs(video_info_webpage)
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                        self.report_information_extraction(video_id)
 
                        # "t" param
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                        self.report_information_extraction(video_id)
 
                        # "t" param
-                       mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'token' not in video_info:
                                # Attempt to see if YouTube has issued an error message
                                # Attempt to see if YouTube has issued an error message
-                               mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
-                               if mobj is None:
+                               if 'reason' not in video_info:
                                        self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
                                        stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
                                        stream.write(video_info_webpage)
                                        stream.close()
                                else:
                                        self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
                                        stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
                                        stream.write(video_info_webpage)
                                        stream.close()
                                else:
-                                       reason = urllib.unquote_plus(mobj.group(1))
+                                       reason = urllib.unquote_plus(video_info['reason'][0])
                                        self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
                                return
                                        self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
                                return
-                       token = urllib.unquote(mobj.group(1))
+                       token = urllib.unquote_plus(video_info['token'][0])
                        video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
                        if format_param is not None:
                                video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 
                        video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
                        if format_param is not None:
                                video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 
+                       # Check possible RTMP download
+                       if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
+                               self.report_rtmp_download()
+                               video_real_url = video_info['conn'][0]
+
                        # uploader
                        # uploader
-                       mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'author' not in video_info:
                                self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                                return
                                self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                                return
-                       video_uploader = urllib.unquote(mobj.group(1))
+                       video_uploader = urllib.unquote_plus(video_info['author'][0])
 
                        # title
 
                        # title
-                       mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'title' not in video_info:
                                self._downloader.trouble(u'ERROR: unable to extract video title')
                                return
                                self._downloader.trouble(u'ERROR: unable to extract video title')
                                return
-                       video_title = urllib.unquote(mobj.group(1))
+                       video_title = urllib.unquote_plus(video_info['title'][0])
                        video_title = video_title.decode('utf-8')
                        video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
                        video_title = video_title.replace(os.sep, u'%')
                        video_title = video_title.decode('utf-8')
                        video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
                        video_title = video_title.replace(os.sep, u'%')
@@ -867,7 +916,7 @@ class MetacafeIE(InfoExtractor):
                        return
                video_title = mobj.group(1).decode('utf-8')
 
                        return
                video_title = mobj.group(1).decode('utf-8')
 
-               mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage)
+               mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
@@ -1036,6 +1085,61 @@ class YoutubePlaylistIE(InfoExtractor):
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return
 
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return
 
+class YoutubeUserIE(InfoExtractor):
+       """Information Extractor for YouTube users."""
+
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
+       _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
+       _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
+       _youtube_ie = None
+
+       def __init__(self, youtube_ie, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+               self._youtube_ie = youtube_ie
+       
+       @staticmethod
+       def suitable(url):
+               return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
+
+       def report_download_page(self, username):
+               """Report attempt to download user page."""
+               self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
+
+       def _real_initialize(self):
+               self._youtube_ie.initialize()
+       
+       def _real_extract(self, url):
+               # Extract username
+               mobj = re.match(self._VALID_URL, url)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+                       return
+
+               # Download user page
+               username = mobj.group(1)
+               video_ids = []
+               pagenum = 1
+
+               self.report_download_page(username)
+               request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
+               try:
+                       page = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                       return
+
+               # Extract video identifiers
+               ids_in_page = []
+
+               for mobj in re.finditer(self._VIDEO_INDICATOR, page):
+                       if mobj.group(1) not in ids_in_page:
+                               ids_in_page.append(mobj.group(1))
+               video_ids.extend(ids_in_page)
+
+               for id in video_ids:
+                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+               return
+
 class PostProcessor(object):
        """Post Processor class.
 
 class PostProcessor(object):
        """Post Processor class.
 
@@ -1089,6 +1193,22 @@ if __name__ == '__main__':
                import getpass
                import optparse
 
                import getpass
                import optparse
 
+               # Function to update the program file with the latest version from bitbucket.org
+               def update_self(downloader, filename):
+                       # Note: downloader only used for options
+                       if not os.access (filename, os.W_OK):
+                               sys.exit('ERROR: no write permissions on %s' % filename)
+
+                       downloader.to_stdout('Updating to latest stable version...')
+                       latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
+                       latest_version = urllib.urlopen(latest_url).read().strip()
+                       prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+                       newcontent = urllib.urlopen(prog_url).read()
+                       stream = open(filename, 'w')
+                       stream.write(newcontent)
+                       stream.close()
+                       downloader.to_stdout('Updated to version %s' % latest_version)
+
                # General configuration
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
                # General configuration
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
@@ -1097,7 +1217,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
-                       version='INTERNAL',
+                       version='2010.01.06',
                        conflict_handler='resolve',
                )
 
                        conflict_handler='resolve',
                )
 
@@ -1105,6 +1225,8 @@ if __name__ == '__main__':
                                action='help', help='print this help text and exit')
                parser.add_option('-v', '--version',
                                action='version', help='print program version and exit')
                                action='help', help='print this help text and exit')
                parser.add_option('-v', '--version',
                                action='version', help='print program version and exit')
+               parser.add_option('-U', '--update',
+                               action='store_true', dest='update_self', help='update this program to latest stable version')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
@@ -1157,7 +1279,7 @@ if __name__ == '__main__':
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
-
+        
                # Batch file verification
                batchurls = []
                if opts.batchfile is not None:
                # Batch file verification
                batchurls = []
                if opts.batchfile is not None:
@@ -1170,8 +1292,6 @@ if __name__ == '__main__':
                all_urls = batchurls + args
 
                # Conflicting, missing and erroneous options
                all_urls = batchurls + args
 
                # Conflicting, missing and erroneous options
-               if len(all_urls) < 1:
-                       parser.error(u'you must provide at least one URL')
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
@@ -1192,6 +1312,7 @@ if __name__ == '__main__':
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
+               youtube_user_ie = YoutubeUserIE(youtube_ie)
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
 
                # File downloader
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
 
                # File downloader
@@ -1215,8 +1336,20 @@ if __name__ == '__main__':
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
+               fd.add_info_extractor(youtube_user_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
+
+               # Update version
+               if opts.update_self:
+                       update_self(fd, sys.argv[0])
+
+               # Maybe do nothing
+               if len(all_urls) < 1:
+                       if not opts.update_self:
+                               parser.error(u'you must provide at least one URL')
+                       else:
+                               sys.exit()
                retcode = fd.download(all_urls)
                sys.exit(retcode)
 
                retcode = fd.download(all_urls)
                sys.exit(retcode)