Give preference to format 34 before format 5 in quality list
[youtube-dl] / youtube-dl
index b1f2717936c93cd0503bcf78deb0f6ccf54e47be..10dd7c7916c990815c4c388211e1c17d709b1dce 100755 (executable)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 # Author: Ricardo Garcia Gonzalez
 # Author: Danny Colligan
 # -*- coding: utf-8 -*-
 # Author: Ricardo Garcia Gonzalez
 # Author: Danny Colligan
+# Author: Benjamin Johnson
 # License: Public domain code
 import htmlentitydefs
 import httplib
 # License: Public domain code
 import htmlentitydefs
 import httplib
@@ -13,13 +14,20 @@ import os.path
 import re
 import socket
 import string
 import re
 import socket
 import string
+import subprocess
 import sys
 import time
 import urllib
 import urllib2
 
 import sys
 import time
 import urllib
 import urllib2
 
+# parse_qs was moved from the cgi module to the urlparse module recently.
+try:
+       from urlparse import parse_qs
+except ImportError:
+       from cgi import parse_qs
+
 std_headers = {
 std_headers = {
-       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
+       'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
@@ -33,15 +41,68 @@ def preferredencoding():
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
+       def yield_preferredencoding():
+               try:
+                       pref = locale.getpreferredencoding()
+                       u'TEST'.encode(pref)
+               except:
+                       pref = 'UTF-8'
+               while True:
+                       yield pref
+       return yield_preferredencoding().next()
+
+def htmlentity_transform(matchobj):
+       """Transforms an HTML entity to a Unicode character.
+       
+       This function receives a match object and is intended to be used with
+       the re.sub() function.
+       """
+       entity = matchobj.group(1)
+
+       # Known non-numeric HTML entity
+       if entity in htmlentitydefs.name2codepoint:
+               return unichr(htmlentitydefs.name2codepoint[entity])
+
+       # Unicode character
+       mobj = re.match(ur'(?u)#(x?\d+)', entity)
+       if mobj is not None:
+               numstr = mobj.group(1)
+               if numstr.startswith(u'x'):
+                       base = 16
+                       numstr = u'0%s' % numstr
+               else:
+                       base = 10
+               return unichr(long(numstr, base))
+
+       # Unknown entity in name, return its literal representation
+       return (u'&%s;' % entity)
+
+def sanitize_title(utitle):
+       """Sanitizes a video title so it could be used as part of a filename."""
+       utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
+       return utitle.replace(unicode(os.sep), u'%')
+
+def sanitize_open(filename, open_mode):
+       """Try to open the given filename, and slightly tweak it if this fails.
+
+       Attempts to open the given filename. If this fails, it tries to change
+       the filename slightly, step by step, until it's either able to open it
+       or it fails and raises a final exception, like the standard open()
+       function.
+
+       It returns the tuple (stream, definitive_file_name).
+       """
        try:
        try:
-               pref = locale.getpreferredencoding()
-               # Mac OSX systems have this problem sometimes
-               if pref == '':
-                       return 'UTF-8'
-               return pref
-       except:
-               sys.stderr.write('WARNING: problem obtaining preferred encoding. Falling back to UTF-8.\n')
-               return 'UTF-8'
+               stream = open(filename, open_mode)
+               return (stream, filename)
+       except (IOError, OSError), err:
+               # In case of error, try to remove win32 forbidden chars
+               filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
+
+               # An exception here should be caught in the caller
+               stream = open(filename, open_mode)
+               return (stream, filename)
+
 
 class DownloadError(Exception):
        """Download Error exception.
 
 class DownloadError(Exception):
        """Download Error exception.
@@ -239,11 +300,15 @@ class FileDownloader(object):
                self._pps.append(pp)
                pp.set_downloader(self)
        
                self._pps.append(pp)
                pp.set_downloader(self)
        
-       def to_stdout(self, message, skip_eol=False):
+       def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
                """Print message to stdout if not in quiet mode."""
                """Print message to stdout if not in quiet mode."""
-               if not self.params.get('quiet', False):
-                       print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
+               try:
+                       if not self.params.get('quiet', False):
+                               print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
                        sys.stdout.flush()
                        sys.stdout.flush()
+               except (UnicodeEncodeError), err:
+                       if not ignore_encoding_errors:
+                               raise
        
        def to_stderr(self, message):
                """Print message to stderr."""
        
        def to_stderr(self, message):
                """Print message to stderr."""
@@ -281,7 +346,7 @@ class FileDownloader(object):
 
        def report_destination(self, filename):
                """Report destination filename."""
 
        def report_destination(self, filename):
                """Report destination filename."""
-               self.to_stdout(u'[download] Destination: %s' % filename)
+               self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
        
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress."""
        
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress."""
@@ -294,7 +359,10 @@ class FileDownloader(object):
        
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
        
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
-               self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+               try:
+                       self.to_stdout(u'[download] %s has already been downloaded' % file_name)
+               except (UnicodeEncodeError), err:
+                       self.to_stdout(u'[download] The file has already been downloaded')
        
        def report_unable_to_resume(self):
                """Report it was impossible to resume download."""
        
        def report_unable_to_resume(self):
                """Report it was impossible to resume download."""
@@ -308,16 +376,18 @@ class FileDownloader(object):
                """Process a single dictionary returned by an InfoExtractor."""
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                """Process a single dictionary returned by an InfoExtractor."""
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
-                       try:
-                               info_dict['url'] = self.verify_url(info_dict['url'])
-                       except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
-                               raise UnavailableFormatError
+                       # Verify URL if it's an HTTP one
+                       if info_dict['url'].startswith('http'):
+                               try:
+                                       info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
+                               except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
+                                       raise UnavailableFormatError
 
                        # Forced printings
                        if self.params.get('forcetitle', False):
 
                        # Forced printings
                        if self.params.get('forcetitle', False):
-                               print info_dict['title'].encode(preferredencoding())
+                               print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forceurl', False):
                        if self.params.get('forceurl', False):
-                               print info_dict['url'].encode(preferredencoding())
+                               print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 
                        return
                        
 
                        return
                        
@@ -327,7 +397,7 @@ class FileDownloader(object):
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
-               if self.params['nooverwrites'] and os.path.exists(filename):
+               if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return
 
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return
 
@@ -338,7 +408,7 @@ class FileDownloader(object):
                        return
 
                try:
                        return
 
                try:
-                       success = self._do_download(filename, info_dict['url'])
+                       success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                except (OSError, IOError), err:
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                except (OSError, IOError), err:
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -390,21 +460,53 @@ class FileDownloader(object):
                        if info is None:
                                break
        
                        if info is None:
                                break
        
+       def _download_with_rtmpdump(self, filename, url):
+               self.report_destination(filename)
+
+               # Check for rtmpdump first
+               try:
+                       subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+               except (OSError, IOError):
+                       self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
+                       return False
+
+               # Download using rtmpdump. rtmpdump returns exit code 2 when
+               # the connection was interrumpted and resuming appears to be
+               # possible. This is part of rtmpdump's normal usage, AFAIK.
+               basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
+               retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
+               while retval == 2 or retval == 1:
+                       self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
+                       time.sleep(2.0) # This seems to be needed
+                       retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
+               if retval == 0:
+                       self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
+                       return True
+               else:
+                       self.trouble('ERROR: rtmpdump exited with code %d' % retval)
+                       return False
+
        def _do_download(self, filename, url):
        def _do_download(self, filename, url):
-               stream = None
-               open_mode = 'ab'
+               # Attempt to download using rtmpdump
+               if url.startswith('rtmp'):
+                       return self._download_with_rtmpdump(filename, url)
 
 
+               stream = None
+               open_mode = 'wb'
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)
 
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)
 
-               # Attempt to resume download with "continuedl" option
+               # Establish possible resume length
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0
-               if self.params['continuedl'] and resume_len != 0:
+
+               # Request parameters in case of being able to resume
+               if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
+                       open_mode = 'ab'
 
                # Establish connection
                try:
 
                # Establish connection
                try:
@@ -412,12 +514,16 @@ class FileDownloader(object):
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
+                       # Unable to resume
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
+
                        if content_length is not None and long(content_length) == resume_len:
                        if content_length is not None and long(content_length) == resume_len:
+                               # Because the file had already been fully downloaded
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
+                               # Because the server didn't let us
                                self.report_unable_to_resume()
                                open_mode = 'wb'
 
                                self.report_unable_to_resume()
                                open_mode = 'wb'
 
@@ -439,7 +545,7 @@ class FileDownloader(object):
                        # Open file just in time
                        if stream is None:
                                try:
                        # Open file just in time
                        if stream is None:
                                try:
-                                       stream = open(filename, open_mode)
+                                       (stream, filename) = sanitize_open(filename, open_mode)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble('ERROR: unable to open for writing: %s' % str(err))
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble('ERROR: unable to open for writing: %s' % str(err))
@@ -525,46 +631,24 @@ class InfoExtractor(object):
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
-       _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
        _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        _NETRC_MACHINE = 'youtube'
-       _available_formats = ['22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
+       _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
+               '37': 'mp4',
        }
 
        @staticmethod
        def suitable(url):
                return (re.match(YoutubeIE._VALID_URL, url) is not None)
 
        }
 
        @staticmethod
        def suitable(url):
                return (re.match(YoutubeIE._VALID_URL, url) is not None)
 
-       @staticmethod
-       def htmlentity_transform(matchobj):
-               """Transforms an HTML entity to a Unicode character."""
-               entity = matchobj.group(1)
-
-               # Known non-numeric HTML entity
-               if entity in htmlentitydefs.name2codepoint:
-                       return unichr(htmlentitydefs.name2codepoint[entity])
-
-               # Unicode character
-               mobj = re.match(ur'(?u)#(x?\d+)', entity)
-               if mobj is not None:
-                       numstr = mobj.group(1)
-                       if numstr.startswith(u'x'):
-                               base = 16
-                               numstr = u'0%s' % numstr
-                       else:
-                               base = 10
-                       return unichr(long(numstr, base))
-
-               # Unknown entity in name, return its literal representation
-               return (u'&%s;' % entity)
-
        def report_lang(self):
                """Report attempt to set language."""
                self._downloader.to_stdout(u'[youtube] Setting language')
        def report_lang(self):
                """Report attempt to set language."""
                self._downloader.to_stdout(u'[youtube] Setting language')
@@ -589,6 +673,10 @@ class YoutubeIE(InfoExtractor):
                """Report extracted video URL."""
                self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
        
                """Report extracted video URL."""
                self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
        
+       def report_rtmp_download(self):
+               """Indicate the download will use the RTMP protocol."""
+               self._downloader.to_stdout(u'[youtube] RTMP download detected')
+       
        def _real_initialize(self):
                if self._downloader is None:
                        return
        def _real_initialize(self):
                if self._downloader is None:
                        return
@@ -687,46 +775,47 @@ class YoutubeIE(InfoExtractor):
                        try:
                                self.report_video_info_webpage_download(video_id)
                                video_info_webpage = urllib2.urlopen(request).read()
                        try:
                                self.report_video_info_webpage_download(video_id)
                                video_info_webpage = urllib2.urlopen(request).read()
+                               video_info = parse_qs(video_info_webpage)
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                        self.report_information_extraction(video_id)
 
                        # "t" param
                        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                                return
                        self.report_information_extraction(video_id)
 
                        # "t" param
-                       mobj = re.search(r'(?m)&token=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'token' not in video_info:
                                # Attempt to see if YouTube has issued an error message
                                # Attempt to see if YouTube has issued an error message
-                               mobj = re.search(r'(?m)&reason=([^&]+)(?:&|$)', video_info_webpage)
-                               if mobj is None:
+                               if 'reason' not in video_info:
                                        self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
                                        stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
                                        stream.write(video_info_webpage)
                                        stream.close()
                                else:
                                        self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
                                        stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
                                        stream.write(video_info_webpage)
                                        stream.close()
                                else:
-                                       reason = urllib.unquote_plus(mobj.group(1))
+                                       reason = urllib.unquote_plus(video_info['reason'][0])
                                        self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
                                return
                                        self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
                                return
-                       token = urllib.unquote(mobj.group(1))
+                       token = urllib.unquote_plus(video_info['token'][0])
                        video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
                        if format_param is not None:
                                video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 
                        video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
                        if format_param is not None:
                                video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 
+                       # Check possible RTMP download
+                       if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
+                               self.report_rtmp_download()
+                               video_real_url = video_info['conn'][0]
+
                        # uploader
                        # uploader
-                       mobj = re.search(r'(?m)&author=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'author' not in video_info:
                                self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                                return
                                self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                                return
-                       video_uploader = urllib.unquote(mobj.group(1))
+                       video_uploader = urllib.unquote_plus(video_info['author'][0])
 
                        # title
 
                        # title
-                       mobj = re.search(r'(?m)&title=([^&]+)(?:&|$)', video_info_webpage)
-                       if mobj is None:
+                       if 'title' not in video_info:
                                self._downloader.trouble(u'ERROR: unable to extract video title')
                                return
                                self._downloader.trouble(u'ERROR: unable to extract video title')
                                return
-                       video_title = urllib.unquote(mobj.group(1))
+                       video_title = urllib.unquote_plus(video_info['title'][0])
                        video_title = video_title.decode('utf-8')
                        video_title = video_title.decode('utf-8')
-                       video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
-                       video_title = video_title.replace(os.sep, u'%')
+                       video_title = sanitize_title(video_title)
 
                        # simplified title
                        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 
                        # simplified title
                        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
@@ -866,8 +955,9 @@ class MetacafeIE(InfoExtractor):
                        self._downloader.trouble(u'ERROR: unable to extract title')
                        return
                video_title = mobj.group(1).decode('utf-8')
                        self._downloader.trouble(u'ERROR: unable to extract title')
                        return
                video_title = mobj.group(1).decode('utf-8')
+               video_title = sanitize_title(video_title)
 
 
-               mobj = re.search(r'(?ms)<li id="ChnlUsr">.*?Submitter:.*?<a .*?>(.*?)<', webpage)
+               mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
@@ -887,6 +977,260 @@ class MetacafeIE(InfoExtractor):
                        self._downloader.trouble(u'ERROR: format not available for video')
 
 
                        self._downloader.trouble(u'ERROR: format not available for video')
 
 
+class GoogleIE(InfoExtractor):
+       """Information extractor for video.google.com."""
+
+       _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       @staticmethod
+       def suitable(url):
+               return (re.match(GoogleIE._VALID_URL, url) is not None)
+
+       def report_download_webpage(self, video_id):
+               """Report webpage download."""
+               self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
+
+       def report_extraction(self, video_id):
+               """Report information extraction."""
+               self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
+
+       def _real_initialize(self):
+               return
+
+       def _real_extract(self, url):
+               # Extract id from URL
+               mobj = re.match(self._VALID_URL, url)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+                       return
+
+               video_id = mobj.group(1)
+
+               video_extension = 'mp4'
+
+               # Retrieve video webpage to extract further information
+               request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
+               try:
+                       self.report_download_webpage(video_id)
+                       webpage = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       return
+
+               # Extract URL, uploader, and title from webpage
+               self.report_extraction(video_id)
+               mobj = re.search(r"download_url:'([^']+)'", webpage)
+               if mobj is None:
+                       video_extension = 'flv'
+                       mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract media URL')
+                       return
+               mediaURL = urllib.unquote(mobj.group(1))
+               mediaURL = mediaURL.replace('\\x3d', '\x3d')
+               mediaURL = mediaURL.replace('\\x26', '\x26')
+
+               video_url = mediaURL
+
+               mobj = re.search(r'<title>(.*)</title>', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
+               video_title = mobj.group(1).decode('utf-8')
+               video_title = sanitize_title(video_title)
+               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+
+               # Google Video doesn't show uploader nicknames?
+               video_uploader = 'NA'
+
+               try:
+                       # Process video information
+                       self._downloader.process_info({
+                               'id':           video_id.decode('utf-8'),
+                               'url':          video_url.decode('utf-8'),
+                               'uploader':     video_uploader.decode('utf-8'),
+                               'title':        video_title,
+                               'stitle':       simple_title,
+                               'ext':          video_extension.decode('utf-8'),
+                       })
+               except UnavailableFormatError:
+                       self._downloader.trouble(u'ERROR: format not available for video')
+
+
+class PhotobucketIE(InfoExtractor):
+       """Information extractor for photobucket.com."""
+
+       _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       @staticmethod
+       def suitable(url):
+               return (re.match(PhotobucketIE._VALID_URL, url) is not None)
+
+       def report_download_webpage(self, video_id):
+               """Report webpage download."""
+               self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
+
+       def report_extraction(self, video_id):
+               """Report information extraction."""
+               self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
+
+       def _real_initialize(self):
+               return
+
+       def _real_extract(self, url):
+               # Extract id from URL
+               mobj = re.match(self._VALID_URL, url)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+                       return
+
+               video_id = mobj.group(1)
+
+               video_extension = 'flv'
+
+               # Retrieve video webpage to extract further information
+               request = urllib2.Request(url)
+               try:
+                       self.report_download_webpage(video_id)
+                       webpage = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       return
+
+               # Extract URL, uploader, and title from webpage
+               self.report_extraction(video_id)
+               mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract media URL')
+                       return
+               mediaURL = urllib.unquote(mobj.group(1))
+
+               video_url = mediaURL
+
+               mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
+               video_title = mobj.group(1).decode('utf-8')
+               video_title = sanitize_title(video_title)
+               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+
+               video_uploader = mobj.group(2).decode('utf-8')
+
+               try:
+                       # Process video information
+                       self._downloader.process_info({
+                               'id':           video_id.decode('utf-8'),
+                               'url':          video_url.decode('utf-8'),
+                               'uploader':     video_uploader,
+                               'title':        video_title,
+                               'stitle':       simple_title,
+                               'ext':          video_extension.decode('utf-8'),
+                       })
+               except UnavailableFormatError:
+                       self._downloader.trouble(u'ERROR: format not available for video')
+
+
+class GenericIE(InfoExtractor):
+       """Generic last-resort information extractor."""
+
+       def __init__(self, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+
+       @staticmethod
+       def suitable(url):
+               return True
+
+       def report_download_webpage(self, video_id):
+               """Report webpage download."""
+               self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
+               self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
+
+       def report_extraction(self, video_id):
+               """Report information extraction."""
+               self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
+
+       def _real_initialize(self):
+               return
+
+       def _real_extract(self, url):
+               video_id = url.split('/')[-1]
+               request = urllib2.Request(url)
+               try:
+                       self.report_download_webpage(video_id)
+                       webpage = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+                       return
+               except ValueError, err:
+                       # since this is the last-resort InfoExtractor, if
+                       # this error is thrown, it'll be thrown here
+                       self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+                       return
+
+               # Start with something easy: JW Player in SWFObject
+               mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
+               if mobj is None:
+                       # Broaden the search a little bit
+                       mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+                       return
+
+               # It's possible that one of the regexes
+               # matched, but returned an empty group:
+               if mobj.group(1) is None:
+                       self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
+                       return
+
+               video_url = urllib.unquote(mobj.group(1))
+               video_id  = os.path.basename(video_url)
+
+               # here's a fun little line of code for you:
+               video_extension = os.path.splitext(video_id)[1][1:]
+               video_id        = os.path.splitext(video_id)[0]
+
+               # it's tempting to parse this further, but you would
+               # have to take into account all the variations like
+               #   Video Title - Site Name
+               #   Site Name | Video Title
+               #   Video Title - Tagline | Site Name
+               # and so on and so forth; it's just not practical
+               mobj = re.search(r'<title>(.*)</title>', webpage)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
+               video_title = mobj.group(1).decode('utf-8')
+               video_title = sanitize_title(video_title)
+               simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+
+               # video uploader is domain name
+               mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: unable to extract title')
+                       return
+               video_uploader = mobj.group(1).decode('utf-8')
+
+               try:
+                       # Process video information
+                       self._downloader.process_info({
+                               'id':           video_id.decode('utf-8'),
+                               'url':          video_url.decode('utf-8'),
+                               'uploader':     video_uploader,
+                               'title':        video_title,
+                               'stitle':       simple_title,
+                               'ext':          video_extension.decode('utf-8'),
+                       })
+               except UnavailableFormatError:
+                       self._downloader.trouble(u'ERROR: format not available for video')
+
+
 class YoutubeSearchIE(InfoExtractor):
        """Information Extractor for YouTube search queries."""
        _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
 class YoutubeSearchIE(InfoExtractor):
        """Information Extractor for YouTube search queries."""
        _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
@@ -906,6 +1250,7 @@ class YoutubeSearchIE(InfoExtractor):
 
        def report_download_page(self, query, pagenum):
                """Report attempt to download playlist page with given number."""
 
        def report_download_page(self, query, pagenum):
                """Report attempt to download playlist page with given number."""
+               query = query.decode(preferredencoding())
                self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
        def _real_initialize(self):
                self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
 
        def _real_initialize(self):
@@ -919,6 +1264,7 @@ class YoutubeSearchIE(InfoExtractor):
 
                prefix, query = query.split(':')
                prefix = prefix[8:]
 
                prefix, query = query.split(':')
                prefix = prefix[8:]
+               query  = query.encode('utf-8')
                if prefix == '':
                        self._download_n_results(query, 1)
                        return
                if prefix == '':
                        self._download_n_results(query, 1)
                        return
@@ -1036,6 +1382,61 @@ class YoutubePlaylistIE(InfoExtractor):
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return
 
                        self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
                return
 
+class YoutubeUserIE(InfoExtractor):
+       """Information Extractor for YouTube users."""
+
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
+       _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
+       _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
+       _youtube_ie = None
+
+       def __init__(self, youtube_ie, downloader=None):
+               InfoExtractor.__init__(self, downloader)
+               self._youtube_ie = youtube_ie
+       
+       @staticmethod
+       def suitable(url):
+               return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
+
+       def report_download_page(self, username):
+               """Report attempt to download user page."""
+               self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
+
+       def _real_initialize(self):
+               self._youtube_ie.initialize()
+       
+       def _real_extract(self, url):
+               # Extract username
+               mobj = re.match(self._VALID_URL, url)
+               if mobj is None:
+                       self._downloader.trouble(u'ERROR: invalid url: %s' % url)
+                       return
+
+               # Download user page
+               username = mobj.group(1)
+               video_ids = []
+               pagenum = 1
+
+               self.report_download_page(username)
+               request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
+               try:
+                       page = urllib2.urlopen(request).read()
+               except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+                       self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
+                       return
+
+               # Extract video identifiers
+               ids_in_page = []
+
+               for mobj in re.finditer(self._VIDEO_INDICATOR, page):
+                       if mobj.group(1) not in ids_in_page:
+                               ids_in_page.append(mobj.group(1))
+               video_ids.extend(ids_in_page)
+
+               for id in video_ids:
+                       self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
+               return
+
 class PostProcessor(object):
        """Post Processor class.
 
 class PostProcessor(object):
        """Post Processor class.
 
@@ -1089,6 +1490,22 @@ if __name__ == '__main__':
                import getpass
                import optparse
 
                import getpass
                import optparse
 
+               # Function to update the program file with the latest version from bitbucket.org
+               def update_self(downloader, filename):
+                       # Note: downloader only used for options
+                       if not os.access (filename, os.W_OK):
+                               sys.exit('ERROR: no write permissions on %s' % filename)
+
+                       downloader.to_stdout('Updating to latest stable version...')
+                       latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
+                       latest_version = urllib.urlopen(latest_url).read().strip()
+                       prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+                       newcontent = urllib.urlopen(prog_url).read()
+                       stream = open(filename, 'w')
+                       stream.write(newcontent)
+                       stream.close()
+                       downloader.to_stdout('Updated to version %s' % latest_version)
+
                # General configuration
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
                # General configuration
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
                urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
@@ -1097,7 +1514,7 @@ if __name__ == '__main__':
                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
                # Parse command line
                parser = optparse.OptionParser(
                        usage='Usage: %prog [options] url...',
-                       version='INTERNAL',
+                       version='2010.02.13',
                        conflict_handler='resolve',
                )
 
                        conflict_handler='resolve',
                )
 
@@ -1105,6 +1522,8 @@ if __name__ == '__main__':
                                action='help', help='print this help text and exit')
                parser.add_option('-v', '--version',
                                action='version', help='print program version and exit')
                                action='help', help='print this help text and exit')
                parser.add_option('-v', '--version',
                                action='version', help='print program version and exit')
+               parser.add_option('-U', '--update',
+                               action='store_true', dest='update_self', help='update this program to latest stable version')
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
                parser.add_option('-i', '--ignore-errors',
                                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
                parser.add_option('-r', '--rate-limit',
@@ -1157,7 +1576,7 @@ if __name__ == '__main__':
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
                parser.add_option_group(filesystem)
 
                (opts, args) = parser.parse_args()
-
+        
                # Batch file verification
                batchurls = []
                if opts.batchfile is not None:
                # Batch file verification
                batchurls = []
                if opts.batchfile is not None:
@@ -1170,8 +1589,6 @@ if __name__ == '__main__':
                all_urls = batchurls + args
 
                # Conflicting, missing and erroneous options
                all_urls = batchurls + args
 
                # Conflicting, missing and erroneous options
-               if len(all_urls) < 1:
-                       parser.error(u'you must provide at least one URL')
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
@@ -1192,7 +1609,11 @@ if __name__ == '__main__':
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
+               youtube_user_ie = YoutubeUserIE(youtube_ie)
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
+               google_ie = GoogleIE()
+               photobucket_ie = PhotobucketIE()
+               generic_ie = GenericIE()
 
                # File downloader
                fd = FileDownloader({
 
                # File downloader
                fd = FileDownloader({
@@ -1215,8 +1636,26 @@ if __name__ == '__main__':
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
                        })
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
+               fd.add_info_extractor(youtube_user_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(youtube_ie)
+               fd.add_info_extractor(google_ie)
+               fd.add_info_extractor(photobucket_ie)
+
+               # This must come last since it's the
+               # fallback if none of the others work
+               fd.add_info_extractor(generic_ie)
+
+               # Update version
+               if opts.update_self:
+                       update_self(fd, sys.argv[0])
+
+               # Maybe do nothing
+               if len(all_urls) < 1:
+                       if not opts.update_self:
+                               parser.error(u'you must provide at least one URL')
+                       else:
+                               sys.exit()
                retcode = fd.download(all_urls)
                sys.exit(retcode)
 
                retcode = fd.download(all_urls)
                sys.exit(retcode)