New option --get-filename to print output filename
[youtube-dl] / youtube-dl
index c49c1b06448304f68d37395c7f6b23041d15b14f..be859a5a1cfe182d4b61506a4e13f4b2ef414207 100755 (executable)
@@ -4,6 +4,7 @@
 # Author: Danny Colligan
 # Author: Benjamin Johnson
 # Author: Vasyl' Vavrychuk
+# Author: Witold Baryluk
 # License: Public domain code
 import cookielib
 import ctypes
@@ -189,6 +190,14 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                except zlib.error:
                        return zlib.decompress(data)
        
+       @staticmethod
+       def addinfourl_wrapper(stream, headers, url, code):  # build an addinfourl response that carries the HTTP status 'code'
+               if hasattr(urllib2.addinfourl, 'getcode'):  # feature test; presumably getcode implies the constructor accepts 'code' - confirm
+                       return urllib2.addinfourl(stream, headers, url, code)
+               ret = urllib2.addinfourl(stream, headers, url)  # fallback: construct without the status code
+               ret.code = code  # then attach it as a plain attribute
+               return ret
+       
        def http_request(self, req):
                for h in std_headers:
                        if h in req.headers:
@@ -205,12 +214,12 @@ class YoutubeDLHandler(urllib2.HTTPHandler):
                # gzip
                if resp.headers.get('Content-encoding', '') == 'gzip':
                        gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
-                       resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+                       resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                # deflate
                if resp.headers.get('Content-encoding', '') == 'deflate':
                        gz = StringIO.StringIO(self.deflate(resp.read()))
-                       resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+                       resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
                        resp.msg = old_resp.msg
                return resp
 
@@ -249,6 +258,7 @@ class FileDownloader(object):
        forcetitle:       Force printing title.
        forcethumbnail:   Force printing thumbnail URL.
        forcedescription: Force printing description.
+       forcefilename:    Force printing final filename.
        simulate:         Do not download the video files.
        format:           Video format code.
        format_limit:     Highest quality format to try.
@@ -485,8 +495,21 @@ class FileDownloader(object):
                """Increment the ordinal that assigns a number to each file."""
                self._num_downloads += 1
 
+       def prepare_filename(self, info_dict):  # expands params['outtmpl'] with info_dict plus 'epoch' and 'autonumber'
+               """Generate the output filename."""
+               try:
+                       template_dict = dict(info_dict)  # work on a copy so info_dict itself is not modified
+                       template_dict['epoch'] = unicode(long(time.time()))  # current time as whole seconds, as text
+                       template_dict['autonumber'] = unicode('%05d' % self._num_downloads)  # download ordinal, zero-padded to 5 digits
+                       filename = self.params['outtmpl'] % template_dict
+                       return filename
+               except (ValueError, KeyError), err:  # err is bound but not used
+                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+                       return None  # reached only if trouble() returns; process_info checks for None
+
        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor."""
+               filename = self.prepare_filename(info_dict)
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
@@ -498,16 +521,12 @@ class FileDownloader(object):
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+                       if self.params.get('forcefilename', False) and filename is not None:
+                               print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 
                        return
 
-               try:
-                       template_dict = dict(info_dict)
-                       template_dict['epoch'] = unicode(long(time.time()))
-                       template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
-                       filename = self.params['outtmpl'] % template_dict
-               except (ValueError, KeyError), err:
-                       self.trouble(u'ERROR: invalid system charset or erroneous output template')
+               if filename is None:
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
@@ -805,7 +824,7 @@ class InfoExtractor(object):
 class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""
 
-       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+       _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
@@ -1294,7 +1313,7 @@ class DailymotionIE(InfoExtractor):
                video_title = mobj.group(1).decode('utf-8')
                video_title = sanitize_title(video_title)
 
-               mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
+               mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
                if mobj is None:
                        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
                        return
@@ -2044,7 +2063,7 @@ class YahooSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
        """Information Extractor for YouTube playlists."""
 
-       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
+       _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
        _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
        _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
        _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
@@ -2296,20 +2315,26 @@ if __name__ == '__main__':
                import getpass
                import optparse
 
-               # Function to update the program file with the latest version from bitbucket.org
+               # Function to update the program file with the latest version from the repository.
                def update_self(downloader, filename):  # overwrite 'filename' with the latest released script; exits via sys.exit on failure
                        # Note: downloader only used for options
-                       if not os.access (filename, os.W_OK):
+                       if not os.access(filename, os.W_OK):
                                sys.exit('ERROR: no write permissions on %s' % filename)
 
                        downloader.to_screen('Updating to latest stable version...')
-                       latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
-                       latest_version = urllib.urlopen(latest_url).read().strip()
-                       prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
-                       newcontent = urllib.urlopen(prog_url).read()
-                       stream = open(filename, 'w')
-                       stream.write(newcontent)
-                       stream.close()
+                       try:  # download phase: read the version tag, then fetch the script at that tag
+                               latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
+                               latest_version = urllib.urlopen(latest_url).read().strip()
+                               prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
+                               newcontent = urllib.urlopen(prog_url).read()
+                       except (IOError, OSError), err:  # err is bound but not used; the cause is not reported
+                               sys.exit('ERROR: unable to download latest version')
+                       try:  # write phase is separate so the exit message names the step that failed
+                               stream = open(filename, 'w')  # NOTE(review): text mode; presumably may alter line endings on Windows - consider 'wb'
+                               stream.write(newcontent)
+                               stream.close()  # NOTE(review): skipped if write() raises; sys.exit follows in that case
+                       except (IOError, OSError), err:
+                               sys.exit('ERROR: unable to overwrite current version')
                        downloader.to_screen('Updated to version %s' % latest_version)
 
                # Parse command line
@@ -2369,6 +2394,8 @@ if __name__ == '__main__':
                                action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
                verbosity.add_option('--get-description',
                                action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
+               verbosity.add_option('--get-filename',
+                               action='store_true', dest='getfilename', help='simulate, quiet but print output filename', default=False)
                verbosity.add_option('--no-progress',
                                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
                verbosity.add_option('--console-title',
@@ -2488,12 +2515,13 @@ if __name__ == '__main__':
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                        'password': opts.password,
-                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'forceurl': opts.geturl,
                        'forcetitle': opts.gettitle,
                        'forcethumbnail': opts.getthumbnail,
                        'forcedescription': opts.getdescription,
-                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+                       'forcefilename': opts.getfilename,
+                       'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
                        'format': opts.format,
                        'format_limit': opts.format_limit,
                        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))