bdc33b57f2eb0992c22641797edf255987ee1a9b
[youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # License: Public domain code
6 import htmlentitydefs
7 import httplib
8 import locale
9 import math
10 import netrc
11 import os
12 import os.path
13 import re
14 import socket
15 import string
16 import subprocess
17 import sys
18 import time
19 import urllib
20 import urllib2
21
22 # parse_qs was moved from the cgi module to the urlparse module recently.
23 try:
24         from urlparse import parse_qs
25 except ImportError:
26         from cgi import parse_qs
27
# HTTP headers sent with every request. The browser-like User-Agent keeps
# servers from sending pages meant for unknown/limited clients.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles: ASCII letters and digits.
# NOTE: str.decode() is Python 2 only; this produces a unicode string there.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
36
def preferredencoding():
        """Get preferred encoding.

        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        # The original wrapped this in a one-shot generator and called
        # .next() (Python 2 only) just to return a constant; a plain
        # try/except does the same thing. 'except Exception' (instead of a
        # bare 'except:') avoids swallowing KeyboardInterrupt/SystemExit.
        try:
                pref = locale.getpreferredencoding()
                # Verify the reported codec actually exists and can encode
                # text; some platforms report bogus or unsupported encodings.
                u'TEST'.encode(pref)
        except Exception:
                pref = 'UTF-8'
        return pref
52
class DownloadError(Exception):
        """Raised when a download fails.

        FileDownloader objects throw this, carrying an appropriate error
        message, when they are not configured to continue on errors.
        """
61
class SameFileError(Exception):
        """Raised when several downloads would collide on disk.

        Thrown by FileDownloader objects when they detect that multiple
        files would have to be written to the same output file.
        """
69
class PostProcessingError(Exception):
        """Raised when a postprocessing task fails.

        A PostProcessor's .run() method may raise this to signal an error
        in the postprocessing step.
        """
77
class UnavailableFormatError(Exception):
        """Raised when the requested video format does not exist.

        Thrown when a video is requested in a format that is not offered
        for that particular video.
        """
85
class ContentTooShortError(Exception):
        """Raised when a download delivers less data than announced.

        FileDownloader objects raise this when the file they fetched is
        smaller than the server's Content-Length promised, which usually
        means the connection was interrupted.
        """
        # Byte counts describing the failed transfer (both in bytes).
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                self.expected = expected
                self.downloaded = downloaded
100
class FileDownloader(object):
        """File Downloader class.

        File downloader objects are the ones responsible of downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. As, given a video URL, the downloader doesn't know how to
        extract all the needed information, task that InfoExtractors do, it
        has to pass the URL to one of them.

        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.

        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
        options instead. These options are available through the params
        attribute for the InfoExtractors to use. The FileDownloader also
        registers itself as the downloader in charge for the InfoExtractors
        that are added to it, so this is a "mutual registration".

        Available options:

        username:       Username for authentication purposes.
        password:       Password for authentication purposes.
        usenetrc:       Use netrc for authentication instead.
        quiet:          Do not print messages to stdout.
        forceurl:       Force printing final URL.
        forcetitle:     Force printing title.
        simulate:       Do not download the video files.
        format:         Video format code.
        outtmpl:        Template for output names.
        ignoreerrors:   Do not stop on download errors.
        ratelimit:      Download speed limit, in bytes/sec.
        nooverwrites:   Prevent overwriting files.
        continuedl:     Try to continue downloads if possible.
        """

        # Class-level defaults; real values are assigned per instance in __init__.
        params = None
        _ies = []
        _pps = []
        _download_retcode = None

        def __init__(self, params):
                """Create a FileDownloader object with the given options."""
                self._ies = []
                self._pps = []
                self._download_retcode = 0
                self.params = params

        @staticmethod
        def pmkdir(filename):
                """Create directory components in filename. Similar to Unix "mkdir -p"."""
                components = filename.split(os.sep)
                # Build every ancestor path of the file: 'a/b/f' -> ['a', 'a/b']
                aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
                aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
                for dir in aggregate:
                        if not os.path.exists(dir):
                                os.mkdir(dir)

        @staticmethod
        def format_bytes(bytes):
                """Format a byte count as a short human-readable string, e.g. '1.21M'."""
                if bytes is None:
                        return 'N/A'
                if type(bytes) is str:
                        # Content-Length header values arrive as strings.
                        bytes = float(bytes)
                if bytes == 0.0:
                        exponent = 0
                else:
                        # Power of 1024 chooses the suffix below.
                        exponent = long(math.log(bytes, 1024.0))
                suffix = 'bkMGTPEZY'[exponent]
                converted = float(bytes) / float(1024**exponent)
                return '%.2f%s' % (converted, suffix)

        @staticmethod
        def calc_percent(byte_counter, data_len):
                """Return a fixed-width percentage string; '---.-%' if total is unknown."""
                if data_len is None:
                        return '---.-%'
                return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

        @staticmethod
        def calc_eta(start, now, total, current):
                """Estimate remaining time as 'MM:SS'; '--:--' when unknown or over 99 min."""
                if total is None:
                        return '--:--'
                dif = now - start
                if current == 0 or dif < 0.001: # One millisecond
                        return '--:--'
                rate = float(current) / dif
                eta = long((float(total) - float(current)) / rate)
                (eta_mins, eta_secs) = divmod(eta, 60)
                if eta_mins > 99:
                        return '--:--'
                return '%02d:%02d' % (eta_mins, eta_secs)

        @staticmethod
        def calc_speed(start, now, bytes):
                """Return a right-aligned transfer-speed string like '  1.21Mb/s'."""
                dif = now - start
                if bytes == 0 or dif < 0.001: # One millisecond
                        return '%10s' % '---b/s'
                return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

        @staticmethod
        def best_block_size(elapsed_time, bytes):
                """Adapt the next read size to the measured rate of the last read."""
                # Allow the block size to at most halve or double each step.
                new_min = max(bytes / 2.0, 1.0)
                new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
                if elapsed_time < 0.001:
                        return long(new_max)
                rate = bytes / elapsed_time
                if rate > new_max:
                        return long(new_max)
                if rate < new_min:
                        return long(new_min)
                return long(rate)

        @staticmethod
        def parse_bytes(bytestr):
                """Parse a string indicating a byte quantity into a long integer."""
                matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
                if matchobj is None:
                        return None
                number = float(matchobj.group(1))
                # An empty suffix yields index 0 ('b'), i.e. a multiplier of 1.
                multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                return long(round(number * multiplier))

        @staticmethod
        def verify_url(url):
                """Verify a URL is valid and data could be downloaded. Return real data URL."""
                request = urllib2.Request(url, None, std_headers)
                data = urllib2.urlopen(request)
                data.read(1) # force at least one byte to actually transfer
                url = data.geturl() # final URL after any redirects
                data.close()
                return url

        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list."""
                self._ies.append(ie)
                ie.set_downloader(self)

        def add_post_processor(self, pp):
                """Add a PostProcessor object to the end of the chain."""
                self._pps.append(pp)
                pp.set_downloader(self)

        def to_stdout(self, message, skip_eol=False):
                """Print message to stdout if not in quiet mode."""
                if not self.params.get('quiet', False):
                        # The trailing comma suppresses print's own newline; one is
                        # appended manually unless skip_eol is set (progress lines).
                        print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
                        sys.stdout.flush()

        def to_stderr(self, message):
                """Print message to stderr."""
                print >>sys.stderr, message.encode(preferredencoding())

        def fixed_template(self):
                """Checks if the output template is fixed.

                "Fixed" means it contains no %(field)s placeholders, so every
                download would be written to the same file.
                """
                return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

        def trouble(self, message=None):
                """Determine action to take when a download problem appears.

                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
                not when errors are found, after printing the message.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self.params.get('ignoreerrors', False):
                        raise DownloadError(message)
                self._download_retcode = 1

        def slow_down(self, start_time, byte_counter):
                """Sleep if the download speed is over the rate limit."""
                rate_limit = self.params.get('ratelimit', None)
                if rate_limit is None or byte_counter == 0:
                        return
                now = time.time()
                elapsed = now - start_time
                if elapsed <= 0.0:
                        return
                speed = float(byte_counter) / elapsed
                if speed > rate_limit:
                        # Sleep exactly long enough for the average speed to fall
                        # back to the configured limit.
                        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

        def report_destination(self, filename):
                """Report destination filename."""
                self.to_stdout(u'[download] Destination: %s' % filename)

        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress. Uses \\r to redraw the same line."""
                self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

        def report_resuming_byte(self, resume_len):
                """Report attempt to resume at given byte."""
                self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)

        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
                self.to_stdout(u'[download] %s has already been downloaded' % file_name)

        def report_unable_to_resume(self):
                """Report it was impossible to resume download."""
                self.to_stdout(u'[download] Unable to resume')

        def report_finish(self):
                """Report download finished."""
                self.to_stdout(u'')

        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor."""
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        try:
                                info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
                        except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
                                raise UnavailableFormatError

                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding())
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding())

                        return

                try:
                        template_dict = dict(info_dict)
                        template_dict['epoch'] = unicode(long(time.time()))
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                        # NOTE(review): no 'return' here -- when 'ignoreerrors' is
                        # set, trouble() does not raise and 'filename' is left
                        # undefined for the code below (NameError). Likely a bug.
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble('ERROR: unable to create directories: %s' % str(err))
                        return

                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                except (OSError, IOError), err:
                        # Treated as "this format did not work"; the caller may retry
                        # with another format.
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble('ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble('ERROR: postprocessing: %s' % str(err))
                                return

        def download(self, url_list):
                """Download a given list of URLs. Returns the process exit code."""
                if len(url_list) > 1 and self.fixed_template():
                        # A fixed template would write every URL to the same file.
                        raise SameFileError(self.params['outtmpl'])

                for url in url_list:
                        suitable_found = False
                        for ie in self._ies:
                                # Go to next InfoExtractor if not suitable
                                if not ie.suitable(url):
                                        continue

                                # Suitable InfoExtractor found
                                suitable_found = True

                                # Extract information from URL and process it
                                ie.extract(url)

                                # Suitable InfoExtractor had been found; go to next URL
                                break

                        if not suitable_found:
                                self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

                return self._download_retcode

        def post_process(self, filename, ie_info):
                """Run the postprocessing chain on the given file."""
                info = dict(ie_info)
                info['filepath'] = filename
                for pp in self._pps:
                        info = pp.run(info)
                        if info is None:
                                # A postprocessor can stop the chain by returning None.
                                break

        def _download_with_rtmpdump(self, filename, url):
                """Download an RTMP stream by delegating to the external rtmpdump tool."""
                self.report_destination(filename)

                # Check for rtmpdump first
                try:
                        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
                except (OSError, IOError):
                        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
                        return False

                # Download using rtmpdump. rtmpdump returns exit code 2 when
                # the connection was interrumpted and resuming appears to be
                # possible. This is part of rtmpdump's normal usage, AFAIK.
                retval = subprocess.call(['rtmpdump', '-q', '-r', url, '-o', filename] + [[], ['-e']][self.params.get('continuedl', False)])
                while retval == 2:
                        # Keep retrying with -e (resume) for as long as rtmpdump
                        # reports a resumable interruption.
                        self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
                        time.sleep(2.0) # This seems to be needed
                        retval = subprocess.call(['rtmpdump', '-q', '-e', '-r', url, '-o', filename])
                if retval == 0:
                        self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
                        return True
                else:
                        self.trouble('ERROR: rtmpdump exited with code %d' % retval)
                        return False

        def _do_download(self, filename, url):
                """Download url to filename over HTTP, resuming if configured and possible."""
                # Attempt to download using rtmpdump
                if url.startswith('rtmp'):
                        return self._download_with_rtmpdump(filename, url)

                stream = None
                open_mode = 'wb'
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)

                # Establish possible resume length
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0

                # Request parameters in case of being able to resume
                if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        open_mode = 'ab'

                # Establish connection
                try:
                        data = urllib2.urlopen(request)
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
                        # Unable to resume; re-request without the Range header to
                        # find out whether the file was in fact already complete.
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']

                        if content_length is not None and long(content_length) == resume_len:
                                # Because the file had already been fully downloaded
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
                                # Because the server didn't let us
                                self.report_unable_to_resume()
                                open_mode = 'wb'

                # data_len is the raw header value (a numeric string) or None.
                data_len = data.info().get('Content-length', None)
                data_len_str = self.format_bytes(data_len)
                byte_counter = 0
                block_size = 1024
                start = time.time()
                while True:
                        # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
                        data_block_len = len(data_block)
                        if data_block_len == 0:
                                break
                        byte_counter += data_block_len

                        # Open file just in time
                        if stream is None:
                                try:
                                        stream = open(filename, open_mode)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble('ERROR: unable to open for writing: %s' % str(err))
                                        return False
                        stream.write(data_block)
                        # Adapt the next read size to the observed transfer rate.
                        block_size = self.best_block_size(after - before, data_block_len)

                        # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
                        self.report_progress(percent_str, data_len_str, speed_str, eta_str)

                        # Apply rate limit
                        self.slow_down(start, byte_counter)

                self.report_finish()
                # String comparison is deliberate: data_len is the header string.
                if data_len is not None and str(byte_counter) != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                return True
505
class InfoExtractor(object):
        """Information Extractor class.

        An information extractor takes a URL and extracts data about the
        video (or videos) it refers to: the real video URL, the literal and
        simplified titles, the uploader, and so on. The result is a
        dictionary handed to the FileDownloader, which may then download the
        video to disk among other outcomes. Each dictionary must include
        the following fields:

        id:             Video identifier.
        url:            Final video URL.
        uploader:       Nickname of the video uploader.
        title:          Literal title.
        stitle:         Simplified title.
        ext:            Video filename extension.

        Subclasses are expected to redefine _real_initialize() and
        _real_extract(), as well as the suitable() static method, and will
        normally be instantiated and registered with the main downloader.
        """

        # Class-level defaults; per-instance values are set in __init__.
        _ready = False
        _downloader = None

        def __init__(self, downloader=None):
                """Constructor. Receives an optional downloader."""
                self._ready = False
                self.set_downloader(downloader)

        @staticmethod
        def suitable(url):
                """Receives a URL and returns True if suitable for this IE."""
                return False

        def initialize(self):
                """Initializes an instance (authentication, etc). Idempotent."""
                if self._ready:
                        return
                self._real_initialize()
                self._ready = True

        def extract(self, url):
                """Extracts URL information and returns it in list of dicts."""
                self.initialize()
                return self._real_extract(url)

        def set_downloader(self, downloader):
                """Sets the downloader for this IE."""
                self._downloader = downloader

        def _real_initialize(self):
                """Real initialization process. Redefine in subclasses."""
                pass

        def _real_extract(self, url):
                """Real extraction process. Redefine in subclasses."""
                pass
566
class YoutubeIE(InfoExtractor):
        """Information extractor for youtube.com."""

        # Matches bare video IDs, /v/<id> URLs and watch?...&v=<id> URLs.
        _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
        # Visiting this URL persists English as the site language.
        _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
        _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
        _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
        # Machine name used to look up credentials in ~/.netrc.
        _NETRC_MACHINE = 'youtube'
        _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
        # Maps known format codes to the file extension they should be saved
        # with; codes absent here are handled outside this chunk.
        _video_extensions = {
                '13': '3gp',
                '17': 'mp4',
                '18': 'mp4',
                '22': 'mp4',
                '37': 'mp4',
        }
583
584         @staticmethod
585         def suitable(url):
586                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
587
588         @staticmethod
589         def htmlentity_transform(matchobj):
590                 """Transforms an HTML entity to a Unicode character."""
591                 entity = matchobj.group(1)
592
593                 # Known non-numeric HTML entity
594                 if entity in htmlentitydefs.name2codepoint:
595                         return unichr(htmlentitydefs.name2codepoint[entity])
596
597                 # Unicode character
598                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
599                 if mobj is not None:
600                         numstr = mobj.group(1)
601                         if numstr.startswith(u'x'):
602                                 base = 16
603                                 numstr = u'0%s' % numstr
604                         else:
605                                 base = 10
606                         return unichr(long(numstr, base))
607
608                 # Unknown entity in name, return its literal representation
609                 return (u'&%s;' % entity)
610
        def report_lang(self):
                """Report attempt to set the site language to English."""
                self._downloader.to_stdout(u'[youtube] Setting language')
614
        def report_login(self):
                """Report attempt to log in."""
                self._downloader.to_stdout(u'[youtube] Logging in')
618         
        def report_age_confirmation(self):
                """Report attempt to confirm age."""
                self._downloader.to_stdout(u'[youtube] Confirming age')
622         
        def report_video_info_webpage_download(self, video_id):
                """Report attempt to download video info webpage."""
                self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
626         
        def report_information_extraction(self, video_id):
                """Report attempt to extract video information."""
                self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
630         
        def report_unavailable_format(self, video_id, format):
                """Report that the requested format is unavailable for this video."""
                self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
634         
        def report_rtmp_download(self):
                """Indicate the download will use the RTMP protocol."""
                self._downloader.to_stdout(u'[youtube] RTMP download detected')
638         
	def _real_initialize(self):
		"""Prepare the YouTube session before any extraction.

		Steps, each best-effort with an early return on failure:
		1. Work out credentials from the 'username'/'password' downloader
		   params or, when 'usenetrc' is set, from the .netrc file.
		2. Fetch _LANG_URL (presumably to pin the site language — see
		   report_lang; TODO confirm).
		3. POST the login form, if credentials were found.
		4. POST the age-confirmation form.

		Login failures are warnings (extraction proceeds unauthenticated);
		a failed age confirmation is reported through trouble().
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					# Raised here so the handler below prints one uniform warning
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the response still contains the login form, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
707
708         def _real_extract(self, url):
709                 # Extract video id from URL
710                 mobj = re.match(self._VALID_URL, url)
711                 if mobj is None:
712                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
713                         return
714                 video_id = mobj.group(2)
715
716                 # Downloader parameters
717                 best_quality = False
718                 format_param = None
719                 quality_index = 0
720                 if self._downloader is not None:
721                         params = self._downloader.params
722                         format_param = params.get('format', None)
723                         if format_param == '0':
724                                 format_param = self._available_formats[quality_index]
725                                 best_quality = True
726
727                 while True:
728                         # Extension
729                         video_extension = self._video_extensions.get(format_param, 'flv')
730
731                         # Get video info
732                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
733                         request = urllib2.Request(video_info_url, None, std_headers)
734                         try:
735                                 self.report_video_info_webpage_download(video_id)
736                                 video_info_webpage = urllib2.urlopen(request).read()
737                                 video_info = parse_qs(video_info_webpage)
738                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
739                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
740                                 return
741                         self.report_information_extraction(video_id)
742
743                         # "t" param
744                         if 'token' not in video_info:
745                                 # Attempt to see if YouTube has issued an error message
746                                 if 'reason' not in video_info:
747                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
748                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
749                                         stream.write(video_info_webpage)
750                                         stream.close()
751                                 else:
752                                         reason = urllib.unquote_plus(video_info['reason'][0])
753                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
754                                 return
755                         token = urllib.unquote_plus(video_info['token'][0])
756                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
757                         if format_param is not None:
758                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
759
760                         # Check possible RTMP download
761                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
762                                 self.report_rtmp_download()
763                                 video_real_url = video_info['conn'][0]
764
765                         # uploader
766                         if 'author' not in video_info:
767                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
768                                 return
769                         video_uploader = urllib.unquote_plus(video_info['author'][0])
770
771                         # title
772                         if 'title' not in video_info:
773                                 self._downloader.trouble(u'ERROR: unable to extract video title')
774                                 return
775                         video_title = urllib.unquote_plus(video_info['title'][0])
776                         video_title = video_title.decode('utf-8')
777                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
778                         video_title = video_title.replace(os.sep, u'%')
779
780                         # simplified title
781                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
782                         simple_title = simple_title.strip(ur'_')
783
784                         try:
785                                 # Process video information
786                                 self._downloader.process_info({
787                                         'id':           video_id.decode('utf-8'),
788                                         'url':          video_real_url.decode('utf-8'),
789                                         'uploader':     video_uploader.decode('utf-8'),
790                                         'title':        video_title,
791                                         'stitle':       simple_title,
792                                         'ext':          video_extension.decode('utf-8'),
793                                 })
794
795                                 return
796
797                         except UnavailableFormatError, err:
798                                 if best_quality:
799                                         if quality_index == len(self._available_formats) - 1:
800                                                 # I don't ever expect this to happen
801                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
802                                                 return
803                                         else:
804                                                 self.report_unavailable_format(video_id, format_param)
805                                                 quality_index += 1
806                                                 format_param = self._available_formats[quality_index]
807                                                 continue
808                                 else: 
809                                         self._downloader.trouble('ERROR: format not available for video')
810                                         return
811
812
813 class MetacafeIE(InfoExtractor):
814         """Information Extractor for metacafe.com."""
815
816         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
817         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
818         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
819         _youtube_ie = None
820
821         def __init__(self, youtube_ie, downloader=None):
822                 InfoExtractor.__init__(self, downloader)
823                 self._youtube_ie = youtube_ie
824
825         @staticmethod
826         def suitable(url):
827                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
828
829         def report_disclaimer(self):
830                 """Report disclaimer retrieval."""
831                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
832
833         def report_age_confirmation(self):
834                 """Report attempt to confirm age."""
835                 self._downloader.to_stdout(u'[metacafe] Confirming age')
836         
837         def report_download_webpage(self, video_id):
838                 """Report webpage download."""
839                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
840         
841         def report_extraction(self, video_id):
842                 """Report information extraction."""
843                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
844
845         def _real_initialize(self):
846                 # Retrieve disclaimer
847                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
848                 try:
849                         self.report_disclaimer()
850                         disclaimer = urllib2.urlopen(request).read()
851                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
852                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
853                         return
854
855                 # Confirm age
856                 disclaimer_form = {
857                         'filters': '0',
858                         'submit': "Continue - I'm over 18",
859                         }
860                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
861                 try:
862                         self.report_age_confirmation()
863                         disclaimer = urllib2.urlopen(request).read()
864                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
865                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
866                         return
867         
868         def _real_extract(self, url):
869                 # Extract id and simplified title from URL
870                 mobj = re.match(self._VALID_URL, url)
871                 if mobj is None:
872                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
873                         return
874
875                 video_id = mobj.group(1)
876
877                 # Check if video comes from YouTube
878                 mobj2 = re.match(r'^yt-(.*)$', video_id)
879                 if mobj2 is not None:
880                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
881                         return
882
883                 simple_title = mobj.group(2).decode('utf-8')
884                 video_extension = 'flv'
885
886                 # Retrieve video webpage to extract further information
887                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
888                 try:
889                         self.report_download_webpage(video_id)
890                         webpage = urllib2.urlopen(request).read()
891                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
892                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
893                         return
894
895                 # Extract URL, uploader and title from webpage
896                 self.report_extraction(video_id)
897                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
898                 if mobj is None:
899                         self._downloader.trouble(u'ERROR: unable to extract media URL')
900                         return
901                 mediaURL = urllib.unquote(mobj.group(1))
902
903                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
904                 #if mobj is None:
905                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
906                 #       return
907                 #gdaKey = mobj.group(1)
908                 #
909                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
910
911                 video_url = mediaURL
912
913                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
914                 if mobj is None:
915                         self._downloader.trouble(u'ERROR: unable to extract title')
916                         return
917                 video_title = mobj.group(1).decode('utf-8')
918
919                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
920                 if mobj is None:
921                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
922                         return
923                 video_uploader = mobj.group(1)
924
925                 try:
926                         # Process video information
927                         self._downloader.process_info({
928                                 'id':           video_id.decode('utf-8'),
929                                 'url':          video_url.decode('utf-8'),
930                                 'uploader':     video_uploader.decode('utf-8'),
931                                 'title':        video_title,
932                                 'stitle':       simple_title,
933                                 'ext':          video_extension.decode('utf-8'),
934                         })
935                 except UnavailableFormatError:
936                         self._downloader.trouble(u'ERROR: format not available for video')
937
938
939 class YoutubeSearchIE(InfoExtractor):
940         """Information Extractor for YouTube search queries."""
941         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
942         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
943         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
944         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
945         _youtube_ie = None
946         _max_youtube_results = 1000
947
948         def __init__(self, youtube_ie, downloader=None):
949                 InfoExtractor.__init__(self, downloader)
950                 self._youtube_ie = youtube_ie
951         
952         @staticmethod
953         def suitable(url):
954                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
955
956         def report_download_page(self, query, pagenum):
957                 """Report attempt to download playlist page with given number."""
958                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
959
960         def _real_initialize(self):
961                 self._youtube_ie.initialize()
962         
963         def _real_extract(self, query):
964                 mobj = re.match(self._VALID_QUERY, query)
965                 if mobj is None:
966                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
967                         return
968
969                 prefix, query = query.split(':')
970                 prefix = prefix[8:]
971                 if prefix == '':
972                         self._download_n_results(query, 1)
973                         return
974                 elif prefix == 'all':
975                         self._download_n_results(query, self._max_youtube_results)
976                         return
977                 else:
978                         try:
979                                 n = long(prefix)
980                                 if n <= 0:
981                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
982                                         return
983                                 elif n > self._max_youtube_results:
984                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
985                                         n = self._max_youtube_results
986                                 self._download_n_results(query, n)
987                                 return
988                         except ValueError: # parsing prefix as integer fails
989                                 self._download_n_results(query, 1)
990                                 return
991
992         def _download_n_results(self, query, n):
993                 """Downloads a specified number of results for a query"""
994
995                 video_ids = []
996                 already_seen = set()
997                 pagenum = 1
998
999                 while True:
1000                         self.report_download_page(query, pagenum)
1001                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1002                         request = urllib2.Request(result_url, None, std_headers)
1003                         try:
1004                                 page = urllib2.urlopen(request).read()
1005                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1006                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1007                                 return
1008
1009                         # Extract video identifiers
1010                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1011                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1012                                 if video_id not in already_seen:
1013                                         video_ids.append(video_id)
1014                                         already_seen.add(video_id)
1015                                         if len(video_ids) == n:
1016                                                 # Specified n videos reached
1017                                                 for id in video_ids:
1018                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1019                                                 return
1020
1021                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1022                                 for id in video_ids:
1023                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1024                                 return
1025
1026                         pagenum = pagenum + 1
1027
class YoutubePlaylistIE(InfoExtractor):
	"""Information Extractor for YouTube playlists.

	Walks every page of a playlist, collects the video ids, and hands
	each one to the YoutubeIE instance supplied at construction time.
	"""

	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
	# group(1) of this pattern is a video id on the playlist page.
	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	# Filled with (playlist id, next page number) and searched for as a
	# plain substring, not a regex, to detect that more pages exist.
	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie
	
	@staticmethod
	def suitable(url):
		return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

	def report_download_page(self, playlist_id, pagenum):
		"""Report attempt to download playlist page with given number."""
		self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

	def _real_initialize(self):
		# Actual downloads go through the YouTube extractor, so make
		# sure it is initialized (language, login, age confirmation).
		self._youtube_ie.initialize()
	
	def _real_extract(self, url):
		"""Download every page of the playlist at 'url' and extract each video."""
		# Extract playlist id
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
			return

		# Download playlist pages
		playlist_id = mobj.group(1)
		video_ids = []
		pagenum = 1

		while True:
			self.report_download_page(playlist_id, pagenum)
			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers (deduplicated within each page)
			ids_in_page = []
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				if mobj.group(1) not in ids_in_page:
					ids_in_page.append(mobj.group(1))
			video_ids.extend(ids_in_page)

			# NOTE(review): the next-page link apparently contains the
			# playlist id in upper case, hence .upper() — confirm
			# against a live playlist page.
			if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
				break
			pagenum = pagenum + 1

		for id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
		return
1087
1088 class YoutubeUserIE(InfoExtractor):
1089         """Information Extractor for YouTube users."""
1090
1091         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1092         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1093         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1094         _youtube_ie = None
1095
1096         def __init__(self, youtube_ie, downloader=None):
1097                 InfoExtractor.__init__(self, downloader)
1098                 self._youtube_ie = youtube_ie
1099         
1100         @staticmethod
1101         def suitable(url):
1102                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1103
1104         def report_download_page(self, username):
1105                 """Report attempt to download user page."""
1106                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1107
1108         def _real_initialize(self):
1109                 self._youtube_ie.initialize()
1110         
1111         def _real_extract(self, url):
1112                 # Extract username
1113                 mobj = re.match(self._VALID_URL, url)
1114                 if mobj is None:
1115                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1116                         return
1117
1118                 # Download user page
1119                 username = mobj.group(1)
1120                 video_ids = []
1121                 pagenum = 1
1122
1123                 self.report_download_page(username)
1124                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1125                 try:
1126                         page = urllib2.urlopen(request).read()
1127                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1128                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1129                         return
1130
1131                 # Extract video identifiers
1132                 ids_in_page = []
1133
1134                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1135                         if mobj.group(1) not in ids_in_page:
1136                                 ids_in_page.append(mobj.group(1))
1137                 video_ids.extend(ids_in_page)
1138
1139                 for id in video_ids:
1140                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1141                 return
1142
class PostProcessor(object):
	"""Base class for post-download processing steps.

	Instances are registered on a downloader through its
	add_post_processor() method.  Once a download finishes successfully,
	the downloader walks its chain of PostProcessors, feeding each run()
	the dictionary returned by the previous one (the first in the chain
	receives the downloader's initial information dictionary).

	The chain stops as soon as a run() returns None, or when the last
	processor has executed.  Like InfoExtractor objects, PostProcessors
	take part in a mutual-registration scheme with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach the downloader this post processor belongs to."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded file.

		'information' is an InfoExtractor-style dictionary with one
		extra key, "filepath", naming the downloaded file on disk.

		Return None to stop the postprocessing chain, or a (possibly
		modified) information dictionary to pass along to the next
		processor.  May raise a PostProcessingError, which the calling
		downloader takes into account.
		"""
		return information # by default, do nothing
1188         
1189 ### MAIN PROGRAM ###
1190 if __name__ == '__main__':
1191         try:
1192                 # Modules needed only when running the main program
1193                 import getpass
1194                 import optparse
1195
1196                 # Function to update the program file with the latest version from bitbucket.org
1197                 def update_self(downloader, filename):
1198                         # Note: downloader only used for options
1199                         if not os.access (filename, os.W_OK):
1200                                 sys.exit('ERROR: no write permissions on %s' % filename)
1201
1202                         downloader.to_stdout('Updating to latest stable version...')
1203                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1204                         latest_version = urllib.urlopen(latest_url).read().strip()
1205                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1206                         newcontent = urllib.urlopen(prog_url).read()
1207                         stream = open(filename, 'w')
1208                         stream.write(newcontent)
1209                         stream.close()
1210                         downloader.to_stdout('Updated to version %s' % latest_version)
1211
1212                 # General configuration
1213                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1214                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1215                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1216
1217                 # Parse command line
1218                 parser = optparse.OptionParser(
1219                         usage='Usage: %prog [options] url...',
1220                         version='2010.01.05',
1221                         conflict_handler='resolve',
1222                 )
1223
1224                 parser.add_option('-h', '--help',
1225                                 action='help', help='print this help text and exit')
		# ----- General options -----
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

		# ----- Authentication options (site login) -----
		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='UN', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PW', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		# ----- Video format options -----
		# -b/-m/-d are aliases: they store fixed format codes into the same
		# dest as -f, so the last one given on the command line wins.
		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FMT', help='video format code')
		video_format.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='download the best quality video possible', const='0')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('-d', '--high-def',
				action='store_const', dest='format', help='alias for -f 22', const='22')
		parser.add_option_group(video_format)

		# ----- Verbosity / simulation options -----
		# -g and -e imply both quiet and simulate (see the FileDownloader
		# settings dict below, where they are OR-ed in).
		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		parser.add_option_group(verbosity)

		# ----- Filesystem options -----
		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TPL', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='F', help='file containing URLs to download')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()
	
		# Batch file verification: read one URL per line, dropping blank
		# lines after stripping whitespace.
		# NOTE(review): the file handle is never closed explicitly — it is
		# left to garbage collection.
		batchurls = []
		if opts.batchfile is not None:
			try:
				batchurls = open(opts.batchfile, 'r').readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options.
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# Username given without a password: prompt for it interactively
		# (no echo) rather than requiring it on the command line.
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		# Convert the human-readable rate limit (e.g. "50k") to a byte
		# count; parse_bytes signals a malformed value by returning None.
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit

		# Information extractors. The playlist/user/search extractors wrap
		# the plain YoutubeIE so they can delegate individual video pages
		# to it.
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)

		# File downloader, configured from the parsed options.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			# -g/-e imply quiet and simulate in addition to their own flags.
			'quiet': (opts.quiet or opts.geturl or opts.gettitle),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
			'format': opts.format,
			# Output template precedence via the and/or short-circuit idiom
			# (this code predates conditional expressions): explicit -o
			# first (decoded from the locale's preferred encoding, since
			# argv is a byte string on Python 2), then the -t sanitized
			# title preset, then the -l literal title preset, then the
			# id-only default.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'continuedl': opts.continue_dl,
			})
		# Registration order: the more specific extractors (search,
		# playlist, user) come before the generic ones — presumably the
		# downloader tries them in registration order; verify in
		# FileDownloader.add_info_extractor/download.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(youtube_ie)

		# Self-update if requested. Note: if URLs were also given, the
		# program goes on to download them after updating.
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing: no URLs is an error unless -U alone was given,
		# in which case the update above was the whole job.
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		# download() returns the process exit code (non-zero on failure).
		retcode = fd.download(all_urls)
		sys.exit(retcode)

	except DownloadError:
		# Already reported by the downloader; just signal failure.
		sys.exit(1)
	except SameFileError:
		# A fixed -o template cannot receive more than one download.
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')