f445c4de6e9c0186e6cae2e8002fbf0457c7cb08
[youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import cookielib
8 import htmlentitydefs
9 import httplib
10 import locale
11 import math
12 import netrc
13 import os
14 import os.path
15 import re
16 import socket
17 import string
18 import subprocess
19 import sys
20 import time
21 import urllib
22 import urllib2
23
24 # parse_qs was moved from the cgi module to the urlparse module recently.
25 try:
26         from urlparse import parse_qs
27 except ImportError:
28         from cgi import parse_qs
29
# Default HTTP headers sent with every request. The User-Agent mimics a
# contemporary Firefox build so sites serve the same pages they would give
# a regular browser.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.11) Gecko/20101019 Firefox/3.6.11',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Unicode string of characters considered safe for simplified titles:
# ASCII letters and digits (decoded so concatenation stays unicode).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
38
def preferredencoding():
        """Get preferred encoding.

        Returns the best encoding scheme for the system, based on
        locale.getpreferredencoding() and some further tweaks.
        """
        # The original wrapped this in a one-shot generator and called
        # .next() on it, which added nothing; a plain try/return is
        # equivalent. The bare "except:" is narrowed to Exception so
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
                pref = locale.getpreferredencoding()
                # Probe the reported encoding: some platforms return a name
                # Python cannot actually encode to.
                u'TEST'.encode(pref)
        except Exception:
                # Unusable or unknown locale encoding; fall back to UTF-8.
                pref = 'UTF-8'
        return pref
54
55 def htmlentity_transform(matchobj):
56         """Transforms an HTML entity to a Unicode character.
57         
58         This function receives a match object and is intended to be used with
59         the re.sub() function.
60         """
61         entity = matchobj.group(1)
62
63         # Known non-numeric HTML entity
64         if entity in htmlentitydefs.name2codepoint:
65                 return unichr(htmlentitydefs.name2codepoint[entity])
66
67         # Unicode character
68         mobj = re.match(ur'(?u)#(x?\d+)', entity)
69         if mobj is not None:
70                 numstr = mobj.group(1)
71                 if numstr.startswith(u'x'):
72                         base = 16
73                         numstr = u'0%s' % numstr
74                 else:
75                         base = 10
76                 return unichr(long(numstr, base))
77
78         # Unknown entity in name, return its literal representation
79         return (u'&%s;' % entity)
80
81 def sanitize_title(utitle):
82         """Sanitizes a video title so it could be used as part of a filename."""
83         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
84         return utitle.replace(unicode(os.sep), u'%')
85
86 def sanitize_open(filename, open_mode):
87         """Try to open the given filename, and slightly tweak it if this fails.
88
89         Attempts to open the given filename. If this fails, it tries to change
90         the filename slightly, step by step, until it's either able to open it
91         or it fails and raises a final exception, like the standard open()
92         function.
93
94         It returns the tuple (stream, definitive_file_name).
95         """
96         try:
97                 if filename == u'-':
98                         if sys.platform == 'win32':
99                                 import msvcrt
100                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
101                         return (sys.stdout, filename)
102                 stream = open(filename, open_mode)
103                 return (stream, filename)
104         except (IOError, OSError), err:
105                 # In case of error, try to remove win32 forbidden chars
106                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
107
108                 # An exception here should be caught in the caller
109                 stream = open(filename, open_mode)
110                 return (stream, filename)
111
112
class DownloadError(Exception):
        """Download Error exception.
        
        This exception may be thrown by FileDownloader objects if they are not
        configured to continue on errors (see FileDownloader.trouble(), which
        raises it unless the 'ignoreerrors' option is set). They will contain
        the appropriate error message.
        """
        pass
121
class SameFileError(Exception):
        """Same File exception.

        This exception will be thrown by FileDownloader objects if they detect
        multiple files would have to be downloaded to the same file on disk
        (several URLs combined with a fixed output template).
        """
        pass
129
class PostProcessingError(Exception):
        """Post Processing exception.

        This exception may be raised by PostProcessor's .run() method to
        indicate an error in the postprocessing task. It is caught by
        FileDownloader.process_info() and reported via trouble().
        """
        pass
137
class UnavailableVideoError(Exception):
        """Unavailable Format exception.

        This exception will be thrown when a video is requested
        in a format that is not available for that video (raised from
        process_info() when the actual download fails with an OS/IO error).
        """
        pass
145
class ContentTooShortError(Exception):
        """Content Too Short exception.

        Raised by FileDownloader objects when the downloaded file turns out
        smaller than the size the server announced, which usually means the
        connection was interrupted mid-transfer.
        """
        # Byte counts describing the mismatch (both in bytes).
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                # Record both sizes so the caller can report the discrepancy.
                self.expected = expected
                self.downloaded = downloaded
160
class FileDownloader(object):
        """File Downloader class.

        File downloader objects are the ones responsible of downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. As, given a video URL, the downloader doesn't know how to
        extract all the needed information, task that InfoExtractors do, it
        has to pass the URL to one of them.

        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.

        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
        options instead. These options are available through the params
        attribute for the InfoExtractors to use. The FileDownloader also
        registers itself as the downloader in charge for the InfoExtractors
        that are added to it, so this is a "mutual registration".

        Available options:

        username:         Username for authentication purposes.
        password:         Password for authentication purposes.
        usenetrc:         Use netrc for authentication instead.
        quiet:            Do not print messages to stdout.
        forceurl:         Force printing final URL.
        forcetitle:       Force printing title.
        forcethumbnail:   Force printing thumbnail URL.
        forcedescription: Force printing description.
        simulate:         Do not download the video files.
        format:           Video format code.
        format_limit:     Highest quality format to try.
        outtmpl:          Template for output names.
        ignoreerrors:     Do not stop on download errors.
        ratelimit:        Download speed limit, in bytes/sec.
        nooverwrites:     Prevent overwriting files.
        retries:          Number of times to retry for HTTP error 5xx
        continuedl:       Try to continue downloads if possible.
        noprogress:       Do not print the progress bar.
        playliststart:    Playlist item to start at.
        logtostderr:      Log messages to stderr instead of stdout.
        """

        # All of these are re-assigned per instance in __init__; the class
        # level values only document the expected attributes.
        params = None
        _ies = []
        _pps = []
        _download_retcode = None
        _num_downloads = None
        _screen_file = None

        def __init__(self, params):
                """Create a FileDownloader object with the given options."""
                self._ies = []
                self._pps = []
                self._download_retcode = 0
                self._num_downloads = 0
                # Boolean-index trick: False -> stdout, True -> stderr.
                self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
                self.params = params
        
        @staticmethod
        def pmkdir(filename):
                """Create directory components in filename. Similar to Unix "mkdir -p"."""
                components = filename.split(os.sep)
                # Build every prefix path ("a", "a/b", "a/b/c", ...) except the
                # final component, which is the file itself.
                aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
                aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
                for dir in aggregate:
                        if not os.path.exists(dir):
                                os.mkdir(dir)
        
        @staticmethod
        def format_bytes(bytes):
                """Return a human-readable string for a byte count, e.g. '1.21M'."""
                if bytes is None:
                        return 'N/A'
                if type(bytes) is str:
                        bytes = float(bytes)
                if bytes == 0.0:
                        exponent = 0
                else:
                        # log base 1024 selects the unit: 0 = b, 1 = k, 2 = M, ...
                        exponent = long(math.log(bytes, 1024.0))
                suffix = 'bkMGTPEZY'[exponent]
                converted = float(bytes) / float(1024**exponent)
                return '%.2f%s' % (converted, suffix)

        @staticmethod
        def calc_percent(byte_counter, data_len):
                """Return download progress as a fixed-width percentage string."""
                if data_len is None:
                        return '---.-%'
                return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

        @staticmethod
        def calc_eta(start, now, total, current):
                """Estimate remaining time as 'MM:SS'; '--:--' when unknown."""
                if total is None:
                        return '--:--'
                dif = now - start
                if current == 0 or dif < 0.001: # One millisecond
                        return '--:--'
                rate = float(current) / dif
                eta = long((float(total) - float(current)) / rate)
                (eta_mins, eta_secs) = divmod(eta, 60)
                # Anything over 99 minutes would not fit the display field.
                if eta_mins > 99:
                        return '--:--'
                return '%02d:%02d' % (eta_mins, eta_secs)

        @staticmethod
        def calc_speed(start, now, bytes):
                """Return average download speed as a right-aligned string."""
                dif = now - start
                if bytes == 0 or dif < 0.001: # One millisecond
                        return '%10s' % '---b/s'
                return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

        @staticmethod
        def best_block_size(elapsed_time, bytes):
                """Pick the next read size from the last block's throughput.

                Aims to roughly double or halve the block so each read takes
                about one second, clamped between 1 byte and 4 MB.
                """
                new_min = max(bytes / 2.0, 1.0)
                new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
                if elapsed_time < 0.001:
                        return long(new_max)
                rate = bytes / elapsed_time
                if rate > new_max:
                        return long(new_max)
                if rate < new_min:
                        return long(new_min)
                return long(rate)

        @staticmethod
        def parse_bytes(bytestr):
                """Parse a string indicating a byte quantity into a long integer."""
                matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
                if matchobj is None:
                        return None
                number = float(matchobj.group(1))
                # An empty suffix yields index 0, i.e. a multiplier of 1.
                multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                return long(round(number * multiplier))

        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list."""
                self._ies.append(ie)
                # Mutual registration: the IE gets a reference back to us.
                ie.set_downloader(self)
        
        def add_post_processor(self, pp):
                """Add a PostProcessor object to the end of the chain."""
                self._pps.append(pp)
                pp.set_downloader(self)
        
        def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
                """Print message to stdout if not in quiet mode."""
                try:
                        if not self.params.get('quiet', False):
                                terminator = [u'\n', u''][skip_eol]
                                # Trailing comma suppresses print's own newline;
                                # the terminator above controls it instead.
                                print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
                        self._screen_file.flush()
                except (UnicodeEncodeError), err:
                        if not ignore_encoding_errors:
                                raise
        
        def to_stderr(self, message):
                """Print message to stderr."""
                print >>sys.stderr, message.encode(preferredencoding())
        
        def fixed_template(self):
                """Checks if the output template is fixed (contains no %(...)s fields)."""
                return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

        def trouble(self, message=None):
                """Determine action to take when a download problem appears.

                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
                not when errors are found, after printing the message.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self.params.get('ignoreerrors', False):
                        raise DownloadError(message)
                self._download_retcode = 1

        def slow_down(self, start_time, byte_counter):
                """Sleep if the download speed is over the rate limit."""
                rate_limit = self.params.get('ratelimit', None)
                if rate_limit is None or byte_counter == 0:
                        return
                now = time.time()
                elapsed = now - start_time
                if elapsed <= 0.0:
                        return
                speed = float(byte_counter) / elapsed
                if speed > rate_limit:
                        # Sleep just long enough that, on waking, the average
                        # speed since start_time equals the rate limit.
                        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

        def report_destination(self, filename):
                """Report destination filename."""
                self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
        
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress."""
                if self.params.get('noprogress', False):
                        return
                # Leading \r rewrites the progress line in place.
                self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

        def report_resuming_byte(self, resume_len):
                """Report attempt to resume at given byte."""
                self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
        
        def report_retry(self, count, retries):
                """Report retry in case of HTTP error 5xx"""
                self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
        
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
                try:
                        self.to_screen(u'[download] %s has already been downloaded' % file_name)
                except (UnicodeEncodeError), err:
                        # File name not representable in the console encoding;
                        # fall back to a generic message.
                        self.to_screen(u'[download] The file has already been downloaded')
        
        def report_unable_to_resume(self):
                """Report it was impossible to resume download."""
                self.to_screen(u'[download] Unable to resume')
        
        def report_finish(self):
                """Report download finished."""
                if self.params.get('noprogress', False):
                        self.to_screen(u'[download] Download completed')
                else:
                        # The progress line ends without a newline; emit one.
                        self.to_screen(u'')
        
        def increment_downloads(self):
                """Increment the ordinal that assigns a number to each file."""
                self._num_downloads += 1

        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor.

                Handles forced printings, builds the output filename from the
                template, creates directories, downloads the data and runs the
                postprocessing chain.
                """
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
                                print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
                        if self.params.get('forcedescription', False) and 'description' in info_dict:
                                print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

                        return
                        
                try:
                        # Extend the IE-provided fields with template-only ones.
                        template_dict = dict(info_dict)
                        template_dict['epoch'] = unicode(long(time.time()))
                        template_dict['ord'] = unicode('%05d' % self._num_downloads)
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        self.trouble(u'ERROR: invalid system charset or erroneous output template')
                        return
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists and will be skipped')
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble(u'ERROR: unable to create directories: %s' % str(err))
                        return

                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
                except (OSError, IOError), err:
                        raise UnavailableVideoError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble(u'ERROR: postprocessing: %s' % str(err))
                                return

        def download(self, url_list):
                """Download a given list of URLs.

                Returns the process return code (0 on success, 1 if any error
                was tolerated via the 'ignoreerrors' option).
                """
                # A fixed template would write every URL to the same file.
                if len(url_list) > 1 and self.fixed_template():
                        raise SameFileError(self.params['outtmpl'])

                for url in url_list:
                        suitable_found = False
                        for ie in self._ies:
                                # Go to next InfoExtractor if not suitable
                                if not ie.suitable(url):
                                        continue

                                # Suitable InfoExtractor found
                                suitable_found = True

                                # Extract information from URL and process it
                                ie.extract(url)

                                # Suitable InfoExtractor had been found; go to next URL
                                break

                        if not suitable_found:
                                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

                return self._download_retcode

        def post_process(self, filename, ie_info):
                """Run the postprocessing chain on the given file.

                Each PostProcessor receives the info dict (with 'filepath'
                added) and may stop the chain by returning None.
                """
                info = dict(ie_info)
                info['filepath'] = filename
                for pp in self._pps:
                        info = pp.run(info)
                        if info is None:
                                break
        
        def _download_with_rtmpdump(self, filename, url, player_url):
                """Download an rtmp:// URL by shelling out to rtmpdump.

                Returns True on success, False on failure (after reporting
                the problem through trouble()).
                """
                self.report_destination(filename)

                # Check for rtmpdump first
                try:
                        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
                except (OSError, IOError):
                        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
                        return False

                # Download using rtmpdump. rtmpdump returns exit code 2 when
                # the connection was interrumpted and resuming appears to be
                # possible. This is part of rtmpdump's normal usage, AFAIK.
                # Boolean-index idiom: adds ['-W', player_url] only when a
                # player URL was provided, and resume flags only if continuedl.
                basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
                retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
                while retval == 2 or retval == 1:
                        prevsize = os.path.getsize(filename)
                        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
                        time.sleep(5.0) # This seems to be needed
                        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
                        cursize = os.path.getsize(filename)
                        # No forward progress on a code-1 exit means a real
                        # failure rather than a resumable interruption.
                        if prevsize == cursize and retval == 1:
                                break
                if retval == 0:
                        self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
                        return True
                else:
                        self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
                        return False

        def _do_download(self, filename, url, player_url):
                """Download url to filename over HTTP (or delegate to rtmpdump).

                Supports resuming partial downloads via the Range header,
                retrying on HTTP 5xx, rate limiting and adaptive block sizes.
                Returns True on success; raises ContentTooShortError if fewer
                bytes arrive than the server announced.
                """
                # Attempt to download using rtmpdump
                if url.startswith('rtmp'):
                        return self._download_with_rtmpdump(filename, url, player_url)

                stream = None
                open_mode = 'wb'
                # basic_request stays Range-less for the 416 fallback below.
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)

                # Establish possible resume length
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0

                # Request parameters in case of being able to resume
                if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        open_mode = 'ab'

                count = 0
                retries = self.params.get('retries', 0)
                while count <= retries:
                        # Establish connection
                        try:
                                data = urllib2.urlopen(request)
                                break
                        except (urllib2.HTTPError, ), err:
                                if (err.code < 500 or err.code >= 600) and err.code != 416:
                                        # Unexpected HTTP error
                                        raise
                                elif err.code == 416:
                                        # Unable to resume (requested range not satisfiable)
                                        try:
                                                # Open the connection again without the range header
                                                data = urllib2.urlopen(basic_request)
                                                content_length = data.info()['Content-Length']
                                        except (urllib2.HTTPError, ), err:
                                                if err.code < 500 or err.code >= 600:
                                                        raise
                                        else:
                                                # Examine the reported length
                                                if (content_length is not None and
                                                    (resume_len - 100 < long(content_length) < resume_len + 100)):
                                                        # The file had already been fully downloaded.
                                                        # Explanation to the above condition: in issue #175 it was revealed that
                                                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                                                        # changing the file size slightly and causing problems for some users. So
                                                        # I decided to implement a suggested change and consider the file
                                                        # completely downloaded if the file size differs less than 100 bytes from
                                                        # the one in the hard drive.
                                                        self.report_file_already_downloaded(filename)
                                                        return True
                                                else:
                                                        # The length does not match, we start the download over
                                                        self.report_unable_to_resume()
                                                        open_mode = 'wb'
                                                        break
                        # Retry
                        count += 1
                        if count <= retries:
                                self.report_retry(count, retries)

                if count > retries:
                        self.trouble(u'ERROR: giving up after %s retries' % retries)
                        return False

                data_len = data.info().get('Content-length', None)
                data_len_str = self.format_bytes(data_len)
                byte_counter = 0
                block_size = 1024
                start = time.time()
                while True:
                        # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
                        data_block_len = len(data_block)
                        if data_block_len == 0:
                                break
                        byte_counter += data_block_len

                        # Open file just in time
                        if stream is None:
                                try:
                                        (stream, filename) = sanitize_open(filename, open_mode)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                                        return False
                        try:
                                stream.write(data_block)
                        except (IOError, OSError), err:
                                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                                return False
                        # Adapt the next read size to the observed throughput.
                        block_size = self.best_block_size(after - before, data_block_len)

                        # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
                        self.report_progress(percent_str, data_len_str, speed_str, eta_str)

                        # Apply rate limit
                        self.slow_down(start, byte_counter)

                self.report_finish()
                # data_len is the raw header string, so compare string-wise.
                if data_len is not None and str(byte_counter) != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                return True
625
class InfoExtractor(object):
	"""Base class for site-specific information extractors (IEs).

	Given a URL, an IE produces one or more dictionaries describing the
	video(s) the URL refers to and hands each of them to the attached
	FileDownloader, which may download the video to the file system,
	among other possible outcomes.  Each dictionary must include:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	Optional fields, used only when the respective forced printing
	functions are called (e.g. when youtube-dl serves as the backend
	for a video search function such as the one in youtube2mp3):

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete extractors should redefine _real_initialize() and
	_real_extract(), as well as the suitable() static method, and
	probably be instantiated and added to the main downloader.
	"""

	# Whether _real_initialize() has already run for this instance.
	_ready = False
	# The FileDownloader that receives extracted information.
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True when this IE can handle the given URL."""
		return False

	def set_downloader(self, downloader):
		"""Attach the FileDownloader used to process extracted info."""
		self._downloader = downloader

	def initialize(self):
		"""Run one-time setup (authentication, etc.) on first use."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract information from url."""
		self.initialize()
		return self._real_extract(url)

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
696
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com.

	_real_initialize() sets the language, optionally logs in (using
	--username/--password or .netrc credentials) and confirms age;
	_real_extract() downloads the watch page and get_video_info data,
	then feeds one info dictionary per selected format to the attached
	FileDownloader.
	"""

	# group(1) matches the optional scheme/host prefix, group(2) the video
	# id; the trailing (?(1).+)? conditional tolerates extra characters
	# after the id only when a URL prefix was actually matched.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Maps YouTube "fmt" codes to filename extensions; anything missing
	# here falls back to 'flv' in _real_extract().
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Return True if this IE can handle the given URL."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report that the requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""Set language and, when credentials are available, log in and confirm age.

		All failures here are non-fatal (warnings), except the final age
		confirmation, which reports through trouble().
		"""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present in the response, the
			# credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract video metadata and pass one info dict per format to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			player_url = mobj.group(1)
		else:
			player_url = None

		# Get video info
		# Try several 'el' contexts; presumably some videos only return a
		# token for certain contexts — TODO confirm.  The first response
		# containing 'token' wins.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url, None, std_headers)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse every run of non-alphanumeric
		# characters into a single underscore
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# description (only looked up when it will actually be printed)
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		requested_format = self._downloader.params.get('format', None)
		get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is a comma-separated list of "format|url" pairs
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if requested_format is None:
				video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
			elif requested_format == '-1':
				video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
			else:
				video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
960
961
962 class MetacafeIE(InfoExtractor):
963         """Information Extractor for metacafe.com."""
964
965         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
966         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
967         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
968         _youtube_ie = None
969
970         def __init__(self, youtube_ie, downloader=None):
971                 InfoExtractor.__init__(self, downloader)
972                 self._youtube_ie = youtube_ie
973
974         @staticmethod
975         def suitable(url):
976                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
977
978         def report_disclaimer(self):
979                 """Report disclaimer retrieval."""
980                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
981
982         def report_age_confirmation(self):
983                 """Report attempt to confirm age."""
984                 self._downloader.to_screen(u'[metacafe] Confirming age')
985         
986         def report_download_webpage(self, video_id):
987                 """Report webpage download."""
988                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
989         
990         def report_extraction(self, video_id):
991                 """Report information extraction."""
992                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
993
994         def _real_initialize(self):
995                 # Retrieve disclaimer
996                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
997                 try:
998                         self.report_disclaimer()
999                         disclaimer = urllib2.urlopen(request).read()
1000                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1001                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1002                         return
1003
1004                 # Confirm age
1005                 disclaimer_form = {
1006                         'filters': '0',
1007                         'submit': "Continue - I'm over 18",
1008                         }
1009                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1010                 try:
1011                         self.report_age_confirmation()
1012                         disclaimer = urllib2.urlopen(request).read()
1013                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1014                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1015                         return
1016         
1017         def _real_extract(self, url):
1018                 # Extract id and simplified title from URL
1019                 mobj = re.match(self._VALID_URL, url)
1020                 if mobj is None:
1021                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1022                         return
1023
1024                 video_id = mobj.group(1)
1025
1026                 # Check if video comes from YouTube
1027                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1028                 if mobj2 is not None:
1029                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1030                         return
1031
1032                 # At this point we have a new video
1033                 self._downloader.increment_downloads()
1034
1035                 simple_title = mobj.group(2).decode('utf-8')
1036
1037                 # Retrieve video webpage to extract further information
1038                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1039                 try:
1040                         self.report_download_webpage(video_id)
1041                         webpage = urllib2.urlopen(request).read()
1042                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1043                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1044                         return
1045
1046                 # Extract URL, uploader and title from webpage
1047                 self.report_extraction(video_id)
1048                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1049                 if mobj is not None:
1050                         mediaURL = urllib.unquote(mobj.group(1))
1051                         video_extension = mediaURL[-3:]
1052                         
1053                         # Extract gdaKey if available
1054                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1055                         if mobj is None:
1056                                 video_url = mediaURL
1057                         else:
1058                                 gdaKey = mobj.group(1)
1059                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1060                 else:
1061                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1062                         if mobj is None:
1063                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1064                                 return
1065                         vardict = parse_qs(mobj.group(1))
1066                         if 'mediaData' not in vardict:
1067                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1068                                 return
1069                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1070                         if mobj is None:
1071                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1072                                 return
1073                         mediaURL = mobj.group(1).replace('\\/', '/')
1074                         video_extension = mediaURL[-3:]
1075                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1076
1077                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1078                 if mobj is None:
1079                         self._downloader.trouble(u'ERROR: unable to extract title')
1080                         return
1081                 video_title = mobj.group(1).decode('utf-8')
1082                 video_title = sanitize_title(video_title)
1083
1084                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1085                 if mobj is None:
1086                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1087                         return
1088                 video_uploader = mobj.group(1)
1089
1090                 try:
1091                         # Process video information
1092                         self._downloader.process_info({
1093                                 'id':           video_id.decode('utf-8'),
1094                                 'url':          video_url.decode('utf-8'),
1095                                 'uploader':     video_uploader.decode('utf-8'),
1096                                 'title':        video_title,
1097                                 'stitle':       simple_title,
1098                                 'ext':          video_extension.decode('utf-8'),
1099                                 'format':       u'NA',
1100                                 'player_url':   None,
1101                         })
1102                 except UnavailableVideoError:
1103                         self._downloader.trouble(u'ERROR: unable to download video')
1104
1105
1106 class DailymotionIE(InfoExtractor):
1107         """Information Extractor for Dailymotion"""
1108
1109         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1110
1111         def __init__(self, downloader=None):
1112                 InfoExtractor.__init__(self, downloader)
1113
1114         @staticmethod
1115         def suitable(url):
1116                 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1117
1118         def report_download_webpage(self, video_id):
1119                 """Report webpage download."""
1120                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1121         
1122         def report_extraction(self, video_id):
1123                 """Report information extraction."""
1124                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1125
1126         def _real_initialize(self):
1127                 return
1128
1129         def _real_extract(self, url):
1130                 # Extract id and simplified title from URL
1131                 mobj = re.match(self._VALID_URL, url)
1132                 if mobj is None:
1133                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1134                         return
1135
1136                 # At this point we have a new video
1137                 self._downloader.increment_downloads()
1138                 video_id = mobj.group(1)
1139
1140                 simple_title = mobj.group(2).decode('utf-8')
1141                 video_extension = 'flv'
1142
1143                 # Retrieve video webpage to extract further information
1144                 request = urllib2.Request(url)
1145                 try:
1146                         self.report_download_webpage(video_id)
1147                         webpage = urllib2.urlopen(request).read()
1148                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1149                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1150                         return
1151
1152                 # Extract URL, uploader and title from webpage
1153                 self.report_extraction(video_id)
1154                 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1155                 if mobj is None:
1156                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1157                         return
1158                 mediaURL = urllib.unquote(mobj.group(1))
1159
1160                 # if needed add http://www.dailymotion.com/ if relative URL
1161
1162                 video_url = mediaURL
1163
1164                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1165                 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1166                 if mobj is None:
1167                         self._downloader.trouble(u'ERROR: unable to extract title')
1168                         return
1169                 video_title = mobj.group(1).decode('utf-8')
1170                 video_title = sanitize_title(video_title)
1171
1172                 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1173                 if mobj is None:
1174                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1175                         return
1176                 video_uploader = mobj.group(1)
1177
1178                 try:
1179                         # Process video information
1180                         self._downloader.process_info({
1181                                 'id':           video_id.decode('utf-8'),
1182                                 'url':          video_url.decode('utf-8'),
1183                                 'uploader':     video_uploader.decode('utf-8'),
1184                                 'title':        video_title,
1185                                 'stitle':       simple_title,
1186                                 'ext':          video_extension.decode('utf-8'),
1187                                 'format':       u'NA',
1188                                 'player_url':   None,
1189                         })
1190                 except UnavailableVideoError:
1191                         self._downloader.trouble(u'ERROR: unable to download video')
1192
1193 class GoogleIE(InfoExtractor):
1194         """Information extractor for video.google.com."""
1195
1196         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1197
	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		InfoExtractor.__init__(self, downloader)
1200
	@staticmethod
	def suitable(url):
		"""Return True if this IE can handle the given URL."""
		return (re.match(GoogleIE._VALID_URL, url) is not None)
1204
	def report_download_webpage(self, video_id):
		"""Report that the webpage for video_id is being downloaded."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1208
	def report_extraction(self, video_id):
		"""Report that information extraction for video_id has started."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1212
	def _real_initialize(self):
		# Google Video needs no login/age-confirmation step.
		return
1215
1216         def _real_extract(self, url):
1217                 # Extract id from URL
1218                 mobj = re.match(self._VALID_URL, url)
1219                 if mobj is None:
1220                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1221                         return
1222
1223                 # At this point we have a new video
1224                 self._downloader.increment_downloads()
1225                 video_id = mobj.group(1)
1226
1227                 video_extension = 'mp4'
1228
1229                 # Retrieve video webpage to extract further information
1230                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1231                 try:
1232                         self.report_download_webpage(video_id)
1233                         webpage = urllib2.urlopen(request).read()
1234                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1235                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1236                         return
1237
1238                 # Extract URL, uploader, and title from webpage
1239                 self.report_extraction(video_id)
1240                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1241                 if mobj is None:
1242                         video_extension = 'flv'
1243                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1244                 if mobj is None:
1245                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1246                         return
1247                 mediaURL = urllib.unquote(mobj.group(1))
1248                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1249                 mediaURL = mediaURL.replace('\\x26', '\x26')
1250
1251                 video_url = mediaURL
1252
1253                 mobj = re.search(r'<title>(.*)</title>', webpage)
1254                 if mobj is None:
1255                         self._downloader.trouble(u'ERROR: unable to extract title')
1256                         return
1257                 video_title = mobj.group(1).decode('utf-8')
1258                 video_title = sanitize_title(video_title)
1259                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1260
1261                 # Extract video description
1262                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1263                 if mobj is None:
1264                         self._downloader.trouble(u'ERROR: unable to extract video description')
1265                         return
1266                 video_description = mobj.group(1).decode('utf-8')
1267                 if not video_description:
1268                         video_description = 'No description available.'
1269
1270                 # Extract video thumbnail
1271                 if self._downloader.params.get('forcethumbnail', False):
1272                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1273                         try:
1274                                 webpage = urllib2.urlopen(request).read()
1275                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1276                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1277                                 return
1278                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1279                         if mobj is None:
1280                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1281                                 return
1282                         video_thumbnail = mobj.group(1)
1283                 else:   # we need something to pass to process_info
1284                         video_thumbnail = ''
1285
1286
1287                 try:
1288                         # Process video information
1289                         self._downloader.process_info({
1290                                 'id':           video_id.decode('utf-8'),
1291                                 'url':          video_url.decode('utf-8'),
1292                                 'uploader':     u'NA',
1293                                 'title':        video_title,
1294                                 'stitle':       simple_title,
1295                                 'ext':          video_extension.decode('utf-8'),
1296                                 'format':       u'NA',
1297                                 'player_url':   None,
1298                         })
1299                 except UnavailableVideoError:
1300                         self._downloader.trouble(u'ERROR: unable to download video')
1301
1302
1303 class PhotobucketIE(InfoExtractor):
1304         """Information extractor for photobucket.com."""
1305
1306         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1307
1308         def __init__(self, downloader=None):
1309                 InfoExtractor.__init__(self, downloader)
1310
1311         @staticmethod
1312         def suitable(url):
1313                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1314
1315         def report_download_webpage(self, video_id):
1316                 """Report webpage download."""
1317                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1318
1319         def report_extraction(self, video_id):
1320                 """Report information extraction."""
1321                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1322
1323         def _real_initialize(self):
1324                 return
1325
1326         def _real_extract(self, url):
1327                 # Extract id from URL
1328                 mobj = re.match(self._VALID_URL, url)
1329                 if mobj is None:
1330                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1331                         return
1332
1333                 # At this point we have a new video
1334                 self._downloader.increment_downloads()
1335                 video_id = mobj.group(1)
1336
1337                 video_extension = 'flv'
1338
1339                 # Retrieve video webpage to extract further information
1340                 request = urllib2.Request(url)
1341                 try:
1342                         self.report_download_webpage(video_id)
1343                         webpage = urllib2.urlopen(request).read()
1344                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1345                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1346                         return
1347
1348                 # Extract URL, uploader, and title from webpage
1349                 self.report_extraction(video_id)
1350                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1351                 if mobj is None:
1352                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1353                         return
1354                 mediaURL = urllib.unquote(mobj.group(1))
1355
1356                 video_url = mediaURL
1357
1358                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1359                 if mobj is None:
1360                         self._downloader.trouble(u'ERROR: unable to extract title')
1361                         return
1362                 video_title = mobj.group(1).decode('utf-8')
1363                 video_title = sanitize_title(video_title)
1364                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1365
1366                 video_uploader = mobj.group(2).decode('utf-8')
1367
1368                 try:
1369                         # Process video information
1370                         self._downloader.process_info({
1371                                 'id':           video_id.decode('utf-8'),
1372                                 'url':          video_url.decode('utf-8'),
1373                                 'uploader':     video_uploader,
1374                                 'title':        video_title,
1375                                 'stitle':       simple_title,
1376                                 'ext':          video_extension.decode('utf-8'),
1377                                 'format':       u'NA',
1378                                 'player_url':   None,
1379                         })
1380                 except UnavailableVideoError:
1381                         self._downloader.trouble(u'ERROR: unable to download video')
1382
1383
1384 class YahooIE(InfoExtractor):
1385         """Information extractor for video.yahoo.com."""
1386
1387         # _VALID_URL matches all Yahoo! Video URLs
1388         # _VPAGE_URL matches only the extractable '/watch/' URLs
1389         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1390         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1391
1392         def __init__(self, downloader=None):
1393                 InfoExtractor.__init__(self, downloader)
1394
1395         @staticmethod
1396         def suitable(url):
1397                 return (re.match(YahooIE._VALID_URL, url) is not None)
1398
1399         def report_download_webpage(self, video_id):
1400                 """Report webpage download."""
1401                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1402
1403         def report_extraction(self, video_id):
1404                 """Report information extraction."""
1405                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1406
1407         def _real_initialize(self):
1408                 return
1409
1410         def _real_extract(self, url, new_video=True):
1411                 # Extract ID from URL
1412                 mobj = re.match(self._VALID_URL, url)
1413                 if mobj is None:
1414                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1415                         return
1416
1417                 # At this point we have a new video
1418                 self._downloader.increment_downloads()
1419                 video_id = mobj.group(2)
1420                 video_extension = 'flv'
1421
1422                 # Rewrite valid but non-extractable URLs as
1423                 # extractable English language /watch/ URLs
1424                 if re.match(self._VPAGE_URL, url) is None:
1425                         request = urllib2.Request(url)
1426                         try:
1427                                 webpage = urllib2.urlopen(request).read()
1428                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1429                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1430                                 return
1431
1432                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1433                         if mobj is None:
1434                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1435                                 return
1436                         yahoo_id = mobj.group(1)
1437
1438                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1439                         if mobj is None:
1440                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1441                                 return
1442                         yahoo_vid = mobj.group(1)
1443
1444                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1445                         return self._real_extract(url, new_video=False)
1446
1447                 # Retrieve video webpage to extract further information
1448                 request = urllib2.Request(url)
1449                 try:
1450                         self.report_download_webpage(video_id)
1451                         webpage = urllib2.urlopen(request).read()
1452                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1453                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1454                         return
1455
1456                 # Extract uploader and title from webpage
1457                 self.report_extraction(video_id)
1458                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1459                 if mobj is None:
1460                         self._downloader.trouble(u'ERROR: unable to extract video title')
1461                         return
1462                 video_title = mobj.group(1).decode('utf-8')
1463                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1464
1465                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1466                 if mobj is None:
1467                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1468                         return
1469                 video_uploader = mobj.group(1).decode('utf-8')
1470
1471                 # Extract video thumbnail
1472                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1473                 if mobj is None:
1474                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1475                         return
1476                 video_thumbnail = mobj.group(1).decode('utf-8')
1477
1478                 # Extract video description
1479                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1480                 if mobj is None:
1481                         self._downloader.trouble(u'ERROR: unable to extract video description')
1482                         return
1483                 video_description = mobj.group(1).decode('utf-8')
1484                 if not video_description: video_description = 'No description available.'
1485
1486                 # Extract video height and width
1487                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1488                 if mobj is None:
1489                         self._downloader.trouble(u'ERROR: unable to extract video height')
1490                         return
1491                 yv_video_height = mobj.group(1)
1492
1493                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1494                 if mobj is None:
1495                         self._downloader.trouble(u'ERROR: unable to extract video width')
1496                         return
1497                 yv_video_width = mobj.group(1)
1498
1499                 # Retrieve video playlist to extract media URL
1500                 # I'm not completely sure what all these options are, but we
1501                 # seem to need most of them, otherwise the server sends a 401.
1502                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1503                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1504                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1505                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1506                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1507                 try:
1508                         self.report_download_webpage(video_id)
1509                         webpage = urllib2.urlopen(request).read()
1510                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1511                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1512                         return
1513
1514                 # Extract media URL from playlist XML
1515                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1516                 if mobj is None:
1517                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1518                         return
1519                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1520                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1521
1522                 try:
1523                         # Process video information
1524                         self._downloader.process_info({
1525                                 'id':           video_id.decode('utf-8'),
1526                                 'url':          video_url,
1527                                 'uploader':     video_uploader,
1528                                 'title':        video_title,
1529                                 'stitle':       simple_title,
1530                                 'ext':          video_extension.decode('utf-8'),
1531                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1532                                 'description':  video_description,
1533                                 'thumbnail':    video_thumbnail,
1534                                 'description':  video_description,
1535                                 'player_url':   None,
1536                         })
1537                 except UnavailableVideoError:
1538                         self._downloader.trouble(u'ERROR: unable to download video')
1539
1540
1541 class GenericIE(InfoExtractor):
1542         """Generic last-resort information extractor."""
1543
1544         def __init__(self, downloader=None):
1545                 InfoExtractor.__init__(self, downloader)
1546
1547         @staticmethod
1548         def suitable(url):
1549                 return True
1550
1551         def report_download_webpage(self, video_id):
1552                 """Report webpage download."""
1553                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1554                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1555
1556         def report_extraction(self, video_id):
1557                 """Report information extraction."""
1558                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1559
1560         def _real_initialize(self):
1561                 return
1562
1563         def _real_extract(self, url):
1564                 # At this point we have a new video
1565                 self._downloader.increment_downloads()
1566
1567                 video_id = url.split('/')[-1]
1568                 request = urllib2.Request(url)
1569                 try:
1570                         self.report_download_webpage(video_id)
1571                         webpage = urllib2.urlopen(request).read()
1572                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1573                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1574                         return
1575                 except ValueError, err:
1576                         # since this is the last-resort InfoExtractor, if
1577                         # this error is thrown, it'll be thrown here
1578                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1579                         return
1580
1581                 # Start with something easy: JW Player in SWFObject
1582                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1583                 if mobj is None:
1584                         # Broaden the search a little bit
1585                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1586                 if mobj is None:
1587                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1588                         return
1589
1590                 # It's possible that one of the regexes
1591                 # matched, but returned an empty group:
1592                 if mobj.group(1) is None:
1593                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1594                         return
1595
1596                 video_url = urllib.unquote(mobj.group(1))
1597                 video_id  = os.path.basename(video_url)
1598
1599                 # here's a fun little line of code for you:
1600                 video_extension = os.path.splitext(video_id)[1][1:]
1601                 video_id        = os.path.splitext(video_id)[0]
1602
1603                 # it's tempting to parse this further, but you would
1604                 # have to take into account all the variations like
1605                 #   Video Title - Site Name
1606                 #   Site Name | Video Title
1607                 #   Video Title - Tagline | Site Name
1608                 # and so on and so forth; it's just not practical
1609                 mobj = re.search(r'<title>(.*)</title>', webpage)
1610                 if mobj is None:
1611                         self._downloader.trouble(u'ERROR: unable to extract title')
1612                         return
1613                 video_title = mobj.group(1).decode('utf-8')
1614                 video_title = sanitize_title(video_title)
1615                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1616
1617                 # video uploader is domain name
1618                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1619                 if mobj is None:
1620                         self._downloader.trouble(u'ERROR: unable to extract title')
1621                         return
1622                 video_uploader = mobj.group(1).decode('utf-8')
1623
1624                 try:
1625                         # Process video information
1626                         self._downloader.process_info({
1627                                 'id':           video_id.decode('utf-8'),
1628                                 'url':          video_url.decode('utf-8'),
1629                                 'uploader':     video_uploader,
1630                                 'title':        video_title,
1631                                 'stitle':       simple_title,
1632                                 'ext':          video_extension.decode('utf-8'),
1633                                 'format':       u'NA',
1634                                 'player_url':   None,
1635                         })
1636                 except UnavailableVideoError, err:
1637                         self._downloader.trouble(u'ERROR: unable to download video')
1638
1639
1640 class YoutubeSearchIE(InfoExtractor):
1641         """Information Extractor for YouTube search queries."""
1642         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1643         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1644         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1645         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1646         _youtube_ie = None
1647         _max_youtube_results = 1000
1648
1649         def __init__(self, youtube_ie, downloader=None):
1650                 InfoExtractor.__init__(self, downloader)
1651                 self._youtube_ie = youtube_ie
1652         
1653         @staticmethod
1654         def suitable(url):
1655                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1656
1657         def report_download_page(self, query, pagenum):
1658                 """Report attempt to download playlist page with given number."""
1659                 query = query.decode(preferredencoding())
1660                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1661
1662         def _real_initialize(self):
1663                 self._youtube_ie.initialize()
1664         
1665         def _real_extract(self, query):
1666                 mobj = re.match(self._VALID_QUERY, query)
1667                 if mobj is None:
1668                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1669                         return
1670
1671                 prefix, query = query.split(':')
1672                 prefix = prefix[8:]
1673                 query  = query.encode('utf-8')
1674                 if prefix == '':
1675                         self._download_n_results(query, 1)
1676                         return
1677                 elif prefix == 'all':
1678                         self._download_n_results(query, self._max_youtube_results)
1679                         return
1680                 else:
1681                         try:
1682                                 n = long(prefix)
1683                                 if n <= 0:
1684                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1685                                         return
1686                                 elif n > self._max_youtube_results:
1687                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1688                                         n = self._max_youtube_results
1689                                 self._download_n_results(query, n)
1690                                 return
1691                         except ValueError: # parsing prefix as integer fails
1692                                 self._download_n_results(query, 1)
1693                                 return
1694
1695         def _download_n_results(self, query, n):
1696                 """Downloads a specified number of results for a query"""
1697
1698                 video_ids = []
1699                 already_seen = set()
1700                 pagenum = 1
1701
1702                 while True:
1703                         self.report_download_page(query, pagenum)
1704                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1705                         request = urllib2.Request(result_url, None, std_headers)
1706                         try:
1707                                 page = urllib2.urlopen(request).read()
1708                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1709                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1710                                 return
1711
1712                         # Extract video identifiers
1713                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1714                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1715                                 if video_id not in already_seen:
1716                                         video_ids.append(video_id)
1717                                         already_seen.add(video_id)
1718                                         if len(video_ids) == n:
1719                                                 # Specified n videos reached
1720                                                 for id in video_ids:
1721                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1722                                                 return
1723
1724                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1725                                 for id in video_ids:
1726                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1727                                 return
1728
1729                         pagenum = pagenum + 1
1730
1731 class GoogleSearchIE(InfoExtractor):
1732         """Information Extractor for Google Video search queries."""
1733         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1734         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1735         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1736         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1737         _google_ie = None
1738         _max_google_results = 1000
1739
1740         def __init__(self, google_ie, downloader=None):
1741                 InfoExtractor.__init__(self, downloader)
1742                 self._google_ie = google_ie
1743         
1744         @staticmethod
1745         def suitable(url):
1746                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1747
1748         def report_download_page(self, query, pagenum):
1749                 """Report attempt to download playlist page with given number."""
1750                 query = query.decode(preferredencoding())
1751                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1752
1753         def _real_initialize(self):
1754                 self._google_ie.initialize()
1755         
1756         def _real_extract(self, query):
1757                 mobj = re.match(self._VALID_QUERY, query)
1758                 if mobj is None:
1759                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1760                         return
1761
1762                 prefix, query = query.split(':')
1763                 prefix = prefix[8:]
1764                 query  = query.encode('utf-8')
1765                 if prefix == '':
1766                         self._download_n_results(query, 1)
1767                         return
1768                 elif prefix == 'all':
1769                         self._download_n_results(query, self._max_google_results)
1770                         return
1771                 else:
1772                         try:
1773                                 n = long(prefix)
1774                                 if n <= 0:
1775                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1776                                         return
1777                                 elif n > self._max_google_results:
1778                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1779                                         n = self._max_google_results
1780                                 self._download_n_results(query, n)
1781                                 return
1782                         except ValueError: # parsing prefix as integer fails
1783                                 self._download_n_results(query, 1)
1784                                 return
1785
1786         def _download_n_results(self, query, n):
1787                 """Downloads a specified number of results for a query"""
1788
1789                 video_ids = []
1790                 already_seen = set()
1791                 pagenum = 1
1792
1793                 while True:
1794                         self.report_download_page(query, pagenum)
1795                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1796                         request = urllib2.Request(result_url, None, std_headers)
1797                         try:
1798                                 page = urllib2.urlopen(request).read()
1799                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1800                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1801                                 return
1802
1803                         # Extract video identifiers
1804                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1805                                 video_id = mobj.group(1)
1806                                 if video_id not in already_seen:
1807                                         video_ids.append(video_id)
1808                                         already_seen.add(video_id)
1809                                         if len(video_ids) == n:
1810                                                 # Specified n videos reached
1811                                                 for id in video_ids:
1812                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1813                                                 return
1814
1815                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1816                                 for id in video_ids:
1817                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1818                                 return
1819
1820                         pagenum = pagenum + 1
1821
1822 class YahooSearchIE(InfoExtractor):
1823         """Information Extractor for Yahoo! Video search queries."""
1824         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1825         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1826         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1827         _MORE_PAGES_INDICATOR = r'\s*Next'
1828         _yahoo_ie = None
1829         _max_yahoo_results = 1000
1830
1831         def __init__(self, yahoo_ie, downloader=None):
1832                 InfoExtractor.__init__(self, downloader)
1833                 self._yahoo_ie = yahoo_ie
1834         
1835         @staticmethod
1836         def suitable(url):
1837                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1838
1839         def report_download_page(self, query, pagenum):
1840                 """Report attempt to download playlist page with given number."""
1841                 query = query.decode(preferredencoding())
1842                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1843
1844         def _real_initialize(self):
1845                 self._yahoo_ie.initialize()
1846         
1847         def _real_extract(self, query):
1848                 mobj = re.match(self._VALID_QUERY, query)
1849                 if mobj is None:
1850                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1851                         return
1852
1853                 prefix, query = query.split(':')
1854                 prefix = prefix[8:]
1855                 query  = query.encode('utf-8')
1856                 if prefix == '':
1857                         self._download_n_results(query, 1)
1858                         return
1859                 elif prefix == 'all':
1860                         self._download_n_results(query, self._max_yahoo_results)
1861                         return
1862                 else:
1863                         try:
1864                                 n = long(prefix)
1865                                 if n <= 0:
1866                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1867                                         return
1868                                 elif n > self._max_yahoo_results:
1869                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1870                                         n = self._max_yahoo_results
1871                                 self._download_n_results(query, n)
1872                                 return
1873                         except ValueError: # parsing prefix as integer fails
1874                                 self._download_n_results(query, 1)
1875                                 return
1876
1877         def _download_n_results(self, query, n):
1878                 """Downloads a specified number of results for a query"""
1879
1880                 video_ids = []
1881                 already_seen = set()
1882                 pagenum = 1
1883
1884                 while True:
1885                         self.report_download_page(query, pagenum)
1886                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1887                         request = urllib2.Request(result_url, None, std_headers)
1888                         try:
1889                                 page = urllib2.urlopen(request).read()
1890                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1891                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1892                                 return
1893
1894                         # Extract video identifiers
1895                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1896                                 video_id = mobj.group(1)
1897                                 if video_id not in already_seen:
1898                                         video_ids.append(video_id)
1899                                         already_seen.add(video_id)
1900                                         if len(video_ids) == n:
1901                                                 # Specified n videos reached
1902                                                 for id in video_ids:
1903                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1904                                                 return
1905
1906                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1907                                 for id in video_ids:
1908                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1909                                 return
1910
1911                         pagenum = pagenum + 1
1912
1913 class YoutubePlaylistIE(InfoExtractor):
1914         """Information Extractor for YouTube playlists."""
1915
1916         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1917         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1918         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1919         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1920         _youtube_ie = None
1921
1922         def __init__(self, youtube_ie, downloader=None):
1923                 InfoExtractor.__init__(self, downloader)
1924                 self._youtube_ie = youtube_ie
1925         
1926         @staticmethod
1927         def suitable(url):
1928                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1929
1930         def report_download_page(self, playlist_id, pagenum):
1931                 """Report attempt to download playlist page with given number."""
1932                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1933
1934         def _real_initialize(self):
1935                 self._youtube_ie.initialize()
1936         
1937         def _real_extract(self, url):
1938                 # Extract playlist id
1939                 mobj = re.match(self._VALID_URL, url)
1940                 if mobj is None:
1941                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1942                         return
1943
1944                 # Download playlist pages
1945                 playlist_id = mobj.group(1)
1946                 video_ids = []
1947                 pagenum = 1
1948
1949                 while True:
1950                         self.report_download_page(playlist_id, pagenum)
1951                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1952                         try:
1953                                 page = urllib2.urlopen(request).read()
1954                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1955                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1956                                 return
1957
1958                         # Extract video identifiers
1959                         ids_in_page = []
1960                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1961                                 if mobj.group(1) not in ids_in_page:
1962                                         ids_in_page.append(mobj.group(1))
1963                         video_ids.extend(ids_in_page)
1964
1965                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1966                                 break
1967                         pagenum = pagenum + 1
1968
1969                 playliststart = self._downloader.params.get('playliststart', 1)
1970                 playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
1971                 if playliststart > 0:
1972                         video_ids = video_ids[playliststart:]
1973                         
1974                 for id in video_ids:
1975                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1976                 return
1977
1978 class YoutubeUserIE(InfoExtractor):
1979         """Information Extractor for YouTube users."""
1980
1981         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1982         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1983         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1984         _youtube_ie = None
1985
1986         def __init__(self, youtube_ie, downloader=None):
1987                 InfoExtractor.__init__(self, downloader)
1988                 self._youtube_ie = youtube_ie
1989         
1990         @staticmethod
1991         def suitable(url):
1992                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1993
1994         def report_download_page(self, username):
1995                 """Report attempt to download user page."""
1996                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
1997
1998         def _real_initialize(self):
1999                 self._youtube_ie.initialize()
2000         
2001         def _real_extract(self, url):
2002                 # Extract username
2003                 mobj = re.match(self._VALID_URL, url)
2004                 if mobj is None:
2005                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2006                         return
2007
2008                 # Download user page
2009                 username = mobj.group(1)
2010                 video_ids = []
2011                 pagenum = 1
2012
2013                 self.report_download_page(username)
2014                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2015                 try:
2016                         page = urllib2.urlopen(request).read()
2017                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2018                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2019                         return
2020
2021                 # Extract video identifiers
2022                 ids_in_page = []
2023
2024                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2025                         if mobj.group(1) not in ids_in_page:
2026                                 ids_in_page.append(mobj.group(1))
2027                 video_ids.extend(ids_in_page)
2028
2029                 playliststart = self._downloader.params.get('playliststart', 1)
2030                 playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
2031                 if playliststart > 0:
2032                         video_ids = video_ids[playliststart:]   
2033
2034                 for id in video_ids:
2035                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2036                 return
2037
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are registered on a downloader via its
	add_post_processor() method. After every successful download the
	downloader invokes run() on each registered processor in turn,
	feeding each call the dictionary returned by the previous one.
	Processing stops as soon as a processor returns None or the end of
	the chain is reached.

	Like InfoExtractor objects, PostProcessors keep a reference back to
	the downloader they are attached to ("mutual registration").
	"""

	# Downloader this processor is attached to (set via __init__ or
	# set_downloader()).
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Attach this post processor to the given downloader."""
		self._downloader = downloader

	def run(self, information):
		"""Process one downloaded video.

		"information" is a dictionary shaped like the ones produced by
		InfoExtractors, extended with a "filepath" key naming the file
		that was downloaded.

		Returning None halts the post-processing chain; returning a
		(possibly modified) information dictionary passes it on to the
		next processor in the chain. Implementations may also raise a
		PostProcessingError, which the calling downloader takes into
		account.
		"""
		# The base class is a no-op: pass the information through.
		return information
2083         
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from github.com
		def update_self(downloader, filename):
			"""Overwrite the script at `filename` with the latest released version."""
			# Note: downloader is only used for its to_screen() output method
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_screen('Updated to version %s' % latest_version)

		# Parse command line
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.10.24',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
		parser.add_option('--playlist-start',
				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		video_format.add_option('-b', '--best-quality',
				action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		filesystem.add_option('--cookies',
				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Open appropriate CookieJar: in-memory only unless --cookies was
		# given, in which case an existing readable cookie file is loaded
		if opts.cookiefile is None:
			jar = cookielib.CookieJar()
		else:
			try:
				jar = cookielib.MozillaCookieJar(opts.cookiefile)
				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
					jar.load()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to open cookie file')

		# General configuration
		cookie_processor = urllib2.HTTPCookieProcessor(jar)
		# NOTE(review): the second install_opener() call replaces the opener
		# installed by the first, so the ProxyHandler opener appears to be
		# discarded -- confirm this is intended.
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(cookie_processor))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Batch file verification: read URLs (one per line), dropping blanks
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.bestquality:
			print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			# Convert e.g. "50k" into a number of bytes per second
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		if opts.playliststart is not None:
			try:
				opts.playliststart = long(opts.playliststart)
			except (TypeError, ValueError), err:
				parser.error(u'invalid playlist page specified')

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		generic_ie = GenericIE()

		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# Pick the output filename template: an explicit -o wins, then
			# format/title-specific defaults, finally plain '%(id)s.%(ext)s'
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'logtostderr': opts.outtmpl == '-',
			})
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing: -U with no URLs is a valid invocation
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')