# Source snapshot of youtube-dl at commit 24722d292ecadfd4b16e2775ca9a53efc145628a
# [youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import cookielib
8 import htmlentitydefs
9 import httplib
10 import locale
11 import math
12 import netrc
13 import os
14 import os.path
15 import re
16 import socket
17 import string
18 import subprocess
19 import sys
20 import time
21 import urllib
22 import urllib2
23
24 # parse_qs was moved from the cgi module to the urlparse module recently.
25 try:
26         from urlparse import parse_qs
27 except ImportError:
28         from cgi import parse_qs
29
# Default HTTP headers sent with every request.  The User-Agent mimics a
# real browser because some sites serve different content to scripts.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Language': 'en-us,en;q=0.5',
}

# Characters considered safe for "simplified" titles: ASCII letters and
# digits, held as a unicode string (Python 2 str.decode).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

# Maps English month names to zero-padded month numbers, for date parsing.
month_name_to_number = {
	'January':	'01',
	'February':	'02',
	'March':	'03',
	'April':	'04',
	'May':		'05',
	'June':		'06',
	'July':		'07',
	'August':	'08',
	'September':	'09',
	'October':	'10',
	'November':	'11',
	'December':	'12',
}
53
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks: if the locale
	reports an encoding that does not actually exist or cannot encode
	text, fall back to UTF-8.
	"""
	try:
		pref = locale.getpreferredencoding()
		# Verify the reported codec really exists and works.
		u'TEST'.encode(pref)
	except Exception:
		# Was a bare 'except:', which also swallowed KeyboardInterrupt
		# and SystemExit; the one-shot generator wrapper around this
		# logic was removed as it added nothing.
		pref = 'UTF-8'
	return pref
69
70 def htmlentity_transform(matchobj):
71         """Transforms an HTML entity to a Unicode character.
72         
73         This function receives a match object and is intended to be used with
74         the re.sub() function.
75         """
76         entity = matchobj.group(1)
77
78         # Known non-numeric HTML entity
79         if entity in htmlentitydefs.name2codepoint:
80                 return unichr(htmlentitydefs.name2codepoint[entity])
81
82         # Unicode character
83         mobj = re.match(ur'(?u)#(x?\d+)', entity)
84         if mobj is not None:
85                 numstr = mobj.group(1)
86                 if numstr.startswith(u'x'):
87                         base = 16
88                         numstr = u'0%s' % numstr
89                 else:
90                         base = 10
91                 return unichr(long(numstr, base))
92
93         # Unknown entity in name, return its literal representation
94         return (u'&%s;' % entity)
95
96 def sanitize_title(utitle):
97         """Sanitizes a video title so it could be used as part of a filename."""
98         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
99         return utitle.replace(unicode(os.sep), u'%')
100
101 def sanitize_open(filename, open_mode):
102         """Try to open the given filename, and slightly tweak it if this fails.
103
104         Attempts to open the given filename. If this fails, it tries to change
105         the filename slightly, step by step, until it's either able to open it
106         or it fails and raises a final exception, like the standard open()
107         function.
108
109         It returns the tuple (stream, definitive_file_name).
110         """
111         try:
112                 if filename == u'-':
113                         if sys.platform == 'win32':
114                                 import msvcrt
115                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
116                         return (sys.stdout, filename)
117                 stream = open(filename, open_mode)
118                 return (stream, filename)
119         except (IOError, OSError), err:
120                 # In case of error, try to remove win32 forbidden chars
121                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
122
123                 # An exception here should be caught in the caller
124                 stream = open(filename, open_mode)
125                 return (stream, filename)
126
127
class DownloadError(Exception):
	"""Download Error exception.

	Raised by FileDownloader objects when a download problem occurs and
	the downloader has not been configured to continue on errors.  The
	exception carries the corresponding error message.
	"""
	pass
136
class SameFileError(Exception):
	"""Same File exception.

	Raised by FileDownloader objects when they detect that several
	requested downloads would all be written to the same file on disk.
	"""
	pass
144
class PostProcessingError(Exception):
	"""Post Processing exception.

	Raised by a PostProcessor's .run() method to signal that the
	postprocessing task failed.
	"""
	pass
152
class UnavailableVideoError(Exception):
	"""Unavailable Format exception.

	Raised when a video is requested in a format that does not exist for
	that video.
	"""
	pass
160
class ContentTooShortError(Exception):
	"""Content Too Short exception.

	Raised by FileDownloader objects when the amount of data received is
	smaller than what the server announced, which usually means the
	connection was interrupted.
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.expected = expected
		self.downloaded = downloaded
175
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	"""

	# Class-level defaults; every one of these is shadowed by an
	# instance attribute assigned in __init__.
	params = None             # option dictionary (see class docstring)
	_ies = []                 # registered InfoExtractor objects, in order
	_pps = []                 # registered PostProcessor chain
	_download_retcode = None  # process return code (0 = OK, 1 = some error ignored)
	_num_downloads = None     # ordinal of the current download (for %(autonumber)s)
	_screen_file = None       # stream for screen output (stdout or stderr)

233         def __init__(self, params):
234                 """Create a FileDownloader object with the given options."""
235                 self._ies = []
236                 self._pps = []
237                 self._download_retcode = 0
238                 self._num_downloads = 0
239                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
240                 self.params = params
241         
242         @staticmethod
243         def pmkdir(filename):
244                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
245                 components = filename.split(os.sep)
246                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
247                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
248                 for dir in aggregate:
249                         if not os.path.exists(dir):
250                                 os.mkdir(dir)
251         
252         @staticmethod
253         def format_bytes(bytes):
254                 if bytes is None:
255                         return 'N/A'
256                 if type(bytes) is str:
257                         bytes = float(bytes)
258                 if bytes == 0.0:
259                         exponent = 0
260                 else:
261                         exponent = long(math.log(bytes, 1024.0))
262                 suffix = 'bkMGTPEZY'[exponent]
263                 converted = float(bytes) / float(1024**exponent)
264                 return '%.2f%s' % (converted, suffix)
265
266         @staticmethod
267         def calc_percent(byte_counter, data_len):
268                 if data_len is None:
269                         return '---.-%'
270                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
271
272         @staticmethod
273         def calc_eta(start, now, total, current):
274                 if total is None:
275                         return '--:--'
276                 dif = now - start
277                 if current == 0 or dif < 0.001: # One millisecond
278                         return '--:--'
279                 rate = float(current) / dif
280                 eta = long((float(total) - float(current)) / rate)
281                 (eta_mins, eta_secs) = divmod(eta, 60)
282                 if eta_mins > 99:
283                         return '--:--'
284                 return '%02d:%02d' % (eta_mins, eta_secs)
285
286         @staticmethod
287         def calc_speed(start, now, bytes):
288                 dif = now - start
289                 if bytes == 0 or dif < 0.001: # One millisecond
290                         return '%10s' % '---b/s'
291                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
292
293         @staticmethod
294         def best_block_size(elapsed_time, bytes):
295                 new_min = max(bytes / 2.0, 1.0)
296                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
297                 if elapsed_time < 0.001:
298                         return long(new_max)
299                 rate = bytes / elapsed_time
300                 if rate > new_max:
301                         return long(new_max)
302                 if rate < new_min:
303                         return long(new_min)
304                 return long(rate)
305
306         @staticmethod
307         def parse_bytes(bytestr):
308                 """Parse a string indicating a byte quantity into a long integer."""
309                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
310                 if matchobj is None:
311                         return None
312                 number = float(matchobj.group(1))
313                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
314                 return long(round(number * multiplier))
315
316         def add_info_extractor(self, ie):
317                 """Add an InfoExtractor object to the end of the list."""
318                 self._ies.append(ie)
319                 ie.set_downloader(self)
320         
321         def add_post_processor(self, pp):
322                 """Add a PostProcessor object to the end of the chain."""
323                 self._pps.append(pp)
324                 pp.set_downloader(self)
325         
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		message -- unicode text; it is encoded with the system's preferred
		encoding before being written.
		skip_eol -- when True no newline is appended (used by progress
		lines that are redrawn in place with a leading '\\r').
		ignore_encoding_errors -- when True a UnicodeEncodeError is
		swallowed instead of propagated.
		"""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				# The trailing comma suppresses print's own newline; the
				# chosen terminator is already part of the written text.
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			# NOTE: the stream is flushed even in quiet mode.
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
336         
337         def to_stderr(self, message):
338                 """Print message to stderr."""
339                 print >>sys.stderr, message.encode(preferredencoding())
340         
341         def fixed_template(self):
342                 """Checks if the output template is fixed."""
343                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
344
345         def trouble(self, message=None):
346                 """Determine action to take when a download problem appears.
347
348                 Depending on if the downloader has been configured to ignore
349                 download errors or not, this method may throw an exception or
350                 not when errors are found, after printing the message.
351                 """
352                 if message is not None:
353                         self.to_stderr(message)
354                 if not self.params.get('ignoreerrors', False):
355                         raise DownloadError(message)
356                 self._download_retcode = 1
357
358         def slow_down(self, start_time, byte_counter):
359                 """Sleep if the download speed is over the rate limit."""
360                 rate_limit = self.params.get('ratelimit', None)
361                 if rate_limit is None or byte_counter == 0:
362                         return
363                 now = time.time()
364                 elapsed = now - start_time
365                 if elapsed <= 0.0:
366                         return
367                 speed = float(byte_counter) / elapsed
368                 if speed > rate_limit:
369                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
370
371         def report_destination(self, filename):
372                 """Report destination filename."""
373                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
374         
375         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
376                 """Report download progress."""
377                 if self.params.get('noprogress', False):
378                         return
379                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
380                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
381
382         def report_resuming_byte(self, resume_len):
383                 """Report attempt to resume at given byte."""
384                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
385         
386         def report_retry(self, count, retries):
387                 """Report retry in case of HTTP error 5xx"""
388                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
389         
390         def report_file_already_downloaded(self, file_name):
391                 """Report file has already been fully downloaded."""
392                 try:
393                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
394                 except (UnicodeEncodeError), err:
395                         self.to_screen(u'[download] The file has already been downloaded')
396         
397         def report_unable_to_resume(self):
398                 """Report it was impossible to resume download."""
399                 self.to_screen(u'[download] Unable to resume')
400         
401         def report_finish(self):
402                 """Report download finished."""
403                 if self.params.get('noprogress', False):
404                         self.to_screen(u'[download] Download completed')
405                 else:
406                         self.to_screen(u'')
407         
408         def increment_downloads(self):
409                 """Increment the ordinal that assigns a number to each file."""
410                 self._num_downloads += 1
411
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		Handles the simulate/forced-printing options, expands the output
		filename template, creates missing directories, downloads the
		data and finally runs the postprocessing chain.  Errors are
		routed through trouble(); a filesystem error during the download
		itself raises UnavailableVideoError.
		"""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

			return

		try:
			template_dict = dict(info_dict)
			# 'epoch' and 'autonumber' are extra fields made available to
			# the output template in addition to the extractor's own.
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			# Local filesystem trouble during the download is treated as
			# the video being unavailable in the requested format.
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
463
464         def download(self, url_list):
465                 """Download a given list of URLs."""
466                 if len(url_list) > 1 and self.fixed_template():
467                         raise SameFileError(self.params['outtmpl'])
468
469                 for url in url_list:
470                         suitable_found = False
471                         for ie in self._ies:
472                                 # Go to next InfoExtractor if not suitable
473                                 if not ie.suitable(url):
474                                         continue
475
476                                 # Suitable InfoExtractor found
477                                 suitable_found = True
478
479                                 # Extract information from URL and process it
480                                 ie.extract(url)
481
482                                 # Suitable InfoExtractor had been found; go to next URL
483                                 break
484
485                         if not suitable_found:
486                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
487
488                 return self._download_retcode
489
490         def post_process(self, filename, ie_info):
491                 """Run the postprocessing chain on the given file."""
492                 info = dict(ie_info)
493                 info['filepath'] = filename
494                 for pp in self._pps:
495                         info = pp.run(info)
496                         if info is None:
497                                 break
498         
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by shelling out to the rtmpdump tool.

		filename -- destination file on disk.
		url -- rtmp URL to fetch.
		player_url -- SWF player URL passed to rtmpdump's -W, or None.

		Returns True on success, False on failure (after calling
		trouble()).
		"""
		self.report_destination(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(filename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			# Re-invoke with '-e' (resume); '-k 1' is added after a plain
			# failure (exit code 1) as a further retry tweak.
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(filename)
			if prevsize == cursize and retval == 1:
				# No progress was made and rtmpdump still reports failure:
				# give up instead of looping forever.
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
528
	def _do_download(self, filename, url, player_url):
		"""Download url to filename over HTTP (or delegate rtmp URLs).

		Implements resuming via the Range header, bounded retries for
		HTTP 5xx errors, adaptive block sizing, progress reporting and
		rate limiting.  Returns True on success, False on a handled
		failure; raises ContentTooShortError when the received size does
		not match the announced Content-Length.
		"""
		# Attempt to download using rtmpdump
		if url.startswith('rtmp'):
			return self._download_with_rtmpdump(filename, url, player_url)

		stream = None
		open_mode = 'wb'
		# basic_request stays Range-free so a failed resume can retry
		# the download from the beginning.
		basic_request = urllib2.Request(url, None, std_headers)
		request = urllib2.Request(url, None, std_headers)

		# Establish possible resume length
		if os.path.isfile(filename):
			resume_len = os.path.getsize(filename)
		else:
			resume_len = 0

		# Request parameters in case of being able to resume
		if self.params.get('continuedl', False) and resume_len != 0:
			self.report_resuming_byte(resume_len)
			request.add_header('Range','bytes=%d-' % resume_len)
			open_mode = 'ab'

		count = 0
		retries = self.params.get('retries', 0)
		while count <= retries:
			# Establish connection
			try:
				data = urllib2.urlopen(request)
				break
			except (urllib2.HTTPError, ), err:
				if (err.code < 500 or err.code >= 600) and err.code != 416:
					# Unexpected HTTP error
					raise
				elif err.code == 416:
					# Unable to resume (requested range not satisfiable)
					try:
						# Open the connection again without the range header
						data = urllib2.urlopen(basic_request)
						content_length = data.info()['Content-Length']
					except (urllib2.HTTPError, ), err:
						if err.code < 500 or err.code >= 600:
							raise
					else:
						# Examine the reported length
						if (content_length is not None and
						    (resume_len - 100 < long(content_length) < resume_len + 100)):
							# The file had already been fully downloaded.
							# Explanation to the above condition: in issue #175 it was revealed that
							# YouTube sometimes adds or removes a few bytes from the end of the file,
							# changing the file size slightly and causing problems for some users. So
							# I decided to implement a suggested change and consider the file
							# completely downloaded if the file size differs less than 100 bytes from
							# the one in the hard drive.
							self.report_file_already_downloaded(filename)
							return True
						else:
							# The length does not match, we start the download over
							self.report_unable_to_resume()
							open_mode = 'wb'
							break
			# Retry
			count += 1
			if count <= retries:
				self.report_retry(count, retries)

		if count > retries:
			self.trouble(u'ERROR: giving up after %s retries' % retries)
			return False

		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
		byte_counter = 0
		block_size = 1024
		start = time.time()
		while True:
			# Download and write
			before = time.time()
			data_block = data.read(block_size)
			after = time.time()
			data_block_len = len(data_block)
			if data_block_len == 0:
				break
			byte_counter += data_block_len

			# Open file just in time, so an empty/failed response never
			# clobbers an existing file.
			if stream is None:
				try:
					(stream, filename) = sanitize_open(filename, open_mode)
					self.report_destination(filename)
				except (OSError, IOError), err:
					self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
					return False
			try:
				stream.write(data_block)
			except (IOError, OSError), err:
				self.trouble(u'\nERROR: unable to write data: %s' % str(err))
				return False
			# Adapt the next read size to the measured throughput.
			block_size = self.best_block_size(after - before, data_block_len)

			# Progress message
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
			speed_str = self.calc_speed(start, time.time(), byte_counter)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			# Apply rate limit
			self.slow_down(start, byte_counter)

		self.report_finish()
		# NOTE: data_len is the (string) Content-Length header, compared
		# against str(byte_counter).
		if data_len is not None and str(byte_counter) != data_len:
			raise ContentTooShortError(byte_counter, long(data_len))
		return True
641
class InfoExtractor(object):
	"""Base class for all information extractors.

	An information extractor ("IE") turns a URL into one or more
	dictionaries describing the videos that URL refers to: the real
	video URL, the title and simplified title, the uploader and so on.
	Each dictionary is handed to the FileDownloader, which may download
	the video, print metadata, etc. Mandatory dictionary fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	Optional fields, used only when their respective forced-printing
	functions are called (e.g. when youtube-dl serves as the backend
	for a video search front end such as youtube2mp3):

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Concrete extractors redefine _real_initialize(), _real_extract()
	and the suitable() static method, and are typically instantiated
	and registered with the main downloader.
	"""

	# True once _real_initialize() has run for this instance.
	_ready = False
	# FileDownloader this IE reports to and hands results to (may be None).
	_downloader = None

	def __init__(self, downloader=None):
		"""Create the extractor, optionally attaching a downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this IE can handle the given URL."""
		return False

	def set_downloader(self, downloader):
		"""Attach the FileDownloader used for output and downloads."""
		self._downloader = downloader

	def initialize(self):
		"""Run one-time setup (authentication, etc) at most once."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Initialize if needed, then extract information from url."""
		self.initialize()
		return self._real_extract(url)

	def _real_initialize(self):
		"""Actual initialization process; subclasses redefine this."""
		pass

	def _real_extract(self, url):
		"""Actual extraction process; subclasses redefine this."""
		pass
712
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com.

	Handles language selection, optional login (explicit credentials or
	.netrc), age-gate confirmation, and extraction of the real video URL
	for the requested format (or the best / all available formats).
	"""

	# Group 1 captures the optional scheme/host prefix; group 2 is the
	# video id. The trailing conditional "(?(1).+)?" permits extra text
	# after the id only when a prefix matched, so a bare id also matches.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Map fmt code -> filename extension; anything unlisted defaults to 'flv'.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')
	
	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')
	
	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
	
	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
	
	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
	
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested format is not available."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
	
	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')
	
	def _real_initialize(self):
		"""Set interface language; optionally log in and confirm age."""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				# Credentials are optional: warn and continue anonymously.
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, authentication failed.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return
	
		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract video information from a youtube.com URL and hand it
		to the downloader. Errors are reported via trouble() and cause
		an early return (no value is returned)."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		# NOTE(review): '&amp;' looks like an HTML-escaping artifact -- the
		# server receives a parameter literally named 'amp;has_verified'.
		# Left untouched (runtime string); confirm before changing.
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Unescape the JSON-style backslash escapes in the URL.
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info
		self.report_video_info_webpage_download(video_id)
		# Try several 'el' page types; some videos only return a usable
		# token for specific ones. Stop at the first response containing
		# a 'token' parameter.
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url, None, std_headers)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse every run of non-alphanumeric
		# characters to a single underscore, then trim underscores.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			try:
				if ',' in mobj.group(1):
					# Month Day, Year
					m, d, y = mobj.group(1).replace(',', '').split()
				else:
					# Day Month Year, we'll suppose
					d, m, y = mobj.group(1).split()
				m = month_name_to_number[m]
				d = '%02d' % (long(d))
				upload_date = '%s%s%s' % (y, m, d)
			except:
				# Date formats vary; the bare except is deliberate
				# best-effort -- any parse failure leaves 'NA'.
				upload_date = u'NA'

		# description
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		requested_format = self._downloader.params.get('format', None)
		get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is a comma-separated list of 'fmt|url' pairs.
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				# Only consider formats at or below the requested quality cap.
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if requested_format is None:
				video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
			elif requested_format == '-1':
				video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
			else:
				video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					# and/or idiom: u'NA' for the RTMP case (format_param is None)
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
995
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com.

	Initialization retrieves the family-filter disclaimer and confirms
	age; extraction delegates YouTube-hosted items ('yt-' ids) to the
	supplied YoutubeIE instance.
	"""

	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# YoutubeIE instance used for 'yt-' prefixed video ids.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Constructor. Requires a YoutubeIE for delegated extraction."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')
	
	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
	
	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the disclaimer page, then POST the age confirmation."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
		try:
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
	
	def _real_extract(self, url):
		"""Extract video information from a metacafe.com URL."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			# Delegate YouTube-hosted videos to the YouTube extractor.
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			# NOTE(review): message is missing the word "to"; left as-is
			# because it is a runtime string.
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			mediaURL = urllib.unquote(mobj.group(1))
			# Extension taken from the last three characters of the URL.
			video_extension = mediaURL[-3:]
			
			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Fall back to the flashvars blob embedded in the page.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			# Undo JSON-style escaping of slashes.
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1140
class DailymotionIE(InfoExtractor):
	"""Information Extractor for Dailymotion"""

	# Group 1 is the video id (text before the first '_'); group 2 is the
	# URL slug, used as the simplified title.
	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		return (re.match(DailymotionIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
	
	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""No initialization needed for Dailymotion."""
		return

	def _real_extract(self, url):
		"""Extract video information from a Dailymotion URL."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			# NOTE(review): message is missing the word "to"; left as-is
			# because it is a runtime string.
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		# if needed add http://www.dailymotion.com/ if relative URL

		video_url = mediaURL

		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'ERROR: unable to download video')
1229 class GoogleIE(InfoExtractor):
1230         """Information extractor for video.google.com."""
1231
1232         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1233
1234         def __init__(self, downloader=None):
1235                 InfoExtractor.__init__(self, downloader)
1236
1237         @staticmethod
1238         def suitable(url):
1239                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1240
1241         def report_download_webpage(self, video_id):
1242                 """Report webpage download."""
1243                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1244
1245         def report_extraction(self, video_id):
1246                 """Report information extraction."""
1247                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1248
1249         def _real_initialize(self):
1250                 return
1251
1252         def _real_extract(self, url):
1253                 # Extract id from URL
1254                 mobj = re.match(self._VALID_URL, url)
1255                 if mobj is None:
1256                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1257                         return
1258
1259                 # At this point we have a new video
1260                 self._downloader.increment_downloads()
1261                 video_id = mobj.group(1)
1262
1263                 video_extension = 'mp4'
1264
1265                 # Retrieve video webpage to extract further information
1266                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1267                 try:
1268                         self.report_download_webpage(video_id)
1269                         webpage = urllib2.urlopen(request).read()
1270                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1271                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1272                         return
1273
1274                 # Extract URL, uploader, and title from webpage
1275                 self.report_extraction(video_id)
1276                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1277                 if mobj is None:
1278                         video_extension = 'flv'
1279                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1280                 if mobj is None:
1281                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1282                         return
1283                 mediaURL = urllib.unquote(mobj.group(1))
1284                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1285                 mediaURL = mediaURL.replace('\\x26', '\x26')
1286
1287                 video_url = mediaURL
1288
1289                 mobj = re.search(r'<title>(.*)</title>', webpage)
1290                 if mobj is None:
1291                         self._downloader.trouble(u'ERROR: unable to extract title')
1292                         return
1293                 video_title = mobj.group(1).decode('utf-8')
1294                 video_title = sanitize_title(video_title)
1295                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1296
1297                 # Extract video description
1298                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1299                 if mobj is None:
1300                         self._downloader.trouble(u'ERROR: unable to extract video description')
1301                         return
1302                 video_description = mobj.group(1).decode('utf-8')
1303                 if not video_description:
1304                         video_description = 'No description available.'
1305
1306                 # Extract video thumbnail
1307                 if self._downloader.params.get('forcethumbnail', False):
1308                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1309                         try:
1310                                 webpage = urllib2.urlopen(request).read()
1311                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1312                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1313                                 return
1314                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1315                         if mobj is None:
1316                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1317                                 return
1318                         video_thumbnail = mobj.group(1)
1319                 else:   # we need something to pass to process_info
1320                         video_thumbnail = ''
1321
1322
1323                 try:
1324                         # Process video information
1325                         self._downloader.process_info({
1326                                 'id':           video_id.decode('utf-8'),
1327                                 'url':          video_url.decode('utf-8'),
1328                                 'uploader':     u'NA',
1329                                 'upload_date':  u'NA',
1330                                 'title':        video_title,
1331                                 'stitle':       simple_title,
1332                                 'ext':          video_extension.decode('utf-8'),
1333                                 'format':       u'NA',
1334                                 'player_url':   None,
1335                         })
1336                 except UnavailableVideoError:
1337                         self._downloader.trouble(u'ERROR: unable to download video')
1338
1339
1340 class PhotobucketIE(InfoExtractor):
1341         """Information extractor for photobucket.com."""
1342
1343         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1344
1345         def __init__(self, downloader=None):
1346                 InfoExtractor.__init__(self, downloader)
1347
1348         @staticmethod
1349         def suitable(url):
1350                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1351
1352         def report_download_webpage(self, video_id):
1353                 """Report webpage download."""
1354                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1355
1356         def report_extraction(self, video_id):
1357                 """Report information extraction."""
1358                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1359
1360         def _real_initialize(self):
1361                 return
1362
1363         def _real_extract(self, url):
1364                 # Extract id from URL
1365                 mobj = re.match(self._VALID_URL, url)
1366                 if mobj is None:
1367                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1368                         return
1369
1370                 # At this point we have a new video
1371                 self._downloader.increment_downloads()
1372                 video_id = mobj.group(1)
1373
1374                 video_extension = 'flv'
1375
1376                 # Retrieve video webpage to extract further information
1377                 request = urllib2.Request(url)
1378                 try:
1379                         self.report_download_webpage(video_id)
1380                         webpage = urllib2.urlopen(request).read()
1381                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1382                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1383                         return
1384
1385                 # Extract URL, uploader, and title from webpage
1386                 self.report_extraction(video_id)
1387                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1388                 if mobj is None:
1389                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1390                         return
1391                 mediaURL = urllib.unquote(mobj.group(1))
1392
1393                 video_url = mediaURL
1394
1395                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1396                 if mobj is None:
1397                         self._downloader.trouble(u'ERROR: unable to extract title')
1398                         return
1399                 video_title = mobj.group(1).decode('utf-8')
1400                 video_title = sanitize_title(video_title)
1401                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1402
1403                 video_uploader = mobj.group(2).decode('utf-8')
1404
1405                 try:
1406                         # Process video information
1407                         self._downloader.process_info({
1408                                 'id':           video_id.decode('utf-8'),
1409                                 'url':          video_url.decode('utf-8'),
1410                                 'uploader':     video_uploader,
1411                                 'upload_date':  u'NA',
1412                                 'title':        video_title,
1413                                 'stitle':       simple_title,
1414                                 'ext':          video_extension.decode('utf-8'),
1415                                 'format':       u'NA',
1416                                 'player_url':   None,
1417                         })
1418                 except UnavailableVideoError:
1419                         self._downloader.trouble(u'ERROR: unable to download video')
1420
1421
1422 class YahooIE(InfoExtractor):
1423         """Information extractor for video.yahoo.com."""
1424
1425         # _VALID_URL matches all Yahoo! Video URLs
1426         # _VPAGE_URL matches only the extractable '/watch/' URLs
1427         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1428         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1429
1430         def __init__(self, downloader=None):
1431                 InfoExtractor.__init__(self, downloader)
1432
1433         @staticmethod
1434         def suitable(url):
1435                 return (re.match(YahooIE._VALID_URL, url) is not None)
1436
1437         def report_download_webpage(self, video_id):
1438                 """Report webpage download."""
1439                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1440
1441         def report_extraction(self, video_id):
1442                 """Report information extraction."""
1443                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1444
1445         def _real_initialize(self):
1446                 return
1447
1448         def _real_extract(self, url, new_video=True):
1449                 # Extract ID from URL
1450                 mobj = re.match(self._VALID_URL, url)
1451                 if mobj is None:
1452                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1453                         return
1454
1455                 # At this point we have a new video
1456                 self._downloader.increment_downloads()
1457                 video_id = mobj.group(2)
1458                 video_extension = 'flv'
1459
1460                 # Rewrite valid but non-extractable URLs as
1461                 # extractable English language /watch/ URLs
1462                 if re.match(self._VPAGE_URL, url) is None:
1463                         request = urllib2.Request(url)
1464                         try:
1465                                 webpage = urllib2.urlopen(request).read()
1466                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1467                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1468                                 return
1469
1470                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1471                         if mobj is None:
1472                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1473                                 return
1474                         yahoo_id = mobj.group(1)
1475
1476                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1477                         if mobj is None:
1478                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1479                                 return
1480                         yahoo_vid = mobj.group(1)
1481
1482                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1483                         return self._real_extract(url, new_video=False)
1484
1485                 # Retrieve video webpage to extract further information
1486                 request = urllib2.Request(url)
1487                 try:
1488                         self.report_download_webpage(video_id)
1489                         webpage = urllib2.urlopen(request).read()
1490                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1491                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1492                         return
1493
1494                 # Extract uploader and title from webpage
1495                 self.report_extraction(video_id)
1496                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1497                 if mobj is None:
1498                         self._downloader.trouble(u'ERROR: unable to extract video title')
1499                         return
1500                 video_title = mobj.group(1).decode('utf-8')
1501                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1502
1503                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1504                 if mobj is None:
1505                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1506                         return
1507                 video_uploader = mobj.group(1).decode('utf-8')
1508
1509                 # Extract video thumbnail
1510                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1511                 if mobj is None:
1512                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1513                         return
1514                 video_thumbnail = mobj.group(1).decode('utf-8')
1515
1516                 # Extract video description
1517                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1518                 if mobj is None:
1519                         self._downloader.trouble(u'ERROR: unable to extract video description')
1520                         return
1521                 video_description = mobj.group(1).decode('utf-8')
1522                 if not video_description: video_description = 'No description available.'
1523
1524                 # Extract video height and width
1525                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1526                 if mobj is None:
1527                         self._downloader.trouble(u'ERROR: unable to extract video height')
1528                         return
1529                 yv_video_height = mobj.group(1)
1530
1531                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1532                 if mobj is None:
1533                         self._downloader.trouble(u'ERROR: unable to extract video width')
1534                         return
1535                 yv_video_width = mobj.group(1)
1536
1537                 # Retrieve video playlist to extract media URL
1538                 # I'm not completely sure what all these options are, but we
1539                 # seem to need most of them, otherwise the server sends a 401.
1540                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1541                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1542                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1543                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1544                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1545                 try:
1546                         self.report_download_webpage(video_id)
1547                         webpage = urllib2.urlopen(request).read()
1548                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1549                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1550                         return
1551
1552                 # Extract media URL from playlist XML
1553                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1554                 if mobj is None:
1555                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1556                         return
1557                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1558                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1559
1560                 try:
1561                         # Process video information
1562                         self._downloader.process_info({
1563                                 'id':           video_id.decode('utf-8'),
1564                                 'url':          video_url,
1565                                 'uploader':     video_uploader,
1566                                 'upload_date':  u'NA',
1567                                 'title':        video_title,
1568                                 'stitle':       simple_title,
1569                                 'ext':          video_extension.decode('utf-8'),
1570                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1571                                 'description':  video_description,
1572                                 'thumbnail':    video_thumbnail,
1573                                 'description':  video_description,
1574                                 'player_url':   None,
1575                         })
1576                 except UnavailableVideoError:
1577                         self._downloader.trouble(u'ERROR: unable to download video')
1578
1579
1580 class GenericIE(InfoExtractor):
1581         """Generic last-resort information extractor."""
1582
1583         def __init__(self, downloader=None):
1584                 InfoExtractor.__init__(self, downloader)
1585
1586         @staticmethod
1587         def suitable(url):
1588                 return True
1589
1590         def report_download_webpage(self, video_id):
1591                 """Report webpage download."""
1592                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1593                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1594
1595         def report_extraction(self, video_id):
1596                 """Report information extraction."""
1597                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1598
1599         def _real_initialize(self):
1600                 return
1601
1602         def _real_extract(self, url):
1603                 # At this point we have a new video
1604                 self._downloader.increment_downloads()
1605
1606                 video_id = url.split('/')[-1]
1607                 request = urllib2.Request(url)
1608                 try:
1609                         self.report_download_webpage(video_id)
1610                         webpage = urllib2.urlopen(request).read()
1611                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1612                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1613                         return
1614                 except ValueError, err:
1615                         # since this is the last-resort InfoExtractor, if
1616                         # this error is thrown, it'll be thrown here
1617                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1618                         return
1619
1620                 # Start with something easy: JW Player in SWFObject
1621                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1622                 if mobj is None:
1623                         # Broaden the search a little bit
1624                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1625                 if mobj is None:
1626                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1627                         return
1628
1629                 # It's possible that one of the regexes
1630                 # matched, but returned an empty group:
1631                 if mobj.group(1) is None:
1632                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1633                         return
1634
1635                 video_url = urllib.unquote(mobj.group(1))
1636                 video_id  = os.path.basename(video_url)
1637
1638                 # here's a fun little line of code for you:
1639                 video_extension = os.path.splitext(video_id)[1][1:]
1640                 video_id        = os.path.splitext(video_id)[0]
1641
1642                 # it's tempting to parse this further, but you would
1643                 # have to take into account all the variations like
1644                 #   Video Title - Site Name
1645                 #   Site Name | Video Title
1646                 #   Video Title - Tagline | Site Name
1647                 # and so on and so forth; it's just not practical
1648                 mobj = re.search(r'<title>(.*)</title>', webpage)
1649                 if mobj is None:
1650                         self._downloader.trouble(u'ERROR: unable to extract title')
1651                         return
1652                 video_title = mobj.group(1).decode('utf-8')
1653                 video_title = sanitize_title(video_title)
1654                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1655
1656                 # video uploader is domain name
1657                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1658                 if mobj is None:
1659                         self._downloader.trouble(u'ERROR: unable to extract title')
1660                         return
1661                 video_uploader = mobj.group(1).decode('utf-8')
1662
1663                 try:
1664                         # Process video information
1665                         self._downloader.process_info({
1666                                 'id':           video_id.decode('utf-8'),
1667                                 'url':          video_url.decode('utf-8'),
1668                                 'uploader':     video_uploader,
1669                                 'upload_date':  u'NA',
1670                                 'title':        video_title,
1671                                 'stitle':       simple_title,
1672                                 'ext':          video_extension.decode('utf-8'),
1673                                 'format':       u'NA',
1674                                 'player_url':   None,
1675                         })
1676                 except UnavailableVideoError, err:
1677                         self._downloader.trouble(u'ERROR: unable to download video')
1678
1679
1680 class YoutubeSearchIE(InfoExtractor):
1681         """Information Extractor for YouTube search queries."""
1682         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1683         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1684         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1685         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1686         _youtube_ie = None
1687         _max_youtube_results = 1000
1688
1689         def __init__(self, youtube_ie, downloader=None):
1690                 InfoExtractor.__init__(self, downloader)
1691                 self._youtube_ie = youtube_ie
1692         
1693         @staticmethod
1694         def suitable(url):
1695                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1696
1697         def report_download_page(self, query, pagenum):
1698                 """Report attempt to download playlist page with given number."""
1699                 query = query.decode(preferredencoding())
1700                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1701
1702         def _real_initialize(self):
1703                 self._youtube_ie.initialize()
1704         
1705         def _real_extract(self, query):
1706                 mobj = re.match(self._VALID_QUERY, query)
1707                 if mobj is None:
1708                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1709                         return
1710
1711                 prefix, query = query.split(':')
1712                 prefix = prefix[8:]
1713                 query  = query.encode('utf-8')
1714                 if prefix == '':
1715                         self._download_n_results(query, 1)
1716                         return
1717                 elif prefix == 'all':
1718                         self._download_n_results(query, self._max_youtube_results)
1719                         return
1720                 else:
1721                         try:
1722                                 n = long(prefix)
1723                                 if n <= 0:
1724                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1725                                         return
1726                                 elif n > self._max_youtube_results:
1727                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1728                                         n = self._max_youtube_results
1729                                 self._download_n_results(query, n)
1730                                 return
1731                         except ValueError: # parsing prefix as integer fails
1732                                 self._download_n_results(query, 1)
1733                                 return
1734
1735         def _download_n_results(self, query, n):
1736                 """Downloads a specified number of results for a query"""
1737
1738                 video_ids = []
1739                 already_seen = set()
1740                 pagenum = 1
1741
1742                 while True:
1743                         self.report_download_page(query, pagenum)
1744                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1745                         request = urllib2.Request(result_url, None, std_headers)
1746                         try:
1747                                 page = urllib2.urlopen(request).read()
1748                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1749                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1750                                 return
1751
1752                         # Extract video identifiers
1753                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1754                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1755                                 if video_id not in already_seen:
1756                                         video_ids.append(video_id)
1757                                         already_seen.add(video_id)
1758                                         if len(video_ids) == n:
1759                                                 # Specified n videos reached
1760                                                 for id in video_ids:
1761                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1762                                                 return
1763
1764                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1765                                 for id in video_ids:
1766                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1767                                 return
1768
1769                         pagenum = pagenum + 1
1770
1771 class GoogleSearchIE(InfoExtractor):
1772         """Information Extractor for Google Video search queries."""
1773         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1774         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1775         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1776         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1777         _google_ie = None
1778         _max_google_results = 1000
1779
1780         def __init__(self, google_ie, downloader=None):
1781                 InfoExtractor.__init__(self, downloader)
1782                 self._google_ie = google_ie
1783         
1784         @staticmethod
1785         def suitable(url):
1786                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1787
1788         def report_download_page(self, query, pagenum):
1789                 """Report attempt to download playlist page with given number."""
1790                 query = query.decode(preferredencoding())
1791                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1792
1793         def _real_initialize(self):
1794                 self._google_ie.initialize()
1795         
1796         def _real_extract(self, query):
1797                 mobj = re.match(self._VALID_QUERY, query)
1798                 if mobj is None:
1799                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1800                         return
1801
1802                 prefix, query = query.split(':')
1803                 prefix = prefix[8:]
1804                 query  = query.encode('utf-8')
1805                 if prefix == '':
1806                         self._download_n_results(query, 1)
1807                         return
1808                 elif prefix == 'all':
1809                         self._download_n_results(query, self._max_google_results)
1810                         return
1811                 else:
1812                         try:
1813                                 n = long(prefix)
1814                                 if n <= 0:
1815                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1816                                         return
1817                                 elif n > self._max_google_results:
1818                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1819                                         n = self._max_google_results
1820                                 self._download_n_results(query, n)
1821                                 return
1822                         except ValueError: # parsing prefix as integer fails
1823                                 self._download_n_results(query, 1)
1824                                 return
1825
	def _download_n_results(self, query, n):
		"""Download up to n search results for query, extracting each one.

		Pages through the search results until n unique video ids have
		been collected or no further pages exist, then hands every id to
		the wrapped GoogleIE.
		"""

		video_ids = []
		already_seen = set()  # guards against duplicate ids across pages
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url, None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				video_id = mobj.group(1)
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
					if len(video_ids) == n:
						# Specified n videos reached
						for id in video_ids:
							self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
						return

			# No "Next" marker: this was the last page, so extract what we have.
			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				for id in video_ids:
					self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
				return

			pagenum = pagenum + 1
1861
1862 class YahooSearchIE(InfoExtractor):
1863         """Information Extractor for Yahoo! Video search queries."""
1864         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1865         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1866         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1867         _MORE_PAGES_INDICATOR = r'\s*Next'
1868         _yahoo_ie = None
1869         _max_yahoo_results = 1000
1870
1871         def __init__(self, yahoo_ie, downloader=None):
1872                 InfoExtractor.__init__(self, downloader)
1873                 self._yahoo_ie = yahoo_ie
1874         
1875         @staticmethod
1876         def suitable(url):
1877                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1878
1879         def report_download_page(self, query, pagenum):
1880                 """Report attempt to download playlist page with given number."""
1881                 query = query.decode(preferredencoding())
1882                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1883
1884         def _real_initialize(self):
1885                 self._yahoo_ie.initialize()
1886         
1887         def _real_extract(self, query):
1888                 mobj = re.match(self._VALID_QUERY, query)
1889                 if mobj is None:
1890                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1891                         return
1892
1893                 prefix, query = query.split(':')
1894                 prefix = prefix[8:]
1895                 query  = query.encode('utf-8')
1896                 if prefix == '':
1897                         self._download_n_results(query, 1)
1898                         return
1899                 elif prefix == 'all':
1900                         self._download_n_results(query, self._max_yahoo_results)
1901                         return
1902                 else:
1903                         try:
1904                                 n = long(prefix)
1905                                 if n <= 0:
1906                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1907                                         return
1908                                 elif n > self._max_yahoo_results:
1909                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
1910                                         n = self._max_yahoo_results
1911                                 self._download_n_results(query, n)
1912                                 return
1913                         except ValueError: # parsing prefix as integer fails
1914                                 self._download_n_results(query, 1)
1915                                 return
1916
1917         def _download_n_results(self, query, n):
1918                 """Downloads a specified number of results for a query"""
1919
1920                 video_ids = []
1921                 already_seen = set()
1922                 pagenum = 1
1923
1924                 while True:
1925                         self.report_download_page(query, pagenum)
1926                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1927                         request = urllib2.Request(result_url, None, std_headers)
1928                         try:
1929                                 page = urllib2.urlopen(request).read()
1930                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1931                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1932                                 return
1933
1934                         # Extract video identifiers
1935                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1936                                 video_id = mobj.group(1)
1937                                 if video_id not in already_seen:
1938                                         video_ids.append(video_id)
1939                                         already_seen.add(video_id)
1940                                         if len(video_ids) == n:
1941                                                 # Specified n videos reached
1942                                                 for id in video_ids:
1943                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1944                                                 return
1945
1946                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1947                                 for id in video_ids:
1948                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1949                                 return
1950
1951                         pagenum = pagenum + 1
1952
1953 class YoutubePlaylistIE(InfoExtractor):
1954         """Information Extractor for YouTube playlists."""
1955
1956         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1957         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1958         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1959         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1960         _youtube_ie = None
1961
1962         def __init__(self, youtube_ie, downloader=None):
1963                 InfoExtractor.__init__(self, downloader)
1964                 self._youtube_ie = youtube_ie
1965         
1966         @staticmethod
1967         def suitable(url):
1968                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1969
1970         def report_download_page(self, playlist_id, pagenum):
1971                 """Report attempt to download playlist page with given number."""
1972                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1973
1974         def _real_initialize(self):
1975                 self._youtube_ie.initialize()
1976         
1977         def _real_extract(self, url):
1978                 # Extract playlist id
1979                 mobj = re.match(self._VALID_URL, url)
1980                 if mobj is None:
1981                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1982                         return
1983
1984                 # Download playlist pages
1985                 playlist_id = mobj.group(1)
1986                 video_ids = []
1987                 pagenum = 1
1988
1989                 while True:
1990                         self.report_download_page(playlist_id, pagenum)
1991                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1992                         try:
1993                                 page = urllib2.urlopen(request).read()
1994                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1995                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1996                                 return
1997
1998                         # Extract video identifiers
1999                         ids_in_page = []
2000                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2001                                 if mobj.group(1) not in ids_in_page:
2002                                         ids_in_page.append(mobj.group(1))
2003                         video_ids.extend(ids_in_page)
2004
2005                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2006                                 break
2007                         pagenum = pagenum + 1
2008
2009                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2010                 playlistend = self._downloader.params.get('playlistend', -1)
2011                 video_ids = video_ids[playliststart:playlistend]
2012
2013                 for id in video_ids:
2014                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2015                 return
2016
2017 class YoutubeUserIE(InfoExtractor):
2018         """Information Extractor for YouTube users."""
2019
2020         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2021         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2022         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2023         _youtube_ie = None
2024
2025         def __init__(self, youtube_ie, downloader=None):
2026                 InfoExtractor.__init__(self, downloader)
2027                 self._youtube_ie = youtube_ie
2028         
2029         @staticmethod
2030         def suitable(url):
2031                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2032
2033         def report_download_page(self, username):
2034                 """Report attempt to download user page."""
2035                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2036
2037         def _real_initialize(self):
2038                 self._youtube_ie.initialize()
2039         
2040         def _real_extract(self, url):
2041                 # Extract username
2042                 mobj = re.match(self._VALID_URL, url)
2043                 if mobj is None:
2044                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2045                         return
2046
2047                 # Download user page
2048                 username = mobj.group(1)
2049                 video_ids = []
2050                 pagenum = 1
2051
2052                 self.report_download_page(username)
2053                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2054                 try:
2055                         page = urllib2.urlopen(request).read()
2056                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2057                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2058                         return
2059
2060                 # Extract video identifiers
2061                 ids_in_page = []
2062
2063                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2064                         if mobj.group(1) not in ids_in_page:
2065                                 ids_in_page.append(mobj.group(1))
2066                 video_ids.extend(ids_in_page)
2067
2068                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2069                 playlistend = self._downloader.params.get('playlistend', -1)
2070                 video_ids = video_ids[playliststart:playlistend]
2071
2072                 for id in video_ids:
2073                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2074                 return
2075
class PostProcessor(object):
	"""Base class for postprocessing hooks.

	Instances are registered on a downloader with its add_post_processor()
	method. After each successful download the downloader walks its chain
	of PostProcessors, feeding each one's run() return value into the next
	one. The walk stops as soon as a processor returns None or the chain
	is exhausted.

	Like InfoExtractor objects, PostProcessors take part in a "mutual
	registration" handshake with their downloader.
	"""

	# Downloader this processor is attached to; set via __init__ or
	# set_downloader().
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Process one finished download.

		The "information" argument is an InfoExtractor-style dictionary
		carrying one extra key, "filepath", which names the file that was
		downloaded.

		Returning None halts the postprocessing chain. Returning a
		dictionary (possibly the received one, with fields changed)
		passes it along to the next processor in the chain. A
		PostProcessingError may also be raised; the calling downloader
		will take it into account.
		"""
		return information # default implementation: pass through unchanged
2121         
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version hosted on github.com
		def update_self(downloader, filename):
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_screen('Updating to latest stable version...')
			# LATEST_VERSION names the newest release tag; the script text
			# is fetched from that tag and written over this file in place.
			latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_screen('Updated to version %s' % latest_version)
2144
		# Parse command line. Options only get declared here; validation of
		# values and conflicting combinations happens after parse_args().
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.11.19',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
		parser.add_option('--playlist-start',
				dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
		parser.add_option('--playlist-end',
				dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)

		# Login credentials for sites that require an account.
		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		# Format selection. -m/--all-formats are aliases that store into
		# the same 'format' destination as -f.
		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
		video_format.add_option('-b', '--best-quality',
				action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
		parser.add_option_group(video_format)

		# The --get-* options all imply quiet simulation (see the
		# FileDownloader params built below).
		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		parser.add_option_group(verbosity)

		# Output naming, batch input and resume behaviour.
		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-A', '--auto-number',
				action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		filesystem.add_option('--cookies',
				dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()
2228
		# Open appropriate CookieJar: in-memory if no --cookies file was
		# given, otherwise a Mozilla-format jar loaded from (and later
		# saved back to) that file.
		if opts.cookiefile is None:
			jar = cookielib.CookieJar()
		else:
			try:
				jar = cookielib.MozillaCookieJar(opts.cookiefile)
				if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
					jar.load()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to open cookie file')

		# General configuration
		cookie_processor = urllib2.HTTPCookieProcessor(jar)
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(cookie_processor))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Batch file verification: read URLs from the file (or stdin for
		# '-'), dropping blank lines and lines starting with #, / or ;.
		batchurls = []
		if opts.batchfile is not None:
			try:
				if opts.batchfile == '-':
					batchfd = sys.stdin
				else:
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.bestquality:
			print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
			parser.error(u'using output template conflicts with using title, literal title or auto number')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# Username given without password: prompt interactively.
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		if opts.retries is not None:
			try:
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')
		# --playlist-start must be a positive integer.
		try:
			opts.playliststart = long(opts.playliststart)
			if opts.playliststart <= 0:
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist start number specified')
		# --playlist-end must be -1 (meaning "last") or a positive integer
		# not smaller than --playlist-start.
		try:
			opts.playlistend = long(opts.playlistend)
			if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
				raise ValueError
		except (TypeError, ValueError), err:
			parser.error(u'invalid playlist end number specified')
2296
		# Information extractors. The search/playlist/user extractors wrap
		# a concrete site extractor that does the actual video extraction.
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		generic_ie = GenericIE()

		# File downloader. Any --get-* option forces quiet simulation; the
		# output template is chosen from the naming flags, falling back to
		# plain "<id>.<ext>".
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			'playliststart': opts.playliststart,
			'playlistend': opts.playlistend,
			'logtostderr': opts.outtmpl == '-',
			})
		# Registration order matters: more specific extractors (searches,
		# playlists, users) must be tried before the plain site extractors.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)
2359
		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing: no URLs is only valid together with --update.
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		retcode = fd.download(all_urls)

		# Dump cookie jar if requested
		if opts.cookiefile is not None:
			try:
				jar.save()
			except (IOError, OSError), err:
				sys.exit(u'ERROR: unable to save cookie jar')

		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')