Fix omission of Witold Baryluk as the Dailymotion InfoExtractor author
[youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # Author: Witold Baryluk
8 # License: Public domain code
9 import cookielib
10 import ctypes
11 import datetime
12 import gzip
13 import htmlentitydefs
14 import httplib
15 import locale
16 import math
17 import netrc
18 import os
19 import os.path
20 import re
21 import socket
22 import string
23 import StringIO
24 import subprocess
25 import sys
26 import time
27 import urllib
28 import urllib2
29 import zlib
30
31 # parse_qs was moved from the cgi module to the urlparse module recently.
32 try:
33         from urlparse import parse_qs
34 except ImportError:
35         from cgi import parse_qs
36
# Default HTTP headers sent with every request.  They imitate a real
# Firefox 3.6 browser so that sites serve the same pages they would serve
# to an interactive user; YoutubeDLHandler installs them on each request.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
44
45 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
46
def preferredencoding():
	"""Return the best output encoding for the current system.

	Asks locale.getpreferredencoding() for the system encoding and
	verifies it can actually encode text; falls back to UTF-8 when the
	reported encoding is missing or unusable.
	"""
	try:
		encoding = locale.getpreferredencoding()
		# Probe the encoding: some platforms report bogus names.
		u'TEST'.encode(encoding)
	except:
		encoding = 'UTF-8'
	return encoding
62
63 def htmlentity_transform(matchobj):
64         """Transforms an HTML entity to a Unicode character.
65
66         This function receives a match object and is intended to be used with
67         the re.sub() function.
68         """
69         entity = matchobj.group(1)
70
71         # Known non-numeric HTML entity
72         if entity in htmlentitydefs.name2codepoint:
73                 return unichr(htmlentitydefs.name2codepoint[entity])
74
75         # Unicode character
76         mobj = re.match(ur'(?u)#(x?\d+)', entity)
77         if mobj is not None:
78                 numstr = mobj.group(1)
79                 if numstr.startswith(u'x'):
80                         base = 16
81                         numstr = u'0%s' % numstr
82                 else:
83                         base = 10
84                 return unichr(long(numstr, base))
85
86         # Unknown entity in name, return its literal representation
87         return (u'&%s;' % entity)
88
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename."""
	# Decode HTML entities first, then replace the path separator so the
	# title cannot escape into (or create) subdirectories.
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	return utitle.replace(unicode(os.sep), u'%')
93
def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			# '-' means standard output; on Windows stdout must be put in
			# binary mode or the video data would be corrupted by newline
			# translation.
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			return (sys.stdout, filename)
		stream = open(filename, open_mode)
		return (stream, filename)
	except (IOError, OSError), err:
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(filename, open_mode)
		return (stream, filename)
119
class DownloadError(Exception):
	"""Raised when downloading a video fails.

	FileDownloader objects throw this exception when they are not
	configured to continue on errors; it carries the relevant error
	message.
	"""
	pass
128
class SameFileError(Exception):
	"""Raised when several downloads would collide on one output file.

	FileDownloader objects throw this exception when they detect that
	multiple files would have to be written to the same path on disk.
	"""
	pass
136
class PostProcessingError(Exception):
	"""Raised when a postprocessing step fails.

	A PostProcessor's .run() method may raise this exception to indicate
	an error in the postprocessing task.
	"""
	pass
144
class UnavailableVideoError(Exception):
	"""Raised when a video is requested in a format it does not offer.

	Thrown when the requested format is not available for that video.
	"""
	pass
152
class ContentTooShortError(Exception):
	"""Raised when the server sends less data than it announced.

	FileDownloader objects raise this when a downloaded file is smaller
	than the size the server reported first, which usually means the
	connection was interrupted.
	"""
	# Both counters are in bytes: what arrived vs. what was announced.
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		self.downloaded, self.expected = downloaded, expected
167
class YoutubeDLHandler(urllib2.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	  http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		"""Decompress a 'deflate'-encoded body.

		Tries raw deflate first (negative wbits, no zlib header), then
		falls back to plain zlib decompression, since servers ship both
		variants under the same Content-Encoding name.
		"""
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		"""Build an addinfourl response object that carries an HTTP code.

		Older Python versions do not accept the code argument (no
		getcode() either), so the code is attached manually there.
		"""
		if hasattr(urllib2.addinfourl, 'getcode'):
			return urllib2.addinfourl(stream, headers, url, code)
		ret = urllib2.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		"""Install the standard headers and honor the no-compression flag."""
		# Replace any caller-supplied value of a standard header so every
		# request goes out with the consistent browser-like profile.
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		# NOTE(review): req.headers stores capitalized names, hence the
		# 'Youtubedl-no-compression' spelling here even though callers
		# set 'Youtubedl-No-Compression'.
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		"""Transparently decode gzip- and deflate-compressed responses."""
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = StringIO.StringIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp
225
226 class FileDownloader(object):
227         """File Downloader class.
228
229         File downloader objects are the ones responsible of downloading the
230         actual video file and writing it to disk if the user has requested
231         it, among some other tasks. In most cases there should be one per
232         program. As, given a video URL, the downloader doesn't know how to
233         extract all the needed information, task that InfoExtractors do, it
234         has to pass the URL to one of them.
235
236         For this, file downloader objects have a method that allows
237         InfoExtractors to be registered in a given order. When it is passed
238         a URL, the file downloader handles it to the first InfoExtractor it
239         finds that reports being able to handle it. The InfoExtractor extracts
240         all the information about the video or videos the URL refers to, and
241         asks the FileDownloader to process the video information, possibly
242         downloading the video.
243
244         File downloaders accept a lot of parameters. In order not to saturate
245         the object constructor with arguments, it receives a dictionary of
246         options instead. These options are available through the params
247         attribute for the InfoExtractors to use. The FileDownloader also
248         registers itself as the downloader in charge for the InfoExtractors
249         that are added to it, so this is a "mutual registration".
250
251         Available options:
252
253         username:         Username for authentication purposes.
254         password:         Password for authentication purposes.
255         usenetrc:         Use netrc for authentication instead.
256         quiet:            Do not print messages to stdout.
257         forceurl:         Force printing final URL.
258         forcetitle:       Force printing title.
259         forcethumbnail:   Force printing thumbnail URL.
260         forcedescription: Force printing description.
261         simulate:         Do not download the video files.
262         format:           Video format code.
263         format_limit:     Highest quality format to try.
264         outtmpl:          Template for output names.
265         ignoreerrors:     Do not stop on download errors.
266         ratelimit:        Download speed limit, in bytes/sec.
267         nooverwrites:     Prevent overwriting files.
268         retries:          Number of times to retry for HTTP error 5xx
269         continuedl:       Try to continue downloads if possible.
270         noprogress:       Do not print the progress bar.
271         playliststart:    Playlist item to start at.
272         playlistend:      Playlist item to end at.
273         logtostderr:      Log messages to stderr instead of stdout.
274         consoletitle:     Display progress in console window's titlebar.
275         nopart:           Do not use temporary .part files.
276         """
277
	# Class-level defaults; every field is re-initialized in __init__.
	params = None               # dict of options (see class docstring)
	_ies = []                   # registered InfoExtractors, in priority order
	_pps = []                   # postprocessing chain
	_download_retcode = None    # process return code (set to 1 by trouble())
	_num_downloads = None       # ordinal counter behind %(autonumber)s
	_screen_file = None         # screen output stream (stdout or stderr)
284
285         def __init__(self, params):
286                 """Create a FileDownloader object with the given options."""
287                 self._ies = []
288                 self._pps = []
289                 self._download_retcode = 0
290                 self._num_downloads = 0
291                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
292                 self.params = params
293
294         @staticmethod
295         def pmkdir(filename):
296                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
297                 components = filename.split(os.sep)
298                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
299                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
300                 for dir in aggregate:
301                         if not os.path.exists(dir):
302                                 os.mkdir(dir)
303
304         @staticmethod
305         def format_bytes(bytes):
306                 if bytes is None:
307                         return 'N/A'
308                 if type(bytes) is str:
309                         bytes = float(bytes)
310                 if bytes == 0.0:
311                         exponent = 0
312                 else:
313                         exponent = long(math.log(bytes, 1024.0))
314                 suffix = 'bkMGTPEZY'[exponent]
315                 converted = float(bytes) / float(1024**exponent)
316                 return '%.2f%s' % (converted, suffix)
317
318         @staticmethod
319         def calc_percent(byte_counter, data_len):
320                 if data_len is None:
321                         return '---.-%'
322                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
323
324         @staticmethod
325         def calc_eta(start, now, total, current):
326                 if total is None:
327                         return '--:--'
328                 dif = now - start
329                 if current == 0 or dif < 0.001: # One millisecond
330                         return '--:--'
331                 rate = float(current) / dif
332                 eta = long((float(total) - float(current)) / rate)
333                 (eta_mins, eta_secs) = divmod(eta, 60)
334                 if eta_mins > 99:
335                         return '--:--'
336                 return '%02d:%02d' % (eta_mins, eta_secs)
337
338         @staticmethod
339         def calc_speed(start, now, bytes):
340                 dif = now - start
341                 if bytes == 0 or dif < 0.001: # One millisecond
342                         return '%10s' % '---b/s'
343                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
344
345         @staticmethod
346         def best_block_size(elapsed_time, bytes):
347                 new_min = max(bytes / 2.0, 1.0)
348                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
349                 if elapsed_time < 0.001:
350                         return long(new_max)
351                 rate = bytes / elapsed_time
352                 if rate > new_max:
353                         return long(new_max)
354                 if rate < new_min:
355                         return long(new_min)
356                 return long(rate)
357
358         @staticmethod
359         def parse_bytes(bytestr):
360                 """Parse a string indicating a byte quantity into a long integer."""
361                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
362                 if matchobj is None:
363                         return None
364                 number = float(matchobj.group(1))
365                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
366                 return long(round(number * multiplier))
367
368         def add_info_extractor(self, ie):
369                 """Add an InfoExtractor object to the end of the list."""
370                 self._ies.append(ie)
371                 ie.set_downloader(self)
372
373         def add_post_processor(self, pp):
374                 """Add a PostProcessor object to the end of the chain."""
375                 self._pps.append(pp)
376                 pp.set_downloader(self)
377
	def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
		"""Print message to stdout if not in quiet mode.

		skip_eol suppresses the trailing newline (used by progress lines
		that rewrite themselves with '\\r'); ignore_encoding_errors
		silently drops messages the terminal charset cannot represent.
		"""
		try:
			if not self.params.get('quiet', False):
				terminator = [u'\n', u''][skip_eol]
				print >>self._screen_file, (u'%s%s' % (message, terminator)).encode(preferredencoding()),
			self._screen_file.flush()
		except (UnicodeEncodeError), err:
			if not ignore_encoding_errors:
				raise
388
	def to_stderr(self, message):
		"""Print message to stderr, encoded for the system charset."""
		print >>sys.stderr, message.encode(preferredencoding())
392
	def to_cons_title(self, message):
		"""Set console/terminal window title to message.

		No-op unless the 'consoletitle' option is enabled.  Uses the Win32
		API on Windows consoles and the xterm escape sequence elsewhere.
		"""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# OSC 0 escape sequence: sets both icon name and window title.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
403
404         def fixed_template(self):
405                 """Checks if the output template is fixed."""
406                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
407
408         def trouble(self, message=None):
409                 """Determine action to take when a download problem appears.
410
411                 Depending on if the downloader has been configured to ignore
412                 download errors or not, this method may throw an exception or
413                 not when errors are found, after printing the message.
414                 """
415                 if message is not None:
416                         self.to_stderr(message)
417                 if not self.params.get('ignoreerrors', False):
418                         raise DownloadError(message)
419                 self._download_retcode = 1
420
421         def slow_down(self, start_time, byte_counter):
422                 """Sleep if the download speed is over the rate limit."""
423                 rate_limit = self.params.get('ratelimit', None)
424                 if rate_limit is None or byte_counter == 0:
425                         return
426                 now = time.time()
427                 elapsed = now - start_time
428                 if elapsed <= 0.0:
429                         return
430                 speed = float(byte_counter) / elapsed
431                 if speed > rate_limit:
432                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
433
434         def temp_name(self, filename):
435                 """Returns a temporary filename for the given filename."""
436                 if self.params.get('nopart', False) or filename == u'-' or \
437                                 (os.path.exists(filename) and not os.path.isfile(filename)):
438                         return filename
439                 return filename + u'.part'
440
441         def undo_temp_name(self, filename):
442                 if filename.endswith(u'.part'):
443                         return filename[:-len(u'.part')]
444                 return filename
445
	def try_rename(self, old_filename, new_filename):
		"""Rename old_filename to new_filename, reporting trouble on failure."""
		try:
			# Renaming a file onto itself is a no-op.
			if old_filename == new_filename:
				return
			os.rename(old_filename, new_filename)
		except (IOError, OSError), err:
			self.trouble(u'ERROR: unable to rename file')
453
454         def report_destination(self, filename):
455                 """Report destination filename."""
456                 self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
457
458         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
459                 """Report download progress."""
460                 if self.params.get('noprogress', False):
461                         return
462                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
463                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
464                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
465                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
466
467         def report_resuming_byte(self, resume_len):
468                 """Report attempt to resume at given byte."""
469                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
470
471         def report_retry(self, count, retries):
472                 """Report retry in case of HTTP error 5xx"""
473                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
474
	def report_file_already_downloaded(self, file_name):
		"""Report file has already been fully downloaded."""
		try:
			self.to_screen(u'[download] %s has already been downloaded' % file_name)
		except (UnicodeEncodeError), err:
			# Fall back to a generic message when the filename cannot be
			# encoded for the terminal.
			self.to_screen(u'[download] The file has already been downloaded')
481
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download.

		Printed when a partially-downloaded file cannot be continued.
		"""
		self.to_screen(u'[download] Unable to resume')
485
486         def report_finish(self):
487                 """Report download finished."""
488                 if self.params.get('noprogress', False):
489                         self.to_screen(u'[download] Download completed')
490                 else:
491                         self.to_screen(u'')
492
493         def increment_downloads(self):
494                 """Increment the ordinal that assigns a number to each file."""
495                 self._num_downloads += 1
496
	def process_info(self, info_dict):
		"""Process a single dictionary returned by an InfoExtractor.

		In simulate mode only the forced printings are performed.
		Otherwise the output filename is built from the template, the
		directories are created and the download plus postprocessing
		chain are run.  Problems are routed through self.trouble().
		"""
		# Do nothing else if in simulate mode
		if self.params.get('simulate', False):
			# Forced printings
			if self.params.get('forcetitle', False):
				print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forceurl', False):
				print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
				print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
			if self.params.get('forcedescription', False) and 'description' in info_dict:
				print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')

			return

		try:
			template_dict = dict(info_dict)
			# Extra template fields: download timestamp and ordinal number.
			template_dict['epoch'] = unicode(long(time.time()))
			template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
			filename = self.params['outtmpl'] % template_dict
		except (ValueError, KeyError), err:
			self.trouble(u'ERROR: invalid system charset or erroneous output template')
			return
		if self.params.get('nooverwrites', False) and os.path.exists(filename):
			self.to_stderr(u'WARNING: file exists and will be skipped')
			return

		try:
			self.pmkdir(filename)
		except (OSError, IOError), err:
			self.trouble(u'ERROR: unable to create directories: %s' % str(err))
			return

		try:
			# The URL is encoded to UTF-8 bytes before being handed to urllib2.
			success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
		except (OSError, IOError), err:
			raise UnavailableVideoError
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.trouble(u'ERROR: unable to download video data: %s' % str(err))
			return
		except (ContentTooShortError, ), err:
			self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
			return

		if success:
			try:
				self.post_process(filename, info_dict)
			except (PostProcessingError), err:
				self.trouble(u'ERROR: postprocessing: %s' % str(err))
				return
548
549         def download(self, url_list):
550                 """Download a given list of URLs."""
551                 if len(url_list) > 1 and self.fixed_template():
552                         raise SameFileError(self.params['outtmpl'])
553
554                 for url in url_list:
555                         suitable_found = False
556                         for ie in self._ies:
557                                 # Go to next InfoExtractor if not suitable
558                                 if not ie.suitable(url):
559                                         continue
560
561                                 # Suitable InfoExtractor found
562                                 suitable_found = True
563
564                                 # Extract information from URL and process it
565                                 ie.extract(url)
566
567                                 # Suitable InfoExtractor had been found; go to next URL
568                                 break
569
570                         if not suitable_found:
571                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
572
573                 return self._download_retcode
574
575         def post_process(self, filename, ie_info):
576                 """Run the postprocessing chain on the given file."""
577                 info = dict(ie_info)
578                 info['filepath'] = filename
579                 for pp in self._pps:
580                         info = pp.run(info)
581                         if info is None:
582                                 break
583
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by driving the external rtmpdump tool.

		Returns True on success and False on failure (after reporting
		trouble).  Data is written to a temporary name and renamed into
		place once rtmpdump exits cleanly.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrupted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			if prevsize == cursize and retval == 1:
				# rtmpdump claims resumability but made no progress:
				# stop retrying to avoid looping forever.
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
615
616         def _do_download(self, filename, url, player_url):
617                 # Check file already present
618                 if self.params.get('continuedl', False) and os.path.isfile(filename) and not self.params.get('nopart', False):
619                         self.report_file_already_downloaded(filename)
620                         return True
621
622                 # Attempt to download using rtmpdump
623                 if url.startswith('rtmp'):
624                         return self._download_with_rtmpdump(filename, url, player_url)
625
626                 tmpfilename = self.temp_name(filename)
627                 stream = None
628                 open_mode = 'wb'
629
630                 # Do not include the Accept-Encoding header
631                 headers = {'Youtubedl-no-compression': 'True'}
632                 basic_request = urllib2.Request(url, None, headers)
633                 request = urllib2.Request(url, None, headers)
634
635                 # Establish possible resume length
636                 if os.path.isfile(tmpfilename):
637                         resume_len = os.path.getsize(tmpfilename)
638                 else:
639                         resume_len = 0
640
641                 # Request parameters in case of being able to resume
642                 if self.params.get('continuedl', False) and resume_len != 0:
643                         self.report_resuming_byte(resume_len)
644                         request.add_header('Range','bytes=%d-' % resume_len)
645                         open_mode = 'ab'
646
647                 count = 0
648                 retries = self.params.get('retries', 0)
649                 while count <= retries:
650                         # Establish connection
651                         try:
652                                 data = urllib2.urlopen(request)
653                                 break
654                         except (urllib2.HTTPError, ), err:
655                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
656                                         # Unexpected HTTP error
657                                         raise
658                                 elif err.code == 416:
659                                         # Unable to resume (requested range not satisfiable)
660                                         try:
661                                                 # Open the connection again without the range header
662                                                 data = urllib2.urlopen(basic_request)
663                                                 content_length = data.info()['Content-Length']
664                                         except (urllib2.HTTPError, ), err:
665                                                 if err.code < 500 or err.code >= 600:
666                                                         raise
667                                         else:
668                                                 # Examine the reported length
669                                                 if (content_length is not None and
670                                                     (resume_len - 100 < long(content_length) < resume_len + 100)):
671                                                         # The file had already been fully downloaded.
672                                                         # Explanation to the above condition: in issue #175 it was revealed that
673                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
674                                                         # changing the file size slightly and causing problems for some users. So
675                                                         # I decided to implement a suggested change and consider the file
676                                                         # completely downloaded if the file size differs less than 100 bytes from
677                                                         # the one in the hard drive.
678                                                         self.report_file_already_downloaded(filename)
679                                                         self.try_rename(tmpfilename, filename)
680                                                         return True
681                                                 else:
682                                                         # The length does not match, we start the download over
683                                                         self.report_unable_to_resume()
684                                                         open_mode = 'wb'
685                                                         break
686                         # Retry
687                         count += 1
688                         if count <= retries:
689                                 self.report_retry(count, retries)
690
691                 if count > retries:
692                         self.trouble(u'ERROR: giving up after %s retries' % retries)
693                         return False
694
695                 data_len = data.info().get('Content-length', None)
696                 if data_len is not None:
697                         data_len = long(data_len) + resume_len
698                 data_len_str = self.format_bytes(data_len)
699                 byte_counter = 0 + resume_len
700                 block_size = 1024
701                 start = time.time()
702                 while True:
703                         # Download and write
704                         before = time.time()
705                         data_block = data.read(block_size)
706                         after = time.time()
707                         if len(data_block) == 0:
708                                 break
709                         byte_counter += len(data_block)
710
711                         # Open file just in time
712                         if stream is None:
713                                 try:
714                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
715                                         filename = self.undo_temp_name(tmpfilename)
716                                         self.report_destination(filename)
717                                 except (OSError, IOError), err:
718                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
719                                         return False
720                         try:
721                                 stream.write(data_block)
722                         except (IOError, OSError), err:
723                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
724                                 return False
725                         block_size = self.best_block_size(after - before, len(data_block))
726
727                         # Progress message
728                         percent_str = self.calc_percent(byte_counter, data_len)
729                         eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
730                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
731                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
732
733                         # Apply rate limit
734                         self.slow_down(start, byte_counter - resume_len)
735
736                 stream.close()
737                 self.report_finish()
738                 if data_len is not None and byte_counter != data_len:
739                         raise ContentTooShortError(byte_counter, long(data_len))
740                 self.try_rename(tmpfilename, filename)
741                 return True
742
class InfoExtractor(object):
	"""Information Extractor class.

	An information extractor takes a URL and pulls out everything the
	FileDownloader needs to know about the video (or videos) the URL
	refers to: the real video URL, the literal and simplified titles,
	the uploader, and so on. The extractor hands this information over
	as a dictionary, which the FileDownloader then processes — possibly
	downloading the video to the file system, among other outcomes.
	Every dictionary must carry the following fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The fields below are optional. Their main purpose is to let
	youtube-dl act as the backend for a video search function, such as
	the one in youtube2mp3. They are only consulted when their
	respective forced printing functions are invoked:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Subclasses should redefine _real_initialize() and _real_extract(),
	as well as the suitable() static method, and will typically be
	instantiated and registered with the main downloader.
	"""

	# Class-level defaults; __init__ shadows them with instance state.
	_ready = False
	_downloader = None

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return False

	def initialize(self):
		"""Initializes an instance (authentication, etc); runs only once."""
		if self._ready:
			return
		self._real_initialize()
		self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		result = self._real_extract(url)
		return result

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
813
class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	# Group 1 matches the URL prefix (if any); group 2 is the video id.
	# The conditional pattern (?(1).+)? only allows trailing text when a
	# full URL prefix was matched, so a bare video id is also accepted.
	_VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
	_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
	_NETRC_MACHINE = 'youtube'
	# Listed in order of quality
	_available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
	# Formats not listed here fall back to 'flv' in _real_extract.
	_video_extensions = {
		'13': '3gp',
		'17': 'mp4',
		'18': 'mp4',
		'22': 'mp4',
		'37': 'mp4',
		'38': 'video', # You actually don't know if this will be MOV, AVI or whatever
		'43': 'webm',
		'45': 'webm',
	}

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')

	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[youtube] Confirming age')

	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

	def report_unavailable_format(self, video_id, format):
		"""Report extracted video URL."""
		self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol."""
		self._downloader.to_screen(u'[youtube] RTMP download detected')

	def _real_initialize(self):
		"""Set the interface language, then log in and confirm age if credentials are available."""
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language
		request = urllib2.Request(self._LANG_URL)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':		'/',
				'action_login':	'Log In',
				'username':	username,
				'password':	password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present, the credentials were rejected.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':		'/',
				'action_confirm':	'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract video information and hand each selected format to the downloader."""
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return
		video_id = mobj.group(2)

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id)
		try:
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
			return

		# Attempt to extract SWF player URL
		mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
		if mobj is not None:
			# Unescape the backslash-escaped URL found in the page's JS config.
			player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
		else:
			player_url = None

		# Get video info: try several 'el' parameter values and stop at the
		# first response that carries a 'token' field.
		self.report_video_info_webpage_download(video_id)
		for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
			video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
					   % (video_id, el_type))
			request = urllib2.Request(video_info_url)
			try:
				video_info_webpage = urllib2.urlopen(request).read()
				video_info = parse_qs(video_info_webpage)
				if 'token' in video_info:
					break
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
				return
		if 'token' not in video_info:
			if 'reason' in video_info:
				self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
			else:
				self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
			return

		# Start extracting information
		self.report_information_extraction(video_id)

		# uploader
		if 'author' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = urllib.unquote_plus(video_info['author'][0])

		# title
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			return
		video_title = urllib.unquote_plus(video_info['title'][0])
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		# simplified title: collapse runs of non-alphanumerics into '_'
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')

		# thumbnail image
		if 'thumbnail_url' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		else:	# don't panic if we can't find it
			video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

		# upload date
		upload_date = u'NA'
		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
		if mobj is not None:
			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
			format_expressions = ['%d %B %Y', '%B %d %Y']
			for expression in format_expressions:
				try:
					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
				except:
					# NOTE(review): deliberately best-effort — formats that don't
					# match are skipped; the bare except also swallows
					# KeyboardInterrupt, consider narrowing to ValueError.
					pass

		# description
		video_description = 'No description available.'
		if self._downloader.params.get('forcedescription', False):
			mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
			if mobj is not None:
				video_description = mobj.group(1)

		# token
		# NOTE(review): video_token is not used later in this method;
		# presumably kept for debugging or future use — TODO confirm.
		video_token = urllib.unquote_plus(video_info['token'][0])

		# Decide which formats to download
		req_format = self._downloader.params.get('format', None)

		if 'fmt_url_map' in video_info:
			# fmt_url_map is a comma-separated list of 'format|url' pairs.
			url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
			format_limit = self._downloader.params.get('format_limit', None)
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			else:
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				return
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			else:
				# Specific format
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					return
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
			self.report_rtmp_download()
			video_url_list = [(None, video_info['conn'][0])]

		else:
			self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
			return

		for format_param, video_real_url in video_url_list:
			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension
			video_extension = self._video_extensions.get(format_param, 'flv')

			# Find the video URL in fmt_url_map or conn paramters
			try:
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					# and/or idiom: u'NA' when format_param is None
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
					'player_url':	player_url,
				})
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
1093
1094
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	# Group 1 is the video id, group 2 the URL-embedded simplified title.
	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		"""Constructor. Stores the YoutubeIE used for 'yt-' prefixed videos."""
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self._downloader.to_screen(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		"""Fetch the disclaimer page and POST the age-confirmation form."""
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER)
		try:
			self.report_disclaimer()
			# Response body is discarded; the request is made for its cookies.
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
			return

		# Confirm age
		disclaimer_form = {
			'filters': '0',
			'submit': "Continue - I'm over 18",
			}
		request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
		try:
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return

	def _real_extract(self, url):
		"""Extract video information from a Metacafe watch page."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		video_id = mobj.group(1)

		# Check if video comes from YouTube; if so, delegate to the YouTube IE.
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
			return

		# At this point we have a new video
		self._downloader.increment_downloads()

		simple_title = mobj.group(2).decode('utf-8')

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
		if mobj is not None:
			mediaURL = urllib.unquote(mobj.group(1))
			# Extension is taken from the last three characters of the URL.
			video_extension = mediaURL[-3:]

			# Extract gdaKey if available
			mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
			if mobj is None:
				video_url = mediaURL
			else:
				gdaKey = mobj.group(1)
				video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
		else:
			# Fallback: pull the media URL out of the flashvars JSON blob.
			mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			vardict = parse_qs(mobj.group(1))
			if 'mediaData' not in vardict:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract media URL')
				return
			mediaURL = mobj.group(1).replace('\\/', '/')
			video_extension = mediaURL[-3:]
			video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

		mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':		video_id.decode('utf-8'),
				'url':		video_url.decode('utf-8'),
				'uploader':	video_uploader.decode('utf-8'),
				'upload_date':	u'NA',
				'title':	video_title,
				'stitle':	simple_title,
				'ext':		video_extension.decode('utf-8'),
				'format':	u'NA',
				'player_url':	None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1238
1239
class DailymotionIE(InfoExtractor):
	"""Information Extractor for Dailymotion.

	Contributed by Witold Baryluk (see the author list in the file
	header). Handles single-video pages of the form
	http://www.dailymotion.com/video/<id>_<simplified-title>.
	"""

	# Group 1 is the video id, group 2 a URL-safe simplified title.
	_VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(DailymotionIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# Dailymotion needs no authentication or other setup.
		return

	def _real_extract(self, url):
		"""Download the video page and hand the media info to the downloader."""
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		# The simplified title comes straight from the URL (group 2).
		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		# The media URL is passed percent-encoded to the Flash player
		# via addVariable("video", "..."); unquote it to get the real URL.
		mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))

		# if needed add http://www.dailymotion.com/ if relative URL

		video_url = mediaURL

		# '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
		mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)

		mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			return
		video_uploader = mobj.group(1)

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader.decode('utf-8'),
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1327
class GoogleIE(InfoExtractor):
	"""Information extractor for video.google.com."""

	# Group 1 is the numeric docid of the video.
	_VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		"""Return True if this extractor can handle the given URL."""
		return (re.match(GoogleIE._VALID_URL, url) is not None)

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# Google Video needs no authentication or other setup.
		return

	def _real_extract(self, url):
		"""Extract and download a Google Video given its videoplay URL."""
		# Extract id from URL
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# At this point we have a new video
		self._downloader.increment_downloads()
		video_id = mobj.group(1)

		video_extension = 'mp4'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return

		# Extract URL, uploader, and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r"download_url:'([^']+)'", webpage)
		if mobj is None:
			# No direct MP4 download link; fall back to the escaped
			# Flash player URL embedded in the page.
			video_extension = 'flv'
			mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract media URL')
			return
		mediaURL = urllib.unquote(mobj.group(1))
		# Undo the JavaScript hex escapes for '=' and '&'.
		mediaURL = mediaURL.replace('\\x3d', '\x3d')
		mediaURL = mediaURL.replace('\\x26', '\x26')

		video_url = mediaURL

		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		# Build a filesystem-safe title from the sanitized one.
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# Extract video description
		mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract video description')
			return
		video_description = mobj.group(1).decode('utf-8')
		if not video_description:
			video_description = 'No description available.'

		# Extract video thumbnail
		if self._downloader.params.get('forcethumbnail', False):
			# NOTE(review): the docid seems to be usable as a negative
			# integer; abs(int(...)) is presumably its unsigned form for
			# the search index — confirm.
			request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
			try:
				webpage = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
				return
			mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
			if mobj is None:
				self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
				return
			video_thumbnail = mobj.group(1)
		else:	# we need something to pass to process_info
			video_thumbnail = ''

		# NOTE(review): video_description and video_thumbnail are
		# extracted above but never passed to process_info.
		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     u'NA',
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
1437
1438
1439 class PhotobucketIE(InfoExtractor):
1440         """Information extractor for photobucket.com."""
1441
1442         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1443
1444         def __init__(self, downloader=None):
1445                 InfoExtractor.__init__(self, downloader)
1446
1447         @staticmethod
1448         def suitable(url):
1449                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1450
1451         def report_download_webpage(self, video_id):
1452                 """Report webpage download."""
1453                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1454
1455         def report_extraction(self, video_id):
1456                 """Report information extraction."""
1457                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1458
1459         def _real_initialize(self):
1460                 return
1461
1462         def _real_extract(self, url):
1463                 # Extract id from URL
1464                 mobj = re.match(self._VALID_URL, url)
1465                 if mobj is None:
1466                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1467                         return
1468
1469                 # At this point we have a new video
1470                 self._downloader.increment_downloads()
1471                 video_id = mobj.group(1)
1472
1473                 video_extension = 'flv'
1474
1475                 # Retrieve video webpage to extract further information
1476                 request = urllib2.Request(url)
1477                 try:
1478                         self.report_download_webpage(video_id)
1479                         webpage = urllib2.urlopen(request).read()
1480                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1481                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1482                         return
1483
1484                 # Extract URL, uploader, and title from webpage
1485                 self.report_extraction(video_id)
1486                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1487                 if mobj is None:
1488                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1489                         return
1490                 mediaURL = urllib.unquote(mobj.group(1))
1491
1492                 video_url = mediaURL
1493
1494                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1495                 if mobj is None:
1496                         self._downloader.trouble(u'ERROR: unable to extract title')
1497                         return
1498                 video_title = mobj.group(1).decode('utf-8')
1499                 video_title = sanitize_title(video_title)
1500                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1501
1502                 video_uploader = mobj.group(2).decode('utf-8')
1503
1504                 try:
1505                         # Process video information
1506                         self._downloader.process_info({
1507                                 'id':           video_id.decode('utf-8'),
1508                                 'url':          video_url.decode('utf-8'),
1509                                 'uploader':     video_uploader,
1510                                 'upload_date':  u'NA',
1511                                 'title':        video_title,
1512                                 'stitle':       simple_title,
1513                                 'ext':          video_extension.decode('utf-8'),
1514                                 'format':       u'NA',
1515                                 'player_url':   None,
1516                         })
1517                 except UnavailableVideoError:
1518                         self._downloader.trouble(u'\nERROR: unable to download video')
1519
1520
1521 class YahooIE(InfoExtractor):
1522         """Information extractor for video.yahoo.com."""
1523
1524         # _VALID_URL matches all Yahoo! Video URLs
1525         # _VPAGE_URL matches only the extractable '/watch/' URLs
1526         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1527         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1528
1529         def __init__(self, downloader=None):
1530                 InfoExtractor.__init__(self, downloader)
1531
1532         @staticmethod
1533         def suitable(url):
1534                 return (re.match(YahooIE._VALID_URL, url) is not None)
1535
1536         def report_download_webpage(self, video_id):
1537                 """Report webpage download."""
1538                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1539
1540         def report_extraction(self, video_id):
1541                 """Report information extraction."""
1542                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1543
1544         def _real_initialize(self):
1545                 return
1546
1547         def _real_extract(self, url, new_video=True):
1548                 # Extract ID from URL
1549                 mobj = re.match(self._VALID_URL, url)
1550                 if mobj is None:
1551                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1552                         return
1553
1554                 # At this point we have a new video
1555                 self._downloader.increment_downloads()
1556                 video_id = mobj.group(2)
1557                 video_extension = 'flv'
1558
1559                 # Rewrite valid but non-extractable URLs as
1560                 # extractable English language /watch/ URLs
1561                 if re.match(self._VPAGE_URL, url) is None:
1562                         request = urllib2.Request(url)
1563                         try:
1564                                 webpage = urllib2.urlopen(request).read()
1565                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1566                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1567                                 return
1568
1569                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1570                         if mobj is None:
1571                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1572                                 return
1573                         yahoo_id = mobj.group(1)
1574
1575                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1576                         if mobj is None:
1577                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1578                                 return
1579                         yahoo_vid = mobj.group(1)
1580
1581                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1582                         return self._real_extract(url, new_video=False)
1583
1584                 # Retrieve video webpage to extract further information
1585                 request = urllib2.Request(url)
1586                 try:
1587                         self.report_download_webpage(video_id)
1588                         webpage = urllib2.urlopen(request).read()
1589                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1590                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1591                         return
1592
1593                 # Extract uploader and title from webpage
1594                 self.report_extraction(video_id)
1595                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1596                 if mobj is None:
1597                         self._downloader.trouble(u'ERROR: unable to extract video title')
1598                         return
1599                 video_title = mobj.group(1).decode('utf-8')
1600                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1601
1602                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1603                 if mobj is None:
1604                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1605                         return
1606                 video_uploader = mobj.group(1).decode('utf-8')
1607
1608                 # Extract video thumbnail
1609                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1610                 if mobj is None:
1611                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1612                         return
1613                 video_thumbnail = mobj.group(1).decode('utf-8')
1614
1615                 # Extract video description
1616                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1617                 if mobj is None:
1618                         self._downloader.trouble(u'ERROR: unable to extract video description')
1619                         return
1620                 video_description = mobj.group(1).decode('utf-8')
1621                 if not video_description: video_description = 'No description available.'
1622
1623                 # Extract video height and width
1624                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1625                 if mobj is None:
1626                         self._downloader.trouble(u'ERROR: unable to extract video height')
1627                         return
1628                 yv_video_height = mobj.group(1)
1629
1630                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1631                 if mobj is None:
1632                         self._downloader.trouble(u'ERROR: unable to extract video width')
1633                         return
1634                 yv_video_width = mobj.group(1)
1635
1636                 # Retrieve video playlist to extract media URL
1637                 # I'm not completely sure what all these options are, but we
1638                 # seem to need most of them, otherwise the server sends a 401.
1639                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1640                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1641                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1642                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1643                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1644                 try:
1645                         self.report_download_webpage(video_id)
1646                         webpage = urllib2.urlopen(request).read()
1647                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1648                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1649                         return
1650
1651                 # Extract media URL from playlist XML
1652                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1653                 if mobj is None:
1654                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1655                         return
1656                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1657                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1658
1659                 try:
1660                         # Process video information
1661                         self._downloader.process_info({
1662                                 'id':           video_id.decode('utf-8'),
1663                                 'url':          video_url,
1664                                 'uploader':     video_uploader,
1665                                 'upload_date':  u'NA',
1666                                 'title':        video_title,
1667                                 'stitle':       simple_title,
1668                                 'ext':          video_extension.decode('utf-8'),
1669                                 'thumbnail':    video_thumbnail.decode('utf-8'),
1670                                 'description':  video_description,
1671                                 'thumbnail':    video_thumbnail,
1672                                 'description':  video_description,
1673                                 'player_url':   None,
1674                         })
1675                 except UnavailableVideoError:
1676                         self._downloader.trouble(u'\nERROR: unable to download video')
1677
1678
class GenericIE(InfoExtractor):
	"""Generic last-resort information extractor."""

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	@staticmethod
	def suitable(url):
		# Last resort: claim every URL; specific extractors are tried first.
		return True

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
		self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# No setup needed for the generic fallback.
		return

	def _real_extract(self, url):
		"""Heuristically extract a media URL from an arbitrary page."""
		# At this point we have a new video
		self._downloader.increment_downloads()

		# Provisional id for progress messages; replaced below once the
		# real media file name is known.
		video_id = url.split('/')[-1]
		request = urllib2.Request(url)
		try:
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
			return
		except ValueError, err:
			# since this is the last-resort InfoExtractor, if
			# this error is thrown, it'll be thrown here
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		self.report_extraction(video_id)
		# Start with something easy: JW Player in SWFObject
		mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
		if mobj is None:
			# Broaden the search a little bit
			mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		# It's possible that one of the regexes
		# matched, but returned an empty group:
		if mobj.group(1) is None:
			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
			return

		video_url = urllib.unquote(mobj.group(1))
		video_id  = os.path.basename(video_url)

		# here's a fun little line of code for you:
		video_extension = os.path.splitext(video_id)[1][1:]
		video_id        = os.path.splitext(video_id)[0]

		# it's tempting to parse this further, but you would
		# have to take into account all the variations like
		#   Video Title - Site Name
		#   Site Name | Video Title
		#   Video Title - Tagline | Site Name
		# and so on and so forth; it's just not practical
		mobj = re.search(r'<title>(.*)</title>', webpage)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_title = mobj.group(1).decode('utf-8')
		video_title = sanitize_title(video_title)
		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

		# video uploader is domain name
		mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: unable to extract title')
			return
		video_uploader = mobj.group(1).decode('utf-8')

		try:
			# Process video information
			self._downloader.process_info({
				'id':           video_id.decode('utf-8'),
				'url':          video_url.decode('utf-8'),
				'uploader':     video_uploader,
				'upload_date':  u'NA',
				'title':        video_title,
				'stitle':       simple_title,
				'ext':          video_extension.decode('utf-8'),
				'format':       u'NA',
				'player_url':   None,
			})
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download video')
1778
1779
class YoutubeSearchIE(InfoExtractor):
	"""Information Extractor for YouTube search queries."""
	# Queries look like "ytsearch:foo", "ytsearchN:foo" or "ytsearchall:foo".
	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
	# Result page URL; the %s slots take the quoted query and the page number.
	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	# Anchor that marks one video hit on a result page.
	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	# Presence of a "Next" link means more result pages follow.
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	# Delegate extractor used to download each found video; set in __init__.
	_youtube_ie = None
	# Cap on requested results ("ytsearch returns max" this many).
	_max_youtube_results = 1000
1788
	def __init__(self, youtube_ie, downloader=None):
		# Keep a reference to the YouTube extractor that will be used
		# to download each individual search result.
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie
1792
1793         @staticmethod
1794         def suitable(url):
1795                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1796
	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		# NOTE(review): _real_extract encodes the query as UTF-8, but it
		# is decoded here with the locale's preferred encoding; this can
		# mismatch on non-UTF-8 locales — verify.
		query = query.decode(preferredencoding())
		self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1801
	def _real_initialize(self):
		# Delegate setup to the wrapped YouTube extractor.
		self._youtube_ie.initialize()
1804
1805         def _real_extract(self, query):
1806                 mobj = re.match(self._VALID_QUERY, query)
1807                 if mobj is None:
1808                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1809                         return
1810
1811                 prefix, query = query.split(':')
1812                 prefix = prefix[8:]
1813                 query  = query.encode('utf-8')
1814                 if prefix == '':
1815                         self._download_n_results(query, 1)
1816                         return
1817                 elif prefix == 'all':
1818                         self._download_n_results(query, self._max_youtube_results)
1819                         return
1820                 else:
1821                         try:
1822                                 n = long(prefix)
1823                                 if n <= 0:
1824                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1825                                         return
1826                                 elif n > self._max_youtube_results:
1827                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1828                                         n = self._max_youtube_results
1829                                 self._download_n_results(query, n)
1830                                 return
1831                         except ValueError: # parsing prefix as integer fails
1832                                 self._download_n_results(query, 1)
1833                                 return
1834
1835         def _download_n_results(self, query, n):
1836                 """Downloads a specified number of results for a query"""
1837
1838                 video_ids = []
1839                 already_seen = set()
1840                 pagenum = 1
1841
1842                 while True:
1843                         self.report_download_page(query, pagenum)
1844                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1845                         request = urllib2.Request(result_url)
1846                         try:
1847                                 page = urllib2.urlopen(request).read()
1848                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1849                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1850                                 return
1851
1852                         # Extract video identifiers
1853                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1854                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1855                                 if video_id not in already_seen:
1856                                         video_ids.append(video_id)
1857                                         already_seen.add(video_id)
1858                                         if len(video_ids) == n:
1859                                                 # Specified n videos reached
1860                                                 for id in video_ids:
1861                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1862                                                 return
1863
1864                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1865                                 for id in video_ids:
1866                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1867                                 return
1868
1869                         pagenum = pagenum + 1
1870
1871 class GoogleSearchIE(InfoExtractor):
1872         """Information Extractor for Google Video search queries."""
1873         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1874         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1875         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1876         _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1877         _google_ie = None
1878         _max_google_results = 1000
1879
1880         def __init__(self, google_ie, downloader=None):
1881                 InfoExtractor.__init__(self, downloader)
1882                 self._google_ie = google_ie
1883
1884         @staticmethod
1885         def suitable(url):
1886                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1887
1888         def report_download_page(self, query, pagenum):
1889                 """Report attempt to download playlist page with given number."""
1890                 query = query.decode(preferredencoding())
1891                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1892
1893         def _real_initialize(self):
1894                 self._google_ie.initialize()
1895
1896         def _real_extract(self, query):
1897                 mobj = re.match(self._VALID_QUERY, query)
1898                 if mobj is None:
1899                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1900                         return
1901
1902                 prefix, query = query.split(':')
1903                 prefix = prefix[8:]
1904                 query  = query.encode('utf-8')
1905                 if prefix == '':
1906                         self._download_n_results(query, 1)
1907                         return
1908                 elif prefix == 'all':
1909                         self._download_n_results(query, self._max_google_results)
1910                         return
1911                 else:
1912                         try:
1913                                 n = long(prefix)
1914                                 if n <= 0:
1915                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1916                                         return
1917                                 elif n > self._max_google_results:
1918                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n))
1919                                         n = self._max_google_results
1920                                 self._download_n_results(query, n)
1921                                 return
1922                         except ValueError: # parsing prefix as integer fails
1923                                 self._download_n_results(query, 1)
1924                                 return
1925
1926         def _download_n_results(self, query, n):
1927                 """Downloads a specified number of results for a query"""
1928
1929                 video_ids = []
1930                 already_seen = set()
1931                 pagenum = 1
1932
1933                 while True:
1934                         self.report_download_page(query, pagenum)
1935                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1936                         request = urllib2.Request(result_url)
1937                         try:
1938                                 page = urllib2.urlopen(request).read()
1939                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1940                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1941                                 return
1942
1943                         # Extract video identifiers
1944                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1945                                 video_id = mobj.group(1)
1946                                 if video_id not in already_seen:
1947                                         video_ids.append(video_id)
1948                                         already_seen.add(video_id)
1949                                         if len(video_ids) == n:
1950                                                 # Specified n videos reached
1951                                                 for id in video_ids:
1952                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1953                                                 return
1954
1955                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1956                                 for id in video_ids:
1957                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1958                                 return
1959
1960                         pagenum = pagenum + 1
1961
1962 class YahooSearchIE(InfoExtractor):
1963         """Information Extractor for Yahoo! Video search queries."""
1964         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1965         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1966         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1967         _MORE_PAGES_INDICATOR = r'\s*Next'
1968         _yahoo_ie = None
1969         _max_yahoo_results = 1000
1970
1971         def __init__(self, yahoo_ie, downloader=None):
1972                 InfoExtractor.__init__(self, downloader)
1973                 self._yahoo_ie = yahoo_ie
1974
1975         @staticmethod
1976         def suitable(url):
1977                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1978
1979         def report_download_page(self, query, pagenum):
1980                 """Report attempt to download playlist page with given number."""
1981                 query = query.decode(preferredencoding())
1982                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1983
1984         def _real_initialize(self):
1985                 self._yahoo_ie.initialize()
1986
1987         def _real_extract(self, query):
1988                 mobj = re.match(self._VALID_QUERY, query)
1989                 if mobj is None:
1990                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1991                         return
1992
1993                 prefix, query = query.split(':')
1994                 prefix = prefix[8:]
1995                 query  = query.encode('utf-8')
1996                 if prefix == '':
1997                         self._download_n_results(query, 1)
1998                         return
1999                 elif prefix == 'all':
2000                         self._download_n_results(query, self._max_yahoo_results)
2001                         return
2002                 else:
2003                         try:
2004                                 n = long(prefix)
2005                                 if n <= 0:
2006                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2007                                         return
2008                                 elif n > self._max_yahoo_results:
2009                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n))
2010                                         n = self._max_yahoo_results
2011                                 self._download_n_results(query, n)
2012                                 return
2013                         except ValueError: # parsing prefix as integer fails
2014                                 self._download_n_results(query, 1)
2015                                 return
2016
2017         def _download_n_results(self, query, n):
2018                 """Downloads a specified number of results for a query"""
2019
2020                 video_ids = []
2021                 already_seen = set()
2022                 pagenum = 1
2023
2024                 while True:
2025                         self.report_download_page(query, pagenum)
2026                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2027                         request = urllib2.Request(result_url)
2028                         try:
2029                                 page = urllib2.urlopen(request).read()
2030                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2031                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2032                                 return
2033
2034                         # Extract video identifiers
2035                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2036                                 video_id = mobj.group(1)
2037                                 if video_id not in already_seen:
2038                                         video_ids.append(video_id)
2039                                         already_seen.add(video_id)
2040                                         if len(video_ids) == n:
2041                                                 # Specified n videos reached
2042                                                 for id in video_ids:
2043                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2044                                                 return
2045
2046                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2047                                 for id in video_ids:
2048                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2049                                 return
2050
2051                         pagenum = pagenum + 1
2052
2053 class YoutubePlaylistIE(InfoExtractor):
2054         """Information Extractor for YouTube playlists."""
2055
2056         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
2057         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
2058         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2059         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2060         _youtube_ie = None
2061
2062         def __init__(self, youtube_ie, downloader=None):
2063                 InfoExtractor.__init__(self, downloader)
2064                 self._youtube_ie = youtube_ie
2065
2066         @staticmethod
2067         def suitable(url):
2068                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
2069
2070         def report_download_page(self, playlist_id, pagenum):
2071                 """Report attempt to download playlist page with given number."""
2072                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2073
2074         def _real_initialize(self):
2075                 self._youtube_ie.initialize()
2076
2077         def _real_extract(self, url):
2078                 # Extract playlist id
2079                 mobj = re.match(self._VALID_URL, url)
2080                 if mobj is None:
2081                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2082                         return
2083
2084                 # Download playlist pages
2085                 playlist_id = mobj.group(1)
2086                 video_ids = []
2087                 pagenum = 1
2088
2089                 while True:
2090                         self.report_download_page(playlist_id, pagenum)
2091                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
2092                         try:
2093                                 page = urllib2.urlopen(request).read()
2094                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2095                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2096                                 return
2097
2098                         # Extract video identifiers
2099                         ids_in_page = []
2100                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2101                                 if mobj.group(1) not in ids_in_page:
2102                                         ids_in_page.append(mobj.group(1))
2103                         video_ids.extend(ids_in_page)
2104
2105                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2106                                 break
2107                         pagenum = pagenum + 1
2108
2109                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2110                 playlistend = self._downloader.params.get('playlistend', -1)
2111                 video_ids = video_ids[playliststart:playlistend]
2112
2113                 for id in video_ids:
2114                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2115                 return
2116
2117 class YoutubeUserIE(InfoExtractor):
2118         """Information Extractor for YouTube users."""
2119
2120         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2121         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2122         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2123         _youtube_ie = None
2124
2125         def __init__(self, youtube_ie, downloader=None):
2126                 InfoExtractor.__init__(self, downloader)
2127                 self._youtube_ie = youtube_ie
2128
2129         @staticmethod
2130         def suitable(url):
2131                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2132
2133         def report_download_page(self, username):
2134                 """Report attempt to download user page."""
2135                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2136
2137         def _real_initialize(self):
2138                 self._youtube_ie.initialize()
2139
2140         def _real_extract(self, url):
2141                 # Extract username
2142                 mobj = re.match(self._VALID_URL, url)
2143                 if mobj is None:
2144                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2145                         return
2146
2147                 # Download user page
2148                 username = mobj.group(1)
2149                 video_ids = []
2150                 pagenum = 1
2151
2152                 self.report_download_page(username)
2153                 request = urllib2.Request(self._TEMPLATE_URL % (username))
2154                 try:
2155                         page = urllib2.urlopen(request).read()
2156                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2157                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2158                         return
2159
2160                 # Extract video identifiers
2161                 ids_in_page = []
2162
2163                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2164                         if mobj.group(1) not in ids_in_page:
2165                                 ids_in_page.append(mobj.group(1))
2166                 video_ids.extend(ids_in_page)
2167
2168                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2169                 playlistend = self._downloader.params.get('playlistend', -1)
2170                 video_ids = video_ids[playliststart:playlistend]
2171
2172                 for id in video_ids:
2173                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2174                 return
2175
2176 class DepositFilesIE(InfoExtractor):
2177         """Information extractor for depositfiles.com"""
2178
2179         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2180
2181         def __init__(self, downloader=None):
2182                 InfoExtractor.__init__(self, downloader)
2183
2184         @staticmethod
2185         def suitable(url):
2186                 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2187
2188         def report_download_webpage(self, file_id):
2189                 """Report webpage download."""
2190                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2191
2192         def report_extraction(self, file_id):
2193                 """Report information extraction."""
2194                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2195
2196         def _real_initialize(self):
2197                 return
2198
2199         def _real_extract(self, url):
2200                 # At this point we have a new file
2201                 self._downloader.increment_downloads()
2202
2203                 file_id = url.split('/')[-1]
2204                 # Rebuild url in english locale
2205                 url = 'http://depositfiles.com/en/files/' + file_id
2206
2207                 # Retrieve file webpage with 'Free download' button pressed
2208                 free_download_indication = { 'gateway_result' : '1' }
2209                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2210                 try:
2211                         self.report_download_webpage(file_id)
2212                         webpage = urllib2.urlopen(request).read()
2213                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2214                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2215                         return
2216
2217                 # Search for the real file URL
2218                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2219                 if (mobj is None) or (mobj.group(1) is None):
2220                         # Try to figure out reason of the error.
2221                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2222                         if (mobj is not None) and (mobj.group(1) is not None):
2223                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2224                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2225                         else:
2226                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2227                         return
2228
2229                 file_url = mobj.group(1)
2230                 file_extension = os.path.splitext(file_url)[1][1:]
2231
2232                 # Search for file title
2233                 mobj = re.search(r'<b title="(.*?)">', webpage)
2234                 if mobj is None:
2235                         self._downloader.trouble(u'ERROR: unable to extract title')
2236                         return
2237                 file_title = mobj.group(1).decode('utf-8')
2238
2239                 try:
2240                         # Process file information
2241                         self._downloader.process_info({
2242                                 'id':           file_id.decode('utf-8'),
2243                                 'url':          file_url.decode('utf-8'),
2244                                 'uploader':     u'NA',
2245                                 'upload_date':  u'NA',
2246                                 'title':        file_title,
2247                                 'stitle':       file_title,
2248                                 'ext':          file_extension.decode('utf-8'),
2249                                 'format':       u'NA',
2250                                 'player_url':   None,
2251                         })
2252                 except UnavailableVideoError, err:
2253                         self._downloader.trouble(u'ERROR: unable to download file')
2254
class PostProcessor(object):
        """Base class for post processors.

        Instances are attached to a downloader through its
        add_post_processor() method. After each successful download the
        downloader walks its chain of PostProcessors, calling run() on every
        one: the first call receives an initial information dictionary, and
        each subsequent call receives whatever the previous PostProcessor
        returned.

        Processing stops as soon as a run() call yields None, or once the
        last PostProcessor in the chain has been invoked.

        Like InfoExtractor objects, PostProcessors take part in a "mutual
        registration" handshake with their downloader.
        """

        # Downloader this post processor is registered with (may be None).
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def set_downloader(self, downloader):
                """Sets the downloader for this PP."""
                self._downloader = downloader

        def run(self, information):
                """Run the PostProcessor.

                "information" is a dictionary shaped like the ones produced
                by InfoExtractors, extended with a "filepath" key pointing at
                the downloaded file.

                Returning None halts the postprocessing chain; returning a
                (possibly modified) information dictionary passes it on to
                the next PostProcessor in the chain.

                Implementations may also raise PostProcessingError, which the
                calling downloader takes into account.
                """
                return information # by default, do nothing
2300
2301 ### MAIN PROGRAM ###
2302 if __name__ == '__main__':
2303         try:
2304                 # Modules needed only when running the main program
2305                 import getpass
2306                 import optparse
2307
2308                 # Function to update the program file with the latest version from the repository.
2309                 def update_self(downloader, filename):
2310                         # Note: downloader only used for options
2311                         if not os.access(filename, os.W_OK):
2312                                 sys.exit('ERROR: no write permissions on %s' % filename)
2313
2314                         downloader.to_screen('Updating to latest stable version...')
2315                         try:
2316                                 latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
2317                                 latest_version = urllib.urlopen(latest_url).read().strip()
2318                                 prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2319                                 newcontent = urllib.urlopen(prog_url).read()
2320                         except (IOError, OSError), err:
2321                                 sys.exit('ERROR: unable to download latest version')
2322                         try:
2323                                 stream = open(filename, 'w')
2324                                 stream.write(newcontent)
2325                                 stream.close()
2326                         except (IOError, OSError), err:
2327                                 sys.exit('ERROR: unable to overwrite current version')
2328                         downloader.to_screen('Updated to version %s' % latest_version)
2329
2330                 # Parse command line
2331                 parser = optparse.OptionParser(
2332                         usage='Usage: %prog [options] url...',
2333                         version='2010.12.09',
2334                         conflict_handler='resolve',
2335                 )
2336
2337                 parser.add_option('-h', '--help',
2338                                 action='help', help='print this help text and exit')
2339                 parser.add_option('-v', '--version',
2340                                 action='version', help='print program version and exit')
2341                 parser.add_option('-U', '--update',
2342                                 action='store_true', dest='update_self', help='update this program to latest stable version')
2343                 parser.add_option('-i', '--ignore-errors',
2344                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2345                 parser.add_option('-r', '--rate-limit',
2346                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2347                 parser.add_option('-R', '--retries',
2348                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2349                 parser.add_option('--playlist-start',
2350                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2351                 parser.add_option('--playlist-end',
2352                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
2353                 parser.add_option('--dump-user-agent',
2354                                 action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
2355
2356                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2357                 authentication.add_option('-u', '--username',
2358                                 dest='username', metavar='USERNAME', help='account username')
2359                 authentication.add_option('-p', '--password',
2360                                 dest='password', metavar='PASSWORD', help='account password')
2361                 authentication.add_option('-n', '--netrc',
2362                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2363                 parser.add_option_group(authentication)
2364
2365                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
                # --- Video format selection options ---
                # (the "video_format" OptionGroup and the parser itself are created
                # earlier in this __main__ section, above this excerpt)
                video_format.add_option('-f', '--format',
                                action='store', dest='format', metavar='FORMAT', help='video format code')
                # '--all-formats' reuses dest='format' with the sentinel value '-1',
                # which the outtmpl selection below keys on to include %(format)s in names.
                video_format.add_option('--all-formats',
                                action='store_const', dest='format', help='download all available video formats', const='-1')
                video_format.add_option('--max-quality',
                                action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
                parser.add_option_group(video_format)

                # --- Verbosity / simulation options ---
                # The --get-* flags imply both 'quiet' and 'simulate' (see the
                # FileDownloader parameter dict further down).
                verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
                verbosity.add_option('-q', '--quiet',
                                action='store_true', dest='quiet', help='activates quiet mode', default=False)
                verbosity.add_option('-s', '--simulate',
                                action='store_true', dest='simulate', help='do not download video', default=False)
                verbosity.add_option('-g', '--get-url',
                                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
                verbosity.add_option('-e', '--get-title',
                                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
                verbosity.add_option('--get-thumbnail',
                                action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
                verbosity.add_option('--get-description',
                                action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
                verbosity.add_option('--no-progress',
                                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
                verbosity.add_option('--console-title',
                                action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
                parser.add_option_group(verbosity)

                # --- Filesystem options ---
                filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
                filesystem.add_option('-t', '--title',
                                action='store_true', dest='usetitle', help='use title in file name', default=False)
                filesystem.add_option('-l', '--literal',
                                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
                filesystem.add_option('-A', '--auto-number',
                                action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
                filesystem.add_option('-o', '--output',
                                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
                filesystem.add_option('-a', '--batch-file',
                                dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
                filesystem.add_option('-w', '--no-overwrites',
                                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
                filesystem.add_option('-c', '--continue',
                                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
                # NOTE(review): despite the help text ("dump ... to"), the cookie file
                # is also *read* at startup below, when it exists.
                filesystem.add_option('--cookies',
                                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
                filesystem.add_option('--no-part',
                                action='store_true', dest='nopart', help='do not use .part files', default=False)
                parser.add_option_group(filesystem)

                (opts, args) = parser.parse_args()

                # Open appropriate CookieJar: an in-memory jar by default, or a
                # Mozilla-format file-backed jar when --cookies was given.  The file
                # is loaded only if it already exists and is readable; any I/O error
                # while opening/loading is fatal.
                if opts.cookiefile is None:
                        jar = cookielib.CookieJar()
                else:
                        try:
                                jar = cookielib.MozillaCookieJar(opts.cookiefile)
                                if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                                        jar.load()
                        except (IOError, OSError), err:  # Python 2 except syntax
                                sys.exit(u'ERROR: unable to open cookie file')

                # Dump user agent and exit immediately if requested
                if opts.dump_user_agent:
                        print std_headers['User-Agent']
                        sys.exit(0)

                # General configuration: install a process-wide urllib2 opener with
                # proxy support, the cookie jar chosen above, and YoutubeDLHandler
                # (defined elsewhere in this file; presumably handles the gzip/deflate
                # Content-Encoding advertised in std_headers -- TODO confirm).
                cookie_processor = urllib2.HTTPCookieProcessor(jar)
                urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()))
                socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

                # Batch file verification: read URLs from --batch-file (or stdin for
                # '-'), strip whitespace, and drop blank lines and comment lines
                # beginning with '#', '/' or ';'.
                batchurls = []
                if opts.batchfile is not None:
                        try:
                                if opts.batchfile == '-':
                                        batchfd = sys.stdin
                                else:
                                        batchfd = open(opts.batchfile, 'r')
                                batchurls = batchfd.readlines()
                                batchurls = [x.strip() for x in batchurls]
                                batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
                        except IOError:
                                sys.exit(u'ERROR: batch file could not be read')
                all_urls = batchurls + args

                # Conflicting, missing and erroneous options.  parser.error() prints
                # the message and exits with status 2.
                if opts.usenetrc and (opts.username is not None or opts.password is not None):
                        parser.error(u'using .netrc conflicts with giving username/password')
                if opts.password is not None and opts.username is None:
                        parser.error(u'account username missing')
                if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
                        parser.error(u'using output template conflicts with using title, literal title or auto number')
                if opts.usetitle and opts.useliteral:
                        parser.error(u'using title conflicts with using literal title')
                # Username given without a password: prompt interactively.
                # (getpass is imported in the __main__ prologue, above this excerpt --
                # its use at L2464 implies it; not visible here.)
                if opts.username is not None and opts.password is None:
                        opts.password = getpass.getpass(u'Type account password and press return:')
                # parse_bytes returns None for an unparseable rate limit.
                if opts.ratelimit is not None:
                        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
                        if numeric_limit is None:
                                parser.error(u'invalid rate limit specified')
                        opts.ratelimit = numeric_limit
                if opts.retries is not None:
                        try:
                                opts.retries = long(opts.retries)
                        except (TypeError, ValueError), err:
                                parser.error(u'invalid retry count specified')
                # Playlist range validation: start must be a positive integer;
                # end is either -1 (meaning "until the end") or >= start.
                try:
                        opts.playliststart = long(opts.playliststart)
                        if opts.playliststart <= 0:
                                raise ValueError
                except (TypeError, ValueError), err:
                        parser.error(u'invalid playlist start number specified')
                try:
                        opts.playlistend = long(opts.playlistend)
                        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
                                raise ValueError
                except (TypeError, ValueError), err:
                        parser.error(u'invalid playlist end number specified')

                # Information extractors.  Several IEs delegate the actual video
                # extraction to another IE instance passed to their constructor
                # (e.g. the playlist/user/search IEs feed URLs back to youtube_ie).
                youtube_ie = YoutubeIE()
                metacafe_ie = MetacafeIE(youtube_ie)
                dailymotion_ie = DailymotionIE()
                youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
                youtube_user_ie = YoutubeUserIE(youtube_ie)
                youtube_search_ie = YoutubeSearchIE(youtube_ie)
                google_ie = GoogleIE()
                google_search_ie = GoogleSearchIE(google_ie)
                photobucket_ie = PhotobucketIE()
                yahoo_ie = YahooIE()
                yahoo_search_ie = YahooSearchIE(yahoo_ie)
                deposit_files_ie = DepositFilesIE()
                generic_ie = GenericIE()

                # File downloader.  'quiet' and 'simulate' are forced on by any of
                # the --get-* flags.  The 'outtmpl' value is picked by an or-chain:
                # an explicit -o template wins; otherwise the first template whose
                # guard flags (all-formats / title / literal / auto-number) are set
                # is used, falling back to the bare '%(id)s.%(ext)s'.
                fd = FileDownloader({
                        'usenetrc': opts.usenetrc,
                        'username': opts.username,
                        'password': opts.password,
                        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
                        'forceurl': opts.geturl,
                        'forcetitle': opts.gettitle,
                        'forcethumbnail': opts.getthumbnail,
                        'forcedescription': opts.getdescription,
                        'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
                        'format': opts.format,
                        'format_limit': opts.format_limit,
                        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                                or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                                or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                                or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
                                or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
                                or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
                                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                                or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
                                or u'%(id)s.%(ext)s'),
                        'ignoreerrors': opts.ignoreerrors,
                        'ratelimit': opts.ratelimit,
                        'nooverwrites': opts.nooverwrites,
                        'retries': opts.retries,
                        'continuedl': opts.continue_dl,
                        'noprogress': opts.noprogress,
                        'playliststart': opts.playliststart,
                        'playlistend': opts.playlistend,
                        # '-o -' writes the video to stdout, so log output must go
                        # to stderr instead.
                        'logtostderr': opts.outtmpl == '-',
                        'consoletitle': opts.consoletitle,
                        'nopart': opts.nopart,
                        })
                # Registration order matters: more specific extractors (search,
                # playlist, user) are consulted before the plain video ones.
                fd.add_info_extractor(youtube_search_ie)
                fd.add_info_extractor(youtube_pl_ie)
                fd.add_info_extractor(youtube_user_ie)
                fd.add_info_extractor(metacafe_ie)
                fd.add_info_extractor(dailymotion_ie)
                fd.add_info_extractor(youtube_ie)
                fd.add_info_extractor(google_ie)
                fd.add_info_extractor(google_search_ie)
                fd.add_info_extractor(photobucket_ie)
                fd.add_info_extractor(yahoo_ie)
                fd.add_info_extractor(yahoo_search_ie)
                fd.add_info_extractor(deposit_files_ie)

                # This must come last since it's the
                # fallback if none of the others work
                fd.add_info_extractor(generic_ie)

                # Update version (self-update of the script file itself)
                if opts.update_self:
                        update_self(fd, sys.argv[0])

                # Maybe do nothing: no URLs is an error unless we just self-updated.
                if len(all_urls) < 1:
                        if not opts.update_self:
                                parser.error(u'you must provide at least one URL')
                        else:
                                sys.exit()
                # download() returns the process exit code (non-zero on failures).
                retcode = fd.download(all_urls)

                # Dump cookie jar if requested (only file-backed jars have a path)
                if opts.cookiefile is not None:
                        try:
                                jar.save()
                        except (IOError, OSError), err:
                                sys.exit(u'ERROR: unable to save cookie jar')

                sys.exit(retcode)

        # The matching 'try:' opens before this excerpt, at the top of the
        # __main__ section.  Python 2 'except Class, name' syntax throughout.
        except DownloadError:
                sys.exit(1)
        except SameFileError:
                sys.exit(u'ERROR: fixed output name but more than one file to download')
        except KeyboardInterrupt:
                sys.exit(u'\nERROR: Interrupted by user')