Bump version number
[youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import htmlentitydefs
8 import httplib
9 import locale
10 import math
11 import netrc
12 import os
13 import os.path
14 import re
15 import socket
16 import string
17 import subprocess
18 import sys
19 import time
20 import urllib
21 import urllib2
22
23 # parse_qs was moved from the cgi module to the urlparse module recently.
24 try:
25         from urlparse import parse_qs
26 except ImportError:
27         from cgi import parse_qs
28
29 std_headers = {
30         'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
31         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
33         'Accept-Language': 'en-us,en;q=0.5',
34 }
35
36 simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
37
38 def preferredencoding():
39         """Get preferred encoding.
40
41         Returns the best encoding scheme for the system, based on
42         locale.getpreferredencoding() and some further tweaks.
43         """
44         def yield_preferredencoding():
45                 try:
46                         pref = locale.getpreferredencoding()
47                         u'TEST'.encode(pref)
48                 except:
49                         pref = 'UTF-8'
50                 while True:
51                         yield pref
52         return yield_preferredencoding().next()
53
54 def htmlentity_transform(matchobj):
55         """Transforms an HTML entity to a Unicode character.
56         
57         This function receives a match object and is intended to be used with
58         the re.sub() function.
59         """
60         entity = matchobj.group(1)
61
62         # Known non-numeric HTML entity
63         if entity in htmlentitydefs.name2codepoint:
64                 return unichr(htmlentitydefs.name2codepoint[entity])
65
66         # Unicode character
67         mobj = re.match(ur'(?u)#(x?\d+)', entity)
68         if mobj is not None:
69                 numstr = mobj.group(1)
70                 if numstr.startswith(u'x'):
71                         base = 16
72                         numstr = u'0%s' % numstr
73                 else:
74                         base = 10
75                 return unichr(long(numstr, base))
76
77         # Unknown entity in name, return its literal representation
78         return (u'&%s;' % entity)
79
80 def sanitize_title(utitle):
81         """Sanitizes a video title so it could be used as part of a filename."""
82         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83         return utitle.replace(unicode(os.sep), u'%')
84
85 def sanitize_open(filename, open_mode):
86         """Try to open the given filename, and slightly tweak it if this fails.
87
88         Attempts to open the given filename. If this fails, it tries to change
89         the filename slightly, step by step, until it's either able to open it
90         or it fails and raises a final exception, like the standard open()
91         function.
92
93         It returns the tuple (stream, definitive_file_name).
94         """
95         try:
96                 if filename == u'-':
97                         return (sys.stdout, filename)
98                 stream = open(filename, open_mode)
99                 return (stream, filename)
100         except (IOError, OSError), err:
101                 # In case of error, try to remove win32 forbidden chars
102                 filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
103
104                 # An exception here should be caught in the caller
105                 stream = open(filename, open_mode)
106                 return (stream, filename)
107
108
109 class DownloadError(Exception):
110         """Download Error exception.
111         
112         This exception may be thrown by FileDownloader objects if they are not
113         configured to continue on errors. They will contain the appropriate
114         error message.
115         """
116         pass
117
118 class SameFileError(Exception):
119         """Same File exception.
120
121         This exception will be thrown by FileDownloader objects if they detect
122         multiple files would have to be downloaded to the same file on disk.
123         """
124         pass
125
126 class PostProcessingError(Exception):
127         """Post Processing exception.
128
129         This exception may be raised by PostProcessor's .run() method to
130         indicate an error in the postprocessing task.
131         """
132         pass
133
134 class UnavailableFormatError(Exception):
135         """Unavailable Format exception.
136
137         This exception will be thrown when a video is requested
138         in a format that is not available for that video.
139         """
140         pass
141
142 class ContentTooShortError(Exception):
143         """Content Too Short exception.
144
145         This exception may be raised by FileDownloader objects when a file they
146         download is too small for what the server announced first, indicating
147         the connection was probably interrupted.
148         """
149         # Both in bytes
150         downloaded = None
151         expected = None
152
153         def __init__(self, downloaded, expected):
154                 self.downloaded = downloaded
155                 self.expected = expected
156
157 class FileDownloader(object):
158         """File Downloader class.
159
160         File downloader objects are the ones responsible of downloading the
161         actual video file and writing it to disk if the user has requested
162         it, among some other tasks. In most cases there should be one per
163         program. As, given a video URL, the downloader doesn't know how to
164         extract all the needed information, task that InfoExtractors do, it
165         has to pass the URL to one of them.
166
167         For this, file downloader objects have a method that allows
168         InfoExtractors to be registered in a given order. When it is passed
169         a URL, the file downloader handles it to the first InfoExtractor it
170         finds that reports being able to handle it. The InfoExtractor extracts
171         all the information about the video or videos the URL refers to, and
172         asks the FileDownloader to process the video information, possibly
173         downloading the video.
174
175         File downloaders accept a lot of parameters. In order not to saturate
176         the object constructor with arguments, it receives a dictionary of
177         options instead. These options are available through the params
178         attribute for the InfoExtractors to use. The FileDownloader also
179         registers itself as the downloader in charge for the InfoExtractors
180         that are added to it, so this is a "mutual registration".
181
182         Available options:
183
184         username:       Username for authentication purposes.
185         password:       Password for authentication purposes.
186         usenetrc:       Use netrc for authentication instead.
187         quiet:          Do not print messages to stdout.
188         forceurl:       Force printing final URL.
189         forcetitle:     Force printing title.
190         simulate:       Do not download the video files.
191         format:         Video format code.
192         outtmpl:        Template for output names.
193         ignoreerrors:   Do not stop on download errors.
194         ratelimit:      Download speed limit, in bytes/sec.
195         nooverwrites:   Prevent overwriting files.
196         continuedl:     Try to continue downloads if possible.
197         noprogress:     Do not print the progress bar.
198         """
199
200         params = None
201         _ies = []
202         _pps = []
203         _download_retcode = None
204
205         def __init__(self, params):
206                 """Create a FileDownloader object with the given options."""
207                 self._ies = []
208                 self._pps = []
209                 self._download_retcode = 0
210                 self.params = params
211         
212         @staticmethod
213         def pmkdir(filename):
214                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
215                 components = filename.split(os.sep)
216                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
217                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
218                 for dir in aggregate:
219                         if not os.path.exists(dir):
220                                 os.mkdir(dir)
221         
222         @staticmethod
223         def format_bytes(bytes):
224                 if bytes is None:
225                         return 'N/A'
226                 if type(bytes) is str:
227                         bytes = float(bytes)
228                 if bytes == 0.0:
229                         exponent = 0
230                 else:
231                         exponent = long(math.log(bytes, 1024.0))
232                 suffix = 'bkMGTPEZY'[exponent]
233                 converted = float(bytes) / float(1024**exponent)
234                 return '%.2f%s' % (converted, suffix)
235
236         @staticmethod
237         def calc_percent(byte_counter, data_len):
238                 if data_len is None:
239                         return '---.-%'
240                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
241
242         @staticmethod
243         def calc_eta(start, now, total, current):
244                 if total is None:
245                         return '--:--'
246                 dif = now - start
247                 if current == 0 or dif < 0.001: # One millisecond
248                         return '--:--'
249                 rate = float(current) / dif
250                 eta = long((float(total) - float(current)) / rate)
251                 (eta_mins, eta_secs) = divmod(eta, 60)
252                 if eta_mins > 99:
253                         return '--:--'
254                 return '%02d:%02d' % (eta_mins, eta_secs)
255
256         @staticmethod
257         def calc_speed(start, now, bytes):
258                 dif = now - start
259                 if bytes == 0 or dif < 0.001: # One millisecond
260                         return '%10s' % '---b/s'
261                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
262
263         @staticmethod
264         def best_block_size(elapsed_time, bytes):
265                 new_min = max(bytes / 2.0, 1.0)
266                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
267                 if elapsed_time < 0.001:
268                         return long(new_max)
269                 rate = bytes / elapsed_time
270                 if rate > new_max:
271                         return long(new_max)
272                 if rate < new_min:
273                         return long(new_min)
274                 return long(rate)
275
276         @staticmethod
277         def parse_bytes(bytestr):
278                 """Parse a string indicating a byte quantity into a long integer."""
279                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
280                 if matchobj is None:
281                         return None
282                 number = float(matchobj.group(1))
283                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
284                 return long(round(number * multiplier))
285
286         @staticmethod
287         def verify_url(url):
288                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
289                 request = urllib2.Request(url, None, std_headers)
290                 data = urllib2.urlopen(request)
291                 data.read(1)
292                 url = data.geturl()
293                 data.close()
294                 return url
295
296         def add_info_extractor(self, ie):
297                 """Add an InfoExtractor object to the end of the list."""
298                 self._ies.append(ie)
299                 ie.set_downloader(self)
300         
301         def add_post_processor(self, pp):
302                 """Add a PostProcessor object to the end of the chain."""
303                 self._pps.append(pp)
304                 pp.set_downloader(self)
305         
306         def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
307                 """Print message to stdout if not in quiet mode."""
308                 try:
309                         if not self.params.get('quiet', False):
310                                 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
311                         sys.stdout.flush()
312                 except (UnicodeEncodeError), err:
313                         if not ignore_encoding_errors:
314                                 raise
315         
316         def to_stderr(self, message):
317                 """Print message to stderr."""
318                 print >>sys.stderr, message.encode(preferredencoding())
319         
320         def fixed_template(self):
321                 """Checks if the output template is fixed."""
322                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
323
324         def trouble(self, message=None):
325                 """Determine action to take when a download problem appears.
326
327                 Depending on if the downloader has been configured to ignore
328                 download errors or not, this method may throw an exception or
329                 not when errors are found, after printing the message.
330                 """
331                 if message is not None:
332                         self.to_stderr(message)
333                 if not self.params.get('ignoreerrors', False):
334                         raise DownloadError(message)
335                 self._download_retcode = 1
336
337         def slow_down(self, start_time, byte_counter):
338                 """Sleep if the download speed is over the rate limit."""
339                 rate_limit = self.params.get('ratelimit', None)
340                 if rate_limit is None or byte_counter == 0:
341                         return
342                 now = time.time()
343                 elapsed = now - start_time
344                 if elapsed <= 0.0:
345                         return
346                 speed = float(byte_counter) / elapsed
347                 if speed > rate_limit:
348                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
349
350         def report_destination(self, filename):
351                 """Report destination filename."""
352                 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
353         
354         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
355                 """Report download progress."""
356                 if self.params.get('noprogress', False):
357                         return
358                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
359                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
360
361         def report_resuming_byte(self, resume_len):
362                 """Report attemtp to resume at given byte."""
363                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
364         
365         def report_file_already_downloaded(self, file_name):
366                 """Report file has already been fully downloaded."""
367                 try:
368                         self.to_stdout(u'[download] %s has already been downloaded' % file_name)
369                 except (UnicodeEncodeError), err:
370                         self.to_stdout(u'[download] The file has already been downloaded')
371         
372         def report_unable_to_resume(self):
373                 """Report it was impossible to resume download."""
374                 self.to_stdout(u'[download] Unable to resume')
375         
376         def report_finish(self):
377                 """Report download finished."""
378                 if self.params.get('noprogress', False):
379                         self.to_stdout(u'[download] Download completed')
380                 else:
381                         self.to_stdout(u'')
382
383         def process_info(self, info_dict):
384                 """Process a single dictionary returned by an InfoExtractor."""
385                 # Do nothing else if in simulate mode
386                 if self.params.get('simulate', False):
387                         # Verify URL if it's an HTTP one
388                         if info_dict['url'].startswith('http'):
389                                 try:
390                                         self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
391                                 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
392                                         raise UnavailableFormatError
393
394                         # Forced printings
395                         if self.params.get('forcetitle', False):
396                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
397                         if self.params.get('forceurl', False):
398                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
399
400                         return
401                         
402                 try:
403                         template_dict = dict(info_dict)
404                         template_dict['epoch'] = unicode(long(time.time()))
405                         filename = self.params['outtmpl'] % template_dict
406                 except (ValueError, KeyError), err:
407                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
408                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
409                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
410                         return
411
412                 try:
413                         self.pmkdir(filename)
414                 except (OSError, IOError), err:
415                         self.trouble('ERROR: unable to create directories: %s' % str(err))
416                         return
417
418                 try:
419                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
420                 except (OSError, IOError), err:
421                         raise UnavailableFormatError
422                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
423                         self.trouble('ERROR: unable to download video data: %s' % str(err))
424                         return
425                 except (ContentTooShortError, ), err:
426                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
427                         return
428
429                 if success:
430                         try:
431                                 self.post_process(filename, info_dict)
432                         except (PostProcessingError), err:
433                                 self.trouble('ERROR: postprocessing: %s' % str(err))
434                                 return
435
436         def download(self, url_list):
437                 """Download a given list of URLs."""
438                 if len(url_list) > 1 and self.fixed_template():
439                         raise SameFileError(self.params['outtmpl'])
440
441                 for url in url_list:
442                         suitable_found = False
443                         for ie in self._ies:
444                                 # Go to next InfoExtractor if not suitable
445                                 if not ie.suitable(url):
446                                         continue
447
448                                 # Suitable InfoExtractor found
449                                 suitable_found = True
450
451                                 # Extract information from URL and process it
452                                 ie.extract(url)
453
454                                 # Suitable InfoExtractor had been found; go to next URL
455                                 break
456
457                         if not suitable_found:
458                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
459
460                 return self._download_retcode
461
462         def post_process(self, filename, ie_info):
463                 """Run the postprocessing chain on the given file."""
464                 info = dict(ie_info)
465                 info['filepath'] = filename
466                 for pp in self._pps:
467                         info = pp.run(info)
468                         if info is None:
469                                 break
470         
471         def _download_with_rtmpdump(self, filename, url):
472                 self.report_destination(filename)
473
474                 # Check for rtmpdump first
475                 try:
476                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
477                 except (OSError, IOError):
478                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
479                         return False
480
481                 # Download using rtmpdump. rtmpdump returns exit code 2 when
482                 # the connection was interrumpted and resuming appears to be
483                 # possible. This is part of rtmpdump's normal usage, AFAIK.
484                 basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
485                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
486                 while retval == 2 or retval == 1:
487                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
488                         time.sleep(2.0) # This seems to be needed
489                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
490                 if retval == 0:
491                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
492                         return True
493                 else:
494                         self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
495                         return False
496
497         def _do_download(self, filename, url):
498                 # Attempt to download using rtmpdump
499                 if url.startswith('rtmp'):
500                         return self._download_with_rtmpdump(filename, url)
501
502                 stream = None
503                 open_mode = 'wb'
504                 basic_request = urllib2.Request(url, None, std_headers)
505                 request = urllib2.Request(url, None, std_headers)
506
507                 # Establish possible resume length
508                 if os.path.isfile(filename):
509                         resume_len = os.path.getsize(filename)
510                 else:
511                         resume_len = 0
512
513                 # Request parameters in case of being able to resume
514                 if self.params.get('continuedl', False) and resume_len != 0:
515                         self.report_resuming_byte(resume_len)
516                         request.add_header('Range','bytes=%d-' % resume_len)
517                         open_mode = 'ab'
518
519                 # Establish connection
520                 try:
521                         data = urllib2.urlopen(request)
522                 except (urllib2.HTTPError, ), err:
523                         if err.code != 416: #  416 is 'Requested range not satisfiable'
524                                 raise
525                         # Unable to resume
526                         data = urllib2.urlopen(basic_request)
527                         content_length = data.info()['Content-Length']
528
529                         if content_length is not None and long(content_length) == resume_len:
530                                 # Because the file had already been fully downloaded
531                                 self.report_file_already_downloaded(filename)
532                                 return True
533                         else:
534                                 # Because the server didn't let us
535                                 self.report_unable_to_resume()
536                                 open_mode = 'wb'
537
538                 data_len = data.info().get('Content-length', None)
539                 data_len_str = self.format_bytes(data_len)
540                 byte_counter = 0
541                 block_size = 1024
542                 start = time.time()
543                 while True:
544                         # Download and write
545                         before = time.time()
546                         data_block = data.read(block_size)
547                         after = time.time()
548                         data_block_len = len(data_block)
549                         if data_block_len == 0:
550                                 break
551                         byte_counter += data_block_len
552
553                         # Open file just in time
554                         if stream is None:
555                                 try:
556                                         (stream, filename) = sanitize_open(filename, open_mode)
557                                         self.report_destination(filename)
558                                 except (OSError, IOError), err:
559                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
560                                         return False
561                         stream.write(data_block)
562                         block_size = self.best_block_size(after - before, data_block_len)
563
564                         # Progress message
565                         percent_str = self.calc_percent(byte_counter, data_len)
566                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
567                         speed_str = self.calc_speed(start, time.time(), byte_counter)
568                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
569
570                         # Apply rate limit
571                         self.slow_down(start, byte_counter)
572
573                 self.report_finish()
574                 if data_len is not None and str(byte_counter) != data_len:
575                         raise ContentTooShortError(byte_counter, long(data_len))
576                 return True
577
578 class InfoExtractor(object):
579         """Information Extractor class.
580
581         Information extractors are the classes that, given a URL, extract
582         information from the video (or videos) the URL refers to. This
583         information includes the real video URL, the video title and simplified
584         title, author and others. The information is stored in a dictionary
585         which is then passed to the FileDownloader. The FileDownloader
586         processes this information possibly downloading the video to the file
587         system, among other possible outcomes. The dictionaries must include
588         the following fields:
589
590         id:             Video identifier.
591         url:            Final video URL.
592         uploader:       Nickname of the video uploader.
593         title:          Literal title.
594         stitle:         Simplified title.
595         ext:            Video filename extension.
596         format:         Video format.
597
598         Subclasses of this one should re-define the _real_initialize() and
599         _real_extract() methods, as well as the suitable() static method.
600         Probably, they should also be instantiated and added to the main
601         downloader.
602         """
603
604         _ready = False
605         _downloader = None
606
607         def __init__(self, downloader=None):
608                 """Constructor. Receives an optional downloader."""
609                 self._ready = False
610                 self.set_downloader(downloader)
611
612         @staticmethod
613         def suitable(url):
614                 """Receives a URL and returns True if suitable for this IE."""
615                 return False
616
617         def initialize(self):
618                 """Initializes an instance (authentication, etc)."""
619                 if not self._ready:
620                         self._real_initialize()
621                         self._ready = True
622
623         def extract(self, url):
624                 """Extracts URL information and returns it in list of dicts."""
625                 self.initialize()
626                 return self._real_extract(url)
627
628         def set_downloader(self, downloader):
629                 """Sets the downloader for this IE."""
630                 self._downloader = downloader
631         
632         def _real_initialize(self):
633                 """Real initialization process. Redefine in subclasses."""
634                 pass
635
636         def _real_extract(self, url):
637                 """Real extraction process. Redefine in subclasses."""
638                 pass
639
640 class YoutubeIE(InfoExtractor):
641         """Information extractor for youtube.com."""
642
643         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
644         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
645         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
646         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
647         _NETRC_MACHINE = 'youtube'
648         _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag
649         _video_extensions = {
650                 '13': '3gp',
651                 '17': 'mp4',
652                 '18': 'mp4',
653                 '22': 'mp4',
654                 '37': 'mp4',
655         }
656
657         @staticmethod
658         def suitable(url):
659                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
660
661         def report_lang(self):
662                 """Report attempt to set language."""
663                 self._downloader.to_stdout(u'[youtube] Setting language')
664
665         def report_login(self):
666                 """Report attempt to log in."""
667                 self._downloader.to_stdout(u'[youtube] Logging in')
668         
669         def report_age_confirmation(self):
670                 """Report attempt to confirm age."""
671                 self._downloader.to_stdout(u'[youtube] Confirming age')
672         
673         def report_video_info_webpage_download(self, video_id):
674                 """Report attempt to download video info webpage."""
675                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
676         
677         def report_information_extraction(self, video_id):
678                 """Report attempt to extract video information."""
679                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
680         
681         def report_unavailable_format(self, video_id, format):
682                 """Report extracted video URL."""
683                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
684         
685         def report_rtmp_download(self):
686                 """Indicate the download will use the RTMP protocol."""
687                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
688         
689         def _real_initialize(self):
690                 if self._downloader is None:
691                         return
692
693                 username = None
694                 password = None
695                 downloader_params = self._downloader.params
696
697                 # Attempt to use provided username and password or .netrc data
698                 if downloader_params.get('username', None) is not None:
699                         username = downloader_params['username']
700                         password = downloader_params['password']
701                 elif downloader_params.get('usenetrc', False):
702                         try:
703                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
704                                 if info is not None:
705                                         username = info[0]
706                                         password = info[2]
707                                 else:
708                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
709                         except (IOError, netrc.NetrcParseError), err:
710                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
711                                 return
712
713                 # Set language
714                 request = urllib2.Request(self._LANG_URL, None, std_headers)
715                 try:
716                         self.report_lang()
717                         urllib2.urlopen(request).read()
718                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
719                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
720                         return
721
722                 # No authentication to be performed
723                 if username is None:
724                         return
725
726                 # Log in
727                 login_form = {
728                                 'current_form': 'loginForm',
729                                 'next':         '/',
730                                 'action_login': 'Log In',
731                                 'username':     username,
732                                 'password':     password,
733                                 }
734                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
735                 try:
736                         self.report_login()
737                         login_results = urllib2.urlopen(request).read()
738                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
739                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
740                                 return
741                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
742                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
743                         return
744         
745                 # Confirm age
746                 age_form = {
747                                 'next_url':             '/',
748                                 'action_confirm':       'Confirm',
749                                 }
750                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
751                 try:
752                         self.report_age_confirmation()
753                         age_results = urllib2.urlopen(request).read()
754                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
755                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
756                         return
757
758         def _real_extract(self, url):
759                 # Extract video id from URL
760                 mobj = re.match(self._VALID_URL, url)
761                 if mobj is None:
762                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
763                         return
764                 video_id = mobj.group(2)
765
766                 # Downloader parameters
767                 best_quality = False
768                 all_formats = False
769                 format_param = None
770                 quality_index = 0
771                 if self._downloader is not None:
772                         params = self._downloader.params
773                         format_param = params.get('format', None)
774                         if format_param == '0':
775                                 format_param = self._available_formats[quality_index]
776                                 best_quality = True
777                         elif format_param == '-1':
778                                 format_param = self._available_formats[quality_index]
779                                 all_formats = True
780
781                 while True:
782                         # Extension
783                         video_extension = self._video_extensions.get(format_param, 'flv')
784
785                         # Get video info
786                         self.report_video_info_webpage_download(video_id)
787                         for el_type in ['embedded', 'detailpage']:
788                                 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en'
789                                                    % (video_id, el_type))
790                                 request = urllib2.Request(video_info_url, None, std_headers)
791                                 try:
792                                         video_info_webpage = urllib2.urlopen(request).read()
793                                         video_info = parse_qs(video_info_webpage)
794                                         if 'token' in video_info:
795                                                 break
796                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
797                                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
798                                         return
799                         self.report_information_extraction(video_id)
800
801                         # "t" param
802                         if 'token' not in video_info:
803                                 # Attempt to see if YouTube has issued an error message
804                                 if 'reason' not in video_info:
805                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
806                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
807                                         stream.write(video_info_webpage)
808                                         stream.close()
809                                 else:
810                                         reason = urllib.unquote_plus(video_info['reason'][0])
811                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
812                                 return
813                         token = urllib.unquote_plus(video_info['token'][0])
814                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
815                         if format_param is not None:
816                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
817
818                         # Check possible RTMP download
819                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
820                                 self.report_rtmp_download()
821                                 video_real_url = video_info['conn'][0]
822
823                         # uploader
824                         if 'author' not in video_info:
825                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
826                                 return
827                         video_uploader = urllib.unquote_plus(video_info['author'][0])
828
829                         # title
830                         if 'title' not in video_info:
831                                 self._downloader.trouble(u'ERROR: unable to extract video title')
832                                 return
833                         video_title = urllib.unquote_plus(video_info['title'][0])
834                         video_title = video_title.decode('utf-8')
835                         video_title = sanitize_title(video_title)
836
837                         # simplified title
838                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
839                         simple_title = simple_title.strip(ur'_')
840
841                         try:
842                                 # Process video information
843                                 self._downloader.process_info({
844                                         'id':           video_id.decode('utf-8'),
845                                         'url':          video_real_url.decode('utf-8'),
846                                         'uploader':     video_uploader.decode('utf-8'),
847                                         'title':        video_title,
848                                         'stitle':       simple_title,
849                                         'ext':          video_extension.decode('utf-8'),
850                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
851                                 })
852
853                                 if all_formats:
854                                         if quality_index == len(self._available_formats) - 1:
855                                                 # None left to get
856                                                 return
857                                         else:
858                                                 quality_index += 1
859                                                 format_param = self._available_formats[quality_index]
860                                                 if format_param == None:
861                                                         return
862                                                 continue
863
864                                 return
865
866                         except UnavailableFormatError, err:
867                                 if best_quality or all_formats:
868                                         if quality_index == len(self._available_formats) - 1:
869                                                 # I don't ever expect this to happen
870                                                 if not all_formats:
871                                                         self._downloader.trouble(u'ERROR: no known formats available for video')
872                                                 return
873                                         else:
874                                                 self.report_unavailable_format(video_id, format_param)
875                                                 quality_index += 1
876                                                 format_param = self._available_formats[quality_index]
877                                                 if format_param == None:
878                                                         return
879                                                 continue
880                                 else: 
881                                         self._downloader.trouble('ERROR: format not available for video')
882                                         return
883
884
885 class MetacafeIE(InfoExtractor):
886         """Information Extractor for metacafe.com."""
887
888         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
889         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
890         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
891         _youtube_ie = None
892
893         def __init__(self, youtube_ie, downloader=None):
894                 InfoExtractor.__init__(self, downloader)
895                 self._youtube_ie = youtube_ie
896
897         @staticmethod
898         def suitable(url):
899                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
900
901         def report_disclaimer(self):
902                 """Report disclaimer retrieval."""
903                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
904
905         def report_age_confirmation(self):
906                 """Report attempt to confirm age."""
907                 self._downloader.to_stdout(u'[metacafe] Confirming age')
908         
909         def report_download_webpage(self, video_id):
910                 """Report webpage download."""
911                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
912         
913         def report_extraction(self, video_id):
914                 """Report information extraction."""
915                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
916
917         def _real_initialize(self):
918                 # Retrieve disclaimer
919                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
920                 try:
921                         self.report_disclaimer()
922                         disclaimer = urllib2.urlopen(request).read()
923                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
924                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
925                         return
926
927                 # Confirm age
928                 disclaimer_form = {
929                         'filters': '0',
930                         'submit': "Continue - I'm over 18",
931                         }
932                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
933                 try:
934                         self.report_age_confirmation()
935                         disclaimer = urllib2.urlopen(request).read()
936                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
937                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
938                         return
939         
940         def _real_extract(self, url):
941                 # Extract id and simplified title from URL
942                 mobj = re.match(self._VALID_URL, url)
943                 if mobj is None:
944                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
945                         return
946
947                 video_id = mobj.group(1)
948
949                 # Check if video comes from YouTube
950                 mobj2 = re.match(r'^yt-(.*)$', video_id)
951                 if mobj2 is not None:
952                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
953                         return
954
955                 simple_title = mobj.group(2).decode('utf-8')
956                 video_extension = 'flv'
957
958                 # Retrieve video webpage to extract further information
959                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
960                 try:
961                         self.report_download_webpage(video_id)
962                         webpage = urllib2.urlopen(request).read()
963                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
964                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
965                         return
966
967                 # Extract URL, uploader and title from webpage
968                 self.report_extraction(video_id)
969                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
970                 if mobj is None:
971                         self._downloader.trouble(u'ERROR: unable to extract media URL')
972                         return
973                 mediaURL = urllib.unquote(mobj.group(1))
974
975                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
976                 #if mobj is None:
977                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
978                 #       return
979                 #gdaKey = mobj.group(1)
980                 #
981                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
982
983                 video_url = mediaURL
984
985                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
986                 if mobj is None:
987                         self._downloader.trouble(u'ERROR: unable to extract title')
988                         return
989                 video_title = mobj.group(1).decode('utf-8')
990                 video_title = sanitize_title(video_title)
991
992                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
993                 if mobj is None:
994                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
995                         return
996                 video_uploader = mobj.group(1)
997
998                 try:
999                         # Process video information
1000                         self._downloader.process_info({
1001                                 'id':           video_id.decode('utf-8'),
1002                                 'url':          video_url.decode('utf-8'),
1003                                 'uploader':     video_uploader.decode('utf-8'),
1004                                 'title':        video_title,
1005                                 'stitle':       simple_title,
1006                                 'ext':          video_extension.decode('utf-8'),
1007                                 'format':       u'NA',
1008                         })
1009                 except UnavailableFormatError:
1010                         self._downloader.trouble(u'ERROR: format not available for video')
1011
1012
1013 class GoogleIE(InfoExtractor):
1014         """Information extractor for video.google.com."""
1015
1016         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1017
1018         def __init__(self, downloader=None):
1019                 InfoExtractor.__init__(self, downloader)
1020
1021         @staticmethod
1022         def suitable(url):
1023                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1024
1025         def report_download_webpage(self, video_id):
1026                 """Report webpage download."""
1027                 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1028
1029         def report_extraction(self, video_id):
1030                 """Report information extraction."""
1031                 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
1032
1033         def _real_initialize(self):
1034                 return
1035
1036         def _real_extract(self, url):
1037                 # Extract id from URL
1038                 mobj = re.match(self._VALID_URL, url)
1039                 if mobj is None:
1040                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1041                         return
1042
1043                 video_id = mobj.group(1)
1044
1045                 video_extension = 'mp4'
1046
1047                 # Retrieve video webpage to extract further information
1048                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1049                 try:
1050                         self.report_download_webpage(video_id)
1051                         webpage = urllib2.urlopen(request).read()
1052                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1053                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1054                         return
1055
1056                 # Extract URL, uploader, and title from webpage
1057                 self.report_extraction(video_id)
1058                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1059                 if mobj is None:
1060                         video_extension = 'flv'
1061                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1062                 if mobj is None:
1063                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1064                         return
1065                 mediaURL = urllib.unquote(mobj.group(1))
1066                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1067                 mediaURL = mediaURL.replace('\\x26', '\x26')
1068
1069                 video_url = mediaURL
1070
1071                 mobj = re.search(r'<title>(.*)</title>', webpage)
1072                 if mobj is None:
1073                         self._downloader.trouble(u'ERROR: unable to extract title')
1074                         return
1075                 video_title = mobj.group(1).decode('utf-8')
1076                 video_title = sanitize_title(video_title)
1077                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1078
1079                 try:
1080                         # Process video information
1081                         self._downloader.process_info({
1082                                 'id':           video_id.decode('utf-8'),
1083                                 'url':          video_url.decode('utf-8'),
1084                                 'uploader':     u'NA',
1085                                 'title':        video_title,
1086                                 'stitle':       simple_title,
1087                                 'ext':          video_extension.decode('utf-8'),
1088                                 'format':       u'NA',
1089                         })
1090                 except UnavailableFormatError:
1091                         self._downloader.trouble(u'ERROR: format not available for video')
1092
1093
1094 class PhotobucketIE(InfoExtractor):
1095         """Information extractor for photobucket.com."""
1096
1097         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1098
1099         def __init__(self, downloader=None):
1100                 InfoExtractor.__init__(self, downloader)
1101
1102         @staticmethod
1103         def suitable(url):
1104                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1105
1106         def report_download_webpage(self, video_id):
1107                 """Report webpage download."""
1108                 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1109
1110         def report_extraction(self, video_id):
1111                 """Report information extraction."""
1112                 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1113
1114         def _real_initialize(self):
1115                 return
1116
1117         def _real_extract(self, url):
1118                 # Extract id from URL
1119                 mobj = re.match(self._VALID_URL, url)
1120                 if mobj is None:
1121                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1122                         return
1123
1124                 video_id = mobj.group(1)
1125
1126                 video_extension = 'flv'
1127
1128                 # Retrieve video webpage to extract further information
1129                 request = urllib2.Request(url)
1130                 try:
1131                         self.report_download_webpage(video_id)
1132                         webpage = urllib2.urlopen(request).read()
1133                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1134                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1135                         return
1136
1137                 # Extract URL, uploader, and title from webpage
1138                 self.report_extraction(video_id)
1139                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1140                 if mobj is None:
1141                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1142                         return
1143                 mediaURL = urllib.unquote(mobj.group(1))
1144
1145                 video_url = mediaURL
1146
1147                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1148                 if mobj is None:
1149                         self._downloader.trouble(u'ERROR: unable to extract title')
1150                         return
1151                 video_title = mobj.group(1).decode('utf-8')
1152                 video_title = sanitize_title(video_title)
1153                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1154
1155                 video_uploader = mobj.group(2).decode('utf-8')
1156
1157                 try:
1158                         # Process video information
1159                         self._downloader.process_info({
1160                                 'id':           video_id.decode('utf-8'),
1161                                 'url':          video_url.decode('utf-8'),
1162                                 'uploader':     video_uploader,
1163                                 'title':        video_title,
1164                                 'stitle':       simple_title,
1165                                 'ext':          video_extension.decode('utf-8'),
1166                                 'format':       u'NA',
1167                         })
1168                 except UnavailableFormatError:
1169                         self._downloader.trouble(u'ERROR: format not available for video')
1170
1171
1172 class YahooIE(InfoExtractor):
1173         """Information extractor for video.yahoo.com."""
1174
1175         # _VALID_URL matches all Yahoo! Video URLs
1176         # _VPAGE_URL matches only the extractable '/watch/' URLs
1177         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1178         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1179
        def __init__(self, downloader=None):
                # Delegate to the base extractor; no Yahoo-specific state is kept.
                InfoExtractor.__init__(self, downloader)
1182
1183         @staticmethod
1184         def suitable(url):
1185                 return (re.match(YahooIE._VALID_URL, url) is not None)
1186
        def report_download_webpage(self, video_id):
                """Report webpage download for the given video id."""
                self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1190
        def report_extraction(self, video_id):
                """Report information extraction for the given video id."""
                self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1194
1195         def _real_initialize(self):
1196                 return
1197
1198         def _real_extract(self, url):
1199                 # Extract ID from URL
1200                 mobj = re.match(self._VALID_URL, url)
1201                 if mobj is None:
1202                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1203                         return
1204
1205                 video_id = mobj.group(2)
1206                 video_extension = 'flv'
1207
1208                 # Rewrite valid but non-extractable URLs as
1209                 # extractable English language /watch/ URLs
1210                 if re.match(self._VPAGE_URL, url) is None:
1211                         request = urllib2.Request(url)
1212                         try:
1213                                 webpage = urllib2.urlopen(request).read()
1214                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1215                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1216                                 return
1217
1218                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1219                         if mobj is None:
1220                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1221                                 return
1222                         yahoo_id = mobj.group(1)
1223
1224                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1225                         if mobj is None:
1226                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1227                                 return
1228                         yahoo_vid = mobj.group(1)
1229
1230                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1231                         return self._real_extract(url)
1232
1233                 # Retrieve video webpage to extract further information
1234                 request = urllib2.Request(url)
1235                 try:
1236                         self.report_download_webpage(video_id)
1237                         webpage = urllib2.urlopen(request).read()
1238                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1239                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1240                         return
1241
1242                 # Extract uploader and title from webpage
1243                 self.report_extraction(video_id)
1244                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1245                 if mobj is None:
1246                         self._downloader.trouble(u'ERROR: unable to extract video title')
1247                         return
1248                 video_title = mobj.group(1).decode('utf-8')
1249                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1250
1251                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1252                 if mobj is None:
1253                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1254                         return
1255                 video_uploader = mobj.group(1).decode('utf-8')
1256
1257                 # Extract video height and width
1258                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1259                 if mobj is None:
1260                         self._downloader.trouble(u'ERROR: unable to extract video height')
1261                         return
1262                 yv_video_height = mobj.group(1)
1263
1264                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1265                 if mobj is None:
1266                         self._downloader.trouble(u'ERROR: unable to extract video width')
1267                         return
1268                 yv_video_width = mobj.group(1)
1269
1270                 # Retrieve video playlist to extract media URL
1271                 # I'm not completely sure what all these options are, but we
1272                 # seem to need most of them, otherwise the server sends a 401.
1273                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1274                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1275                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1276                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1277                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1278                 try:
1279                         self.report_download_webpage(video_id)
1280                         webpage = urllib2.urlopen(request).read()
1281                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1282                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1283                         return
1284
1285                 # Extract media URL from playlist XML
1286                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1287                 if mobj is None:
1288                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1289                         return
1290                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1291                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1292
1293                 try:
1294                         # Process video information
1295                         self._downloader.process_info({
1296                                 'id':           video_id.decode('utf-8'),
1297                                 'url':          video_url,
1298                                 'uploader':     video_uploader,
1299                                 'title':        video_title,
1300                                 'stitle':       simple_title,
1301                                 'ext':          video_extension.decode('utf-8'),
1302                         })
1303                 except UnavailableFormatError:
1304                         self._downloader.trouble(u'ERROR: format not available for video')
1305
1306
1307 class GenericIE(InfoExtractor):
1308         """Generic last-resort information extractor."""
1309
1310         def __init__(self, downloader=None):
1311                 InfoExtractor.__init__(self, downloader)
1312
1313         @staticmethod
1314         def suitable(url):
1315                 return True
1316
1317         def report_download_webpage(self, video_id):
1318                 """Report webpage download."""
1319                 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1320                 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1321
1322         def report_extraction(self, video_id):
1323                 """Report information extraction."""
1324                 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1325
1326         def _real_initialize(self):
1327                 return
1328
1329         def _real_extract(self, url):
1330                 video_id = url.split('/')[-1]
1331                 request = urllib2.Request(url)
1332                 try:
1333                         self.report_download_webpage(video_id)
1334                         webpage = urllib2.urlopen(request).read()
1335                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1336                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1337                         return
1338                 except ValueError, err:
1339                         # since this is the last-resort InfoExtractor, if
1340                         # this error is thrown, it'll be thrown here
1341                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1342                         return
1343
1344                 # Start with something easy: JW Player in SWFObject
1345                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1346                 if mobj is None:
1347                         # Broaden the search a little bit
1348                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1349                 if mobj is None:
1350                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1351                         return
1352
1353                 # It's possible that one of the regexes
1354                 # matched, but returned an empty group:
1355                 if mobj.group(1) is None:
1356                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1357                         return
1358
1359                 video_url = urllib.unquote(mobj.group(1))
1360                 video_id  = os.path.basename(video_url)
1361
1362                 # here's a fun little line of code for you:
1363                 video_extension = os.path.splitext(video_id)[1][1:]
1364                 video_id        = os.path.splitext(video_id)[0]
1365
1366                 # it's tempting to parse this further, but you would
1367                 # have to take into account all the variations like
1368                 #   Video Title - Site Name
1369                 #   Site Name | Video Title
1370                 #   Video Title - Tagline | Site Name
1371                 # and so on and so forth; it's just not practical
1372                 mobj = re.search(r'<title>(.*)</title>', webpage)
1373                 if mobj is None:
1374                         self._downloader.trouble(u'ERROR: unable to extract title')
1375                         return
1376                 video_title = mobj.group(1).decode('utf-8')
1377                 video_title = sanitize_title(video_title)
1378                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1379
1380                 # video uploader is domain name
1381                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1382                 if mobj is None:
1383                         self._downloader.trouble(u'ERROR: unable to extract title')
1384                         return
1385                 video_uploader = mobj.group(1).decode('utf-8')
1386
1387                 try:
1388                         # Process video information
1389                         self._downloader.process_info({
1390                                 'id':           video_id.decode('utf-8'),
1391                                 'url':          video_url.decode('utf-8'),
1392                                 'uploader':     video_uploader,
1393                                 'title':        video_title,
1394                                 'stitle':       simple_title,
1395                                 'ext':          video_extension.decode('utf-8'),
1396                                 'format':       u'NA',
1397                         })
1398                 except UnavailableFormatError:
1399                         self._downloader.trouble(u'ERROR: format not available for video')
1400
1401
1402 class YoutubeSearchIE(InfoExtractor):
1403         """Information Extractor for YouTube search queries."""
1404         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1405         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1406         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1407         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1408         _youtube_ie = None
1409         _max_youtube_results = 1000
1410
1411         def __init__(self, youtube_ie, downloader=None):
1412                 InfoExtractor.__init__(self, downloader)
1413                 self._youtube_ie = youtube_ie
1414         
1415         @staticmethod
1416         def suitable(url):
1417                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1418
1419         def report_download_page(self, query, pagenum):
1420                 """Report attempt to download playlist page with given number."""
1421                 query = query.decode(preferredencoding())
1422                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1423
1424         def _real_initialize(self):
1425                 self._youtube_ie.initialize()
1426         
1427         def _real_extract(self, query):
1428                 mobj = re.match(self._VALID_QUERY, query)
1429                 if mobj is None:
1430                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1431                         return
1432
1433                 prefix, query = query.split(':')
1434                 prefix = prefix[8:]
1435                 query  = query.encode('utf-8')
1436                 if prefix == '':
1437                         self._download_n_results(query, 1)
1438                         return
1439                 elif prefix == 'all':
1440                         self._download_n_results(query, self._max_youtube_results)
1441                         return
1442                 else:
1443                         try:
1444                                 n = long(prefix)
1445                                 if n <= 0:
1446                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1447                                         return
1448                                 elif n > self._max_youtube_results:
1449                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1450                                         n = self._max_youtube_results
1451                                 self._download_n_results(query, n)
1452                                 return
1453                         except ValueError: # parsing prefix as integer fails
1454                                 self._download_n_results(query, 1)
1455                                 return
1456
1457         def _download_n_results(self, query, n):
1458                 """Downloads a specified number of results for a query"""
1459
1460                 video_ids = []
1461                 already_seen = set()
1462                 pagenum = 1
1463
1464                 while True:
1465                         self.report_download_page(query, pagenum)
1466                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1467                         request = urllib2.Request(result_url, None, std_headers)
1468                         try:
1469                                 page = urllib2.urlopen(request).read()
1470                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1471                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1472                                 return
1473
1474                         # Extract video identifiers
1475                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1476                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1477                                 if video_id not in already_seen:
1478                                         video_ids.append(video_id)
1479                                         already_seen.add(video_id)
1480                                         if len(video_ids) == n:
1481                                                 # Specified n videos reached
1482                                                 for id in video_ids:
1483                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1484                                                 return
1485
1486                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1487                                 for id in video_ids:
1488                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1489                                 return
1490
1491                         pagenum = pagenum + 1
1492
1493 class YoutubePlaylistIE(InfoExtractor):
1494         """Information Extractor for YouTube playlists."""
1495
1496         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1497         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1498         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1499         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1500         _youtube_ie = None
1501
1502         def __init__(self, youtube_ie, downloader=None):
1503                 InfoExtractor.__init__(self, downloader)
1504                 self._youtube_ie = youtube_ie
1505         
1506         @staticmethod
1507         def suitable(url):
1508                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1509
1510         def report_download_page(self, playlist_id, pagenum):
1511                 """Report attempt to download playlist page with given number."""
1512                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1513
1514         def _real_initialize(self):
1515                 self._youtube_ie.initialize()
1516         
1517         def _real_extract(self, url):
1518                 # Extract playlist id
1519                 mobj = re.match(self._VALID_URL, url)
1520                 if mobj is None:
1521                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1522                         return
1523
1524                 # Download playlist pages
1525                 playlist_id = mobj.group(1)
1526                 video_ids = []
1527                 pagenum = 1
1528
1529                 while True:
1530                         self.report_download_page(playlist_id, pagenum)
1531                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1532                         try:
1533                                 page = urllib2.urlopen(request).read()
1534                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1535                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1536                                 return
1537
1538                         # Extract video identifiers
1539                         ids_in_page = []
1540                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1541                                 if mobj.group(1) not in ids_in_page:
1542                                         ids_in_page.append(mobj.group(1))
1543                         video_ids.extend(ids_in_page)
1544
1545                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1546                                 break
1547                         pagenum = pagenum + 1
1548
1549                 for id in video_ids:
1550                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1551                 return
1552
1553 class YoutubeUserIE(InfoExtractor):
1554         """Information Extractor for YouTube users."""
1555
1556         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1557         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1558         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1559         _youtube_ie = None
1560
1561         def __init__(self, youtube_ie, downloader=None):
1562                 InfoExtractor.__init__(self, downloader)
1563                 self._youtube_ie = youtube_ie
1564         
1565         @staticmethod
1566         def suitable(url):
1567                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1568
1569         def report_download_page(self, username):
1570                 """Report attempt to download user page."""
1571                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1572
1573         def _real_initialize(self):
1574                 self._youtube_ie.initialize()
1575         
1576         def _real_extract(self, url):
1577                 # Extract username
1578                 mobj = re.match(self._VALID_URL, url)
1579                 if mobj is None:
1580                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1581                         return
1582
1583                 # Download user page
1584                 username = mobj.group(1)
1585                 video_ids = []
1586                 pagenum = 1
1587
1588                 self.report_download_page(username)
1589                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1590                 try:
1591                         page = urllib2.urlopen(request).read()
1592                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1593                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1594                         return
1595
1596                 # Extract video identifiers
1597                 ids_in_page = []
1598
1599                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1600                         if mobj.group(1) not in ids_in_page:
1601                                 ids_in_page.append(mobj.group(1))
1602                 video_ids.extend(ids_in_page)
1603
1604                 for id in video_ids:
1605                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1606                 return
1607
class PostProcessor(object):
	"""Base class for postprocessing steps.

	Instances are registered on a downloader through its
	add_post_processor() method. After each successful download the
	downloader walks its chain of PostProcessors, feeding the first one
	an initial information dictionary and each subsequent one the value
	returned by its predecessor.

	Processing stops as soon as a processor returns None, or once the
	end of the chain is reached.

	Like InfoExtractor objects, PostProcessors keep a reference back to
	the downloader they belong to ("mutual registration").
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		"information" is a dictionary shaped like the ones composed by
		InfoExtractors, extended with a "filepath" key that names the
		file which was just downloaded.

		Returning None halts the postprocessing chain; returning a
		dictionary (possibly the received one with some fields changed)
		passes it on to the next processor in the chain. Implementations
		may also raise PostProcessingError, which the calling downloader
		takes into account.
		"""
		return information # by default, do nothing
1653         
### MAIN PROGRAM ###
# Parses the command line, wires up the information extractors and the
# FileDownloader, and kicks off the downloads.
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_stdout('Updating to latest stable version...')
			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			# Write in binary mode so newline translation on Windows cannot
			# corrupt the script, and close the handle even if write() fails
			stream = open(filename, 'wb')
			try:
				stream.write(newcontent)
			finally:
				stream.close()
			downloader.to_stdout('Updated to version %s' % latest_version)

		# General configuration
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Parse command line
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.04.02',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='UN', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PW', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FMT', help='video format code')
		video_format.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='download the best quality video possible', const='0')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('-d', '--high-def',
				action='store_const', dest='format', help='alias for -f 22', const='22')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TPL', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='F', help='file containing URLs to download')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Batch file verification
		batchurls = []
		if opts.batchfile is not None:
			try:
				# close the batch file once read instead of leaking the handle
				batchfd = open(opts.batchfile, 'r')
				try:
					batchurls = batchfd.readlines()
				finally:
					batchfd.close()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		generic_ie = GenericIE()

		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
			'format': opts.format,
			# first template expression that applies wins; the bare id
			# template is the final fallback
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
			})
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		retcode = fd.download(all_urls)
		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')