_ Git - youtube-dl/blob - youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 __authors__  = (
   5         'Ricardo Garcia Gonzalez',
   6         'Danny Colligan',
   7         'Benjamin Johnson',
   8         'Vasyl\' Vavrychuk',
   9         'Witold Baryluk',
  10         'Paweł Paprota',
  11         'Gergely Imreh',
  12         'Rogério Brito',
  13         'Philipp Hagemeister',
  14         'Sören Schulze',
  15         'Kevin Ngo',
  16         'Ori Avtalion',
  17         'shizeeg',
  18         )
  19
  20 __license__ = 'Public Domain'
  21 __version__ = '2012.02.27'
  22
  23 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
  24
  25
  26 import cookielib
  27 import datetime
  28 import getpass
  29 import gzip
  30 import htmlentitydefs
  31 import HTMLParser
  32 import httplib
  33 import locale
  34 import math
  35 import netrc
  36 import optparse
  37 import os
  38 import os.path
  39 import re
  40 import shlex
  41 import socket
  42 import string
  43 import subprocess
  44 import sys
  45 import time
  46 import urllib
  47 import urllib2
  48 import warnings
  49 import zlib
  50
  51 if os.name == 'nt':
  52         import ctypes
  53
  54 try:
  55         import email.utils
  56 except ImportError: # Python 2.4
  57         import email.Utils
  58 try:
  59         import cStringIO as StringIO
  60 except ImportError:
  61         import StringIO
  62
  63 # parse_qs was moved from the cgi module to the urlparse module recently.
  64 try:
  65         from urlparse import parse_qs
  66 except ImportError:
  67         from cgi import parse_qs
  68
  69 try:
  70         import lxml.etree
  71 except ImportError:
  72         pass # Handled below
  73
  74 try:
  75         import xml.etree.ElementTree
  76 except ImportError: # Python<2.5: Not officially supported, but let it slip
  77         warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
  78
  79 std_headers = {
  80         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
  81         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
  82         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  83         'Accept-Encoding': 'gzip, deflate',
  84         'Accept-Language': 'en-us,en;q=0.5',
  85 }
  86
  87 try:
  88         import json
  89 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
  90         import re
  91         class json(object):
  92                 @staticmethod
  93                 def loads(s):
  94                         s = s.decode('UTF-8')
  95                         def raiseError(msg, i):
  96                                 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
  97                         def skipSpace(i, expectMore=True):
  98                                 while i < len(s) and s[i] in ' \t\r\n':
  99                                         i += 1
 100                                 if expectMore:
 101                                         if i >= len(s):
 102                                                 raiseError('Premature end', i)
 103                                 return i
 104                         def decodeEscape(match):
 105                                 esc = match.group(1)
 106                                 _STATIC = {
 107                                         '"': '"',
 108                                         '\\': '\\',
 109                                         '/': '/',
 110                                         'b': unichr(0x8),
 111                                         'f': unichr(0xc),
 112                                         'n': '\n',
 113                                         'r': '\r',
 114                                         't': '\t',
 115                                 }
 116                                 if esc in _STATIC:
 117                                         return _STATIC[esc]
 118                                 if esc[0] == 'u':
 119                                         if len(esc) == 1+4:
 120                                                 return unichr(int(esc[1:5], 16))
 121                                         if len(esc) == 5+6 and esc[5:7] == '\\u':
 122                                                 hi = int(esc[1:5], 16)
 123                                                 low = int(esc[7:11], 16)
 124                                                 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
 125                                 raise ValueError('Unknown escape ' + str(esc))
 126                         def parseString(i):
 127                                 i += 1
 128                                 e = i
 129                                 while True:
 130                                         e = s.index('"', e)
 131                                         bslashes = 0
 132                                         while s[e-bslashes-1] == '\\':
 133                                                 bslashes += 1
 134                                         if bslashes % 2 == 1:
 135                                                 e += 1
 136                                                 continue
 137                                         break
 138                                 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
 139                                 stri = rexp.sub(decodeEscape, s[i:e])
 140                                 return (e+1,stri)
 141                         def parseObj(i):
 142                                 i += 1
 143                                 res = {}
 144                                 i = skipSpace(i)
 145                                 if s[i] == '}': # Empty dictionary
 146                                         return (i+1,res)
 147                                 while True:
 148                                         if s[i] != '"':
 149                                                 raiseError('Expected a string object key', i)
 150                                         i,key = parseString(i)
 151                                         i = skipSpace(i)
 152                                         if i >= len(s) or s[i] != ':':
 153                                                 raiseError('Expected a colon', i)
 154                                         i,val = parse(i+1)
 155                                         res[key] = val
 156                                         i = skipSpace(i)
 157                                         if s[i] == '}':
 158                                                 return (i+1, res)
 159                                         if s[i] != ',':
 160                                                 raiseError('Expected comma or closing curly brace', i)
 161                                         i = skipSpace(i+1)
 162                         def parseArray(i):
 163                                 res = []
 164                                 i = skipSpace(i+1)
 165                                 if s[i] == ']': # Empty array
 166                                         return (i+1,res)
 167                                 while True:
 168                                         i,val = parse(i)
 169                                         res.append(val)
 170                                         i = skipSpace(i) # Raise exception if premature end
 171                                         if s[i] == ']':
 172                                                 return (i+1, res)
 173                                         if s[i] != ',':
 174                                                 raiseError('Expected a comma or closing bracket', i)
 175                                         i = skipSpace(i+1)
 176                         def parseDiscrete(i):
 177                                 for k,v in {'true': True, 'false': False, 'null': None}.items():
 178                                         if s.startswith(k, i):
 179                                                 return (i+len(k), v)
 180                                 raiseError('Not a boolean (or null)', i)
 181                         def parseNumber(i):
 182                                 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
 183                                 if mobj is None:
 184                                         raiseError('Not a number', i)
 185                                 nums = mobj.group(1)
 186                                 if '.' in nums or 'e' in nums or 'E' in nums:
 187                                         return (i+len(nums), float(nums))
 188                                 return (i+len(nums), int(nums))
 189                         CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
 190                         def parse(i):
 191                                 i = skipSpace(i)
 192                                 i,res = CHARMAP.get(s[i], parseNumber)(i)
 193                                 i = skipSpace(i, False)
 194                                 return (i,res)
 195                         i,res = parse(0)
 196                         if i < len(s):
 197                                 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
 198                         return res
 199
 200 def preferredencoding():
 201         """Get preferred encoding.
 202
 203         Returns the best encoding scheme for the system, based on
 204         locale.getpreferredencoding() and some further tweaks.
 205         """
 206         def yield_preferredencoding():
 207                 try:
 208                         pref = locale.getpreferredencoding()
 209                         u'TEST'.encode(pref)
 210                 except:
 211                         pref = 'UTF-8'
 212                 while True:
 213                         yield pref
 214         return yield_preferredencoding().next()
 215
 216
 217 def htmlentity_transform(matchobj):
 218         """Transforms an HTML entity to a Unicode character.
 219
 220         This function receives a match object and is intended to be used with
 221         the re.sub() function.
 222         """
 223         entity = matchobj.group(1)
 224
 225         # Known non-numeric HTML entity
 226         if entity in htmlentitydefs.name2codepoint:
 227                 return unichr(htmlentitydefs.name2codepoint[entity])
 228
 229         # Unicode character
 230         mobj = re.match(ur'(?u)#(x?\d+)', entity)
 231         if mobj is not None:
 232                 numstr = mobj.group(1)
 233                 if numstr.startswith(u'x'):
 234                         base = 16
 235                         numstr = u'0%s' % numstr
 236                 else:
 237                         base = 10
 238                 return unichr(long(numstr, base))
 239
 240         # Unknown entity in name, return its literal representation
 241         return (u'&%s;' % entity)
 242
 243
 244 def sanitize_title(utitle):
 245         """Sanitizes a video title so it could be used as part of a filename."""
 246         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
 247         return utitle.replace(unicode(os.sep), u'%')
 248
 249
 250 def sanitize_open(filename, open_mode):
 251         """Try to open the given filename, and slightly tweak it if this fails.
 252
 253         Attempts to open the given filename. If this fails, it tries to change
 254         the filename slightly, step by step, until it's either able to open it
 255         or it fails and raises a final exception, like the standard open()
 256         function.
 257
 258         It returns the tuple (stream, definitive_file_name).
 259         """
 260         try:
 261                 if filename == u'-':
 262                         if sys.platform == 'win32':
 263                                 import msvcrt
 264                                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 265                         return (sys.stdout, filename)
 266                 stream = open(_encodeFilename(filename), open_mode)
 267                 return (stream, filename)
 268         except (IOError, OSError), err:
 269                 # In case of error, try to remove win32 forbidden chars
 270                 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
 271
 272                 # An exception here should be caught in the caller
 273                 stream = open(_encodeFilename(filename), open_mode)
 274                 return (stream, filename)
 275
 276
 277 def timeconvert(timestr):
 278         """Convert RFC 2822 defined time string into system timestamp"""
 279         timestamp = None
 280         timetuple = email.utils.parsedate_tz(timestr)
 281         if timetuple is not None:
 282                 timestamp = email.utils.mktime_tz(timetuple)
 283         return timestamp
 284
 285 def _simplify_title(title):
 286         expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
 287         return expr.sub(u'_', title).strip(u'_')
 288
 289 def _orderedSet(iterable):
 290         """ Remove all duplicates from the input iterable """
 291         res = []
 292         for el in iterable:
 293                 if el not in res:
 294                         res.append(el)
 295         return res
 296
 297 def _unescapeHTML(s):
 298         """
 299         @param s a string (of type unicode)
 300         """
 301         assert type(s) == type(u'')
 302
 303         htmlParser = HTMLParser.HTMLParser()
 304         return htmlParser.unescape(s)
 305
 306 def _encodeFilename(s):
 307         """
 308         @param s The name of the file (of type unicode)
 309         """
 310
 311         assert type(s) == type(u'')
 312
 313         if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
 314                 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
 315                 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 316                 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 317                 return s
 318         else:
 319                 return s.encode(sys.getfilesystemencoding(), 'ignore')
 320
 321 class DownloadError(Exception):
 322         """Download Error exception.
 323
 324         This exception may be thrown by FileDownloader objects if they are not
 325         configured to continue on errors. They will contain the appropriate
 326         error message.
 327         """
 328         pass
 329
 330
 331 class SameFileError(Exception):
 332         """Same File exception.
 333
 334         This exception will be thrown by FileDownloader objects if they detect
 335         multiple files would have to be downloaded to the same file on disk.
 336         """
 337         pass
 338
 339
 340 class PostProcessingError(Exception):
 341         """Post Processing exception.
 342
 343         This exception may be raised by PostProcessor's .run() method to
 344         indicate an error in the postprocessing task.
 345         """
 346         pass
 347
 348 class MaxDownloadsReached(Exception):
 349         """ --max-downloads limit has been reached. """
 350         pass
 351
 352
 353 class UnavailableVideoError(Exception):
 354         """Unavailable Format exception.
 355
 356         This exception will be thrown when a video is requested
 357         in a format that is not available for that video.
 358         """
 359         pass
 360
 361
 362 class ContentTooShortError(Exception):
 363         """Content Too Short exception.
 364
 365         This exception may be raised by FileDownloader objects when a file they
 366         download is too small for what the server announced first, indicating
 367         the connection was probably interrupted.
 368         """
 369         # Both in bytes
 370         downloaded = None
 371         expected = None
 372
 373         def __init__(self, downloaded, expected):
 374                 self.downloaded = downloaded
 375                 self.expected = expected
 376
 377
 378 class YoutubeDLHandler(urllib2.HTTPHandler):
 379         """Handler for HTTP requests and responses.
 380
 381         This class, when installed with an OpenerDirector, automatically adds
 382         the standard headers to every HTTP request and handles gzipped and
 383         deflated responses from web servers. If compression is to be avoided in
 384         a particular request, the original request in the program code only has
 385         to include the HTTP header "Youtubedl-No-Compression", which will be
 386         removed before making the real request.
 387
 388         Part of this code was copied from:
 389
 390         http://techknack.net/python-urllib2-handlers/
 391
 392         Andrew Rowls, the author of that code, agreed to release it to the
 393         public domain.
 394         """
 395
 396         @staticmethod
 397         def deflate(data):
 398                 try:
 399                         return zlib.decompress(data, -zlib.MAX_WBITS)
 400                 except zlib.error:
 401                         return zlib.decompress(data)
 402
 403         @staticmethod
 404         def addinfourl_wrapper(stream, headers, url, code):
 405                 if hasattr(urllib2.addinfourl, 'getcode'):
 406                         return urllib2.addinfourl(stream, headers, url, code)
 407                 ret = urllib2.addinfourl(stream, headers, url)
 408                 ret.code = code
 409                 return ret
 410
 411         def http_request(self, req):
 412                 for h in std_headers:
 413                         if h in req.headers:
 414                                 del req.headers[h]
 415                         req.add_header(h, std_headers[h])
 416                 if 'Youtubedl-no-compression' in req.headers:
 417                         if 'Accept-encoding' in req.headers:
 418                                 del req.headers['Accept-encoding']
 419                         del req.headers['Youtubedl-no-compression']
 420                 return req
 421
 422         def http_response(self, req, resp):
 423                 old_resp = resp
 424                 # gzip
 425                 if resp.headers.get('Content-encoding', '') == 'gzip':
 426                         gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
 427                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 428                         resp.msg = old_resp.msg
 429                 # deflate
 430                 if resp.headers.get('Content-encoding', '') == 'deflate':
 431                         gz = StringIO.StringIO(self.deflate(resp.read()))
 432                         resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
 433                         resp.msg = old_resp.msg
 434                 return resp
 435
 436
 437 class FileDownloader(object):
 438         """File Downloader class.
 439
 440         File downloader objects are the ones responsible of downloading the
 441         actual video file and writing it to disk if the user has requested
 442         it, among some other tasks. In most cases there should be one per
 443         program. As, given a video URL, the downloader doesn't know how to
 444         extract all the needed information, task that InfoExtractors do, it
 445         has to pass the URL to one of them.
 446
 447         For this, file downloader objects have a method that allows
 448         InfoExtractors to be registered in a given order. When it is passed
 449         a URL, the file downloader handles it to the first InfoExtractor it
 450         finds that reports being able to handle it. The InfoExtractor extracts
 451         all the information about the video or videos the URL refers to, and
 452         asks the FileDownloader to process the video information, possibly
 453         downloading the video.
 454
 455         File downloaders accept a lot of parameters. In order not to saturate
 456         the object constructor with arguments, it receives a dictionary of
 457         options instead. These options are available through the params
 458         attribute for the InfoExtractors to use. The FileDownloader also
 459         registers itself as the downloader in charge for the InfoExtractors
 460         that are added to it, so this is a "mutual registration".
 461
 462         Available options:
 463
 464         username:         Username for authentication purposes.
 465         password:         Password for authentication purposes.
 466         usenetrc:         Use netrc for authentication instead.
 467         quiet:            Do not print messages to stdout.
 468         forceurl:         Force printing final URL.
 469         forcetitle:       Force printing title.
 470         forcethumbnail:   Force printing thumbnail URL.
 471         forcedescription: Force printing description.
 472         forcefilename:    Force printing final filename.
 473         simulate:         Do not download the video files.
 474         format:           Video format code.
 475         format_limit:     Highest quality format to try.
 476         outtmpl:          Template for output names.
 477         ignoreerrors:     Do not stop on download errors.
 478         ratelimit:        Download speed limit, in bytes/sec.
 479         nooverwrites:     Prevent overwriting files.
 480         retries:          Number of times to retry for HTTP error 5xx
 481         continuedl:       Try to continue downloads if possible.
 482         noprogress:       Do not print the progress bar.
 483         playliststart:    Playlist item to start at.
 484         playlistend:      Playlist item to end at.
 485         matchtitle:       Download only matching titles.
 486         rejecttitle:      Reject downloads for matching titles.
 487         logtostderr:      Log messages to stderr instead of stdout.
 488         consoletitle:     Display progress in console window's titlebar.
 489         nopart:           Do not use temporary .part files.
 490         updatetime:       Use the Last-modified header to set output file timestamps.
 491         writedescription: Write the video description to a .description file
 492         writeinfojson:    Write the video description to a .info.json file
 493         """
 494
 495         params = None
 496         _ies = []
 497         _pps = []
 498         _download_retcode = None
 499         _num_downloads = None
 500         _screen_file = None
 501
 502         def __init__(self, params):
 503                 """Create a FileDownloader object with the given options."""
 504                 self._ies = []
 505                 self._pps = []
 506                 self._download_retcode = 0
 507                 self._num_downloads = 0
 508                 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
 509                 self.params = params
 510
 511         @staticmethod
 512         def format_bytes(bytes):
 513                 if bytes is None:
 514                         return 'N/A'
 515                 if type(bytes) is str:
 516                         bytes = float(bytes)
 517                 if bytes == 0.0:
 518                         exponent = 0
 519                 else:
 520                         exponent = long(math.log(bytes, 1024.0))
 521                 suffix = 'bkMGTPEZY'[exponent]
 522                 converted = float(bytes) / float(1024 ** exponent)
 523                 return '%.2f%s' % (converted, suffix)
 524
 525         @staticmethod
 526         def calc_percent(byte_counter, data_len):
 527                 if data_len is None:
 528                         return '---.-%'
 529                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 530
 531         @staticmethod
 532         def calc_eta(start, now, total, current):
 533                 if total is None:
 534                         return '--:--'
 535                 dif = now - start
 536                 if current == 0 or dif < 0.001: # One millisecond
 537                         return '--:--'
 538                 rate = float(current) / dif
 539                 eta = long((float(total) - float(current)) / rate)
 540                 (eta_mins, eta_secs) = divmod(eta, 60)
 541                 if eta_mins > 99:
 542                         return '--:--'
 543                 return '%02d:%02d' % (eta_mins, eta_secs)
 544
 545         @staticmethod
 546         def calc_speed(start, now, bytes):
 547                 dif = now - start
 548                 if bytes == 0 or dif < 0.001: # One millisecond
 549                         return '%10s' % '---b/s'
 550                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 551
 552         @staticmethod
 553         def best_block_size(elapsed_time, bytes):
 554                 new_min = max(bytes / 2.0, 1.0)
 555                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 556                 if elapsed_time < 0.001:
 557                         return long(new_max)
 558                 rate = bytes / elapsed_time
 559                 if rate > new_max:
 560                         return long(new_max)
 561                 if rate < new_min:
 562                         return long(new_min)
 563                 return long(rate)
 564
 565         @staticmethod
 566         def parse_bytes(bytestr):
 567                 """Parse a string indicating a byte quantity into a long integer."""
 568                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 569                 if matchobj is None:
 570                         return None
 571                 number = float(matchobj.group(1))
 572                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 573                 return long(round(number * multiplier))
 574
 575         def add_info_extractor(self, ie):
 576                 """Add an InfoExtractor object to the end of the list."""
 577                 self._ies.append(ie)
 578                 ie.set_downloader(self)
 579
 580         def add_post_processor(self, pp):
 581                 """Add a PostProcessor object to the end of the chain."""
 582                 self._pps.append(pp)
 583                 pp.set_downloader(self)
 584
 585         def to_screen(self, message, skip_eol=False):
 586                 """Print message to stdout if not in quiet mode."""
 587                 assert type(message) == type(u'')
 588                 if not self.params.get('quiet', False):
 589                         terminator = [u'\n', u''][skip_eol]
 590                         output = message + terminator
 591
 592                         if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 593                                 output = output.encode(preferredencoding(), 'ignore')
 594                         self._screen_file.write(output)
 595                         self._screen_file.flush()
 596
 597         def to_stderr(self, message):
 598                 """Print message to stderr."""
 599                 print >>sys.stderr, message.encode(preferredencoding())
 600
 601         def to_cons_title(self, message):
 602                 """Set console/terminal window title to message."""
 603                 if not self.params.get('consoletitle', False):
 604                         return
 605                 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 606                         # c_wchar_p() might not be necessary if `message` is
 607                         # already of type unicode()
 608                         ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 609                 elif 'TERM' in os.environ:
 610                         sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 611
 612         def fixed_template(self):
 613                 """Checks if the output template is fixed."""
 614                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 615
 616         def trouble(self, message=None):
 617                 """Determine action to take when a download problem appears.
 618
 619                 Depending on if the downloader has been configured to ignore
 620                 download errors or not, this method may throw an exception or
 621                 not when errors are found, after printing the message.
 622                 """
 623                 if message is not None:
 624                         self.to_stderr(message)
 625                 if not self.params.get('ignoreerrors', False):
 626                         raise DownloadError(message)
 627                 self._download_retcode = 1
 628
 629         def slow_down(self, start_time, byte_counter):
 630                 """Sleep if the download speed is over the rate limit."""
 631                 rate_limit = self.params.get('ratelimit', None)
 632                 if rate_limit is None or byte_counter == 0:
 633                         return
 634                 now = time.time()
 635                 elapsed = now - start_time
 636                 if elapsed <= 0.0:
 637                         return
 638                 speed = float(byte_counter) / elapsed
 639                 if speed > rate_limit:
 640                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 641
 642         def temp_name(self, filename):
 643                 """Returns a temporary filename for the given filename."""
 644                 if self.params.get('nopart', False) or filename == u'-' or \
 645                                 (os.path.exists(_encodeFilename(filename)) and not os.path.isfile(_encodeFilename(filename))):
 646                         return filename
 647                 return filename + u'.part'
 648
 649         def undo_temp_name(self, filename):
 650                 if filename.endswith(u'.part'):
 651                         return filename[:-len(u'.part')]
 652                 return filename
 653
 654         def try_rename(self, old_filename, new_filename):
 655                 try:
 656                         if old_filename == new_filename:
 657                                 return
 658                         os.rename(_encodeFilename(old_filename), _encodeFilename(new_filename))
 659                 except (IOError, OSError), err:
 660                         self.trouble(u'ERROR: unable to rename file')
 661
 662         def try_utime(self, filename, last_modified_hdr):
 663                 """Try to set the last-modified time of the given file."""
 664                 if last_modified_hdr is None:
 665                         return
 666                 if not os.path.isfile(_encodeFilename(filename)):
 667                         return
 668                 timestr = last_modified_hdr
 669                 if timestr is None:
 670                         return
 671                 filetime = timeconvert(timestr)
 672                 if filetime is None:
 673                         return filetime
 674                 try:
 675                         os.utime(filename, (time.time(), filetime))
 676                 except:
 677                         pass
 678                 return filetime
 679
 680         def report_writedescription(self, descfn):
 681                 """ Report that the description file is being written """
 682                 self.to_screen(u'[info] Writing video description to: ' + descfn)
 683
 684         def report_writeinfojson(self, infofn):
 685                 """ Report that the metadata file has been written """
 686                 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 687
 688         def report_destination(self, filename):
 689                 """Report destination filename."""
 690                 self.to_screen(u'[download] Destination: ' + filename)
 691
 692         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 693                 """Report download progress."""
 694                 if self.params.get('noprogress', False):
 695                         return
 696                 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 697                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 698                 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 699                                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 700
 701         def report_resuming_byte(self, resume_len):
 702                 """Report attempt to resume at given byte."""
 703                 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 704
 705         def report_retry(self, count, retries):
 706                 """Report retry in case of HTTP error 5xx"""
 707                 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 708
 709         def report_file_already_downloaded(self, file_name):
 710                 """Report file has already been fully downloaded."""
 711                 try:
 712                         self.to_screen(u'[download] %s has already been downloaded' % file_name)
 713                 except (UnicodeEncodeError), err:
 714                         self.to_screen(u'[download] The file has already been downloaded')
 715
 716         def report_unable_to_resume(self):
 717                 """Report it was impossible to resume download."""
 718                 self.to_screen(u'[download] Unable to resume')
 719
 720         def report_finish(self):
 721                 """Report download finished."""
 722                 if self.params.get('noprogress', False):
 723                         self.to_screen(u'[download] Download completed')
 724                 else:
 725                         self.to_screen(u'')
 726
 727         def increment_downloads(self):
 728                 """Increment the ordinal that assigns a number to each file."""
 729                 self._num_downloads += 1
 730
 731         def prepare_filename(self, info_dict):
 732                 """Generate the output filename."""
 733                 try:
 734                         template_dict = dict(info_dict)
 735                         template_dict['epoch'] = unicode(long(time.time()))
 736                         template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
 737                         filename = self.params['outtmpl'] % template_dict
 738                         return filename
 739                 except (ValueError, KeyError), err:
 740                         self.trouble(u'ERROR: invalid system charset or erroneous output template')
 741                         return None
 742
 743         def _match_entry(self, info_dict):
 744                 """ Returns None iff the file should be downloaded """
 745
 746                 title = info_dict['title']
 747                 matchtitle = self.params.get('matchtitle', False)
 748                 if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
 749                         return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 750                 rejecttitle = self.params.get('rejecttitle', False)
 751                 if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
 752                         return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 753                 return None
 754
 755         def process_info(self, info_dict):
 756                 """Process a single dictionary returned by an InfoExtractor."""
 757
 758                 reason = self._match_entry(info_dict)
 759                 if reason is not None:
 760                         self.to_screen(u'[download] ' + reason)
 761                         return
 762
 763                 max_downloads = self.params.get('max_downloads')
 764                 if max_downloads is not None:
 765                         if self._num_downloads > int(max_downloads):
 766                                 raise MaxDownloadsReached()
 767
 768                 filename = self.prepare_filename(info_dict)
 769
 770                 # Forced printings
 771                 if self.params.get('forcetitle', False):
 772                         print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 773                 if self.params.get('forceurl', False):
 774                         print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 775                 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 776                         print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
 777                 if self.params.get('forcedescription', False) and 'description' in info_dict:
 778                         print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
 779                 if self.params.get('forcefilename', False) and filename is not None:
 780                         print filename.encode(preferredencoding(), 'xmlcharrefreplace')
 781                 if self.params.get('forceformat', False):
 782                         print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
 783
 784                 # Do nothing else if in simulate mode
 785                 if self.params.get('simulate', False):
 786                         return
 787
 788                 if filename is None:
 789                         return
 790
 791                 try:
 792                         dn = os.path.dirname(_encodeFilename(filename))
 793                         if dn != '' and not os.path.exists(dn): # dn is already encoded
 794                                 os.makedirs(dn)
 795                 except (OSError, IOError), err:
 796                         self.trouble(u'ERROR: unable to create directory ' + unicode(err))
 797                         return
 798
 799                 if self.params.get('writedescription', False):
 800                         try:
 801                                 descfn = filename + u'.description'
 802                                 self.report_writedescription(descfn)
 803                                 descfile = open(_encodeFilename(descfn), 'wb')
 804                                 try:
 805                                         descfile.write(info_dict['description'].encode('utf-8'))
 806                                 finally:
 807                                         descfile.close()
 808                         except (OSError, IOError):
 809                                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 810                                 return
 811
 812                 if self.params.get('writeinfojson', False):
 813                         infofn = filename + u'.info.json'
 814                         self.report_writeinfojson(infofn)
 815                         try:
 816                                 json.dump
 817                         except (NameError,AttributeError):
 818                                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 819                                 return
 820                         try:
 821                                 infof = open(_encodeFilename(infofn), 'wb')
 822                                 try:
 823                                         json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 824                                         json.dump(json_info_dict, infof)
 825                                 finally:
 826                                         infof.close()
 827                         except (OSError, IOError):
 828                                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 829                                 return
 830
 831                 if not self.params.get('skip_download', False):
 832                         if self.params.get('nooverwrites', False) and os.path.exists(_encodeFilename(filename)):
 833                                 success = True
 834                         else:
 835                                 try:
 836                                         success = self._do_download(filename, info_dict)
 837                                 except (OSError, IOError), err:
 838                                         raise UnavailableVideoError
 839                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 840                                         self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 841                                         return
 842                                 except (ContentTooShortError, ), err:
 843                                         self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 844                                         return
 845
 846                         if success:
 847                                 try:
 848                                         self.post_process(filename, info_dict)
 849                                 except (PostProcessingError), err:
 850                                         self.trouble(u'ERROR: postprocessing: %s' % str(err))
 851                                         return
 852
 853         def download(self, url_list):
 854                 """Download a given list of URLs."""
 855                 if len(url_list) > 1 and self.fixed_template():
 856                         raise SameFileError(self.params['outtmpl'])
 857
 858                 for url in url_list:
 859                         suitable_found = False
 860                         for ie in self._ies:
 861                                 # Go to next InfoExtractor if not suitable
 862                                 if not ie.suitable(url):
 863                                         continue
 864
 865                                 # Suitable InfoExtractor found
 866                                 suitable_found = True
 867
 868                                 # Extract information from URL and process it
 869                                 ie.extract(url)
 870
 871                                 # Suitable InfoExtractor had been found; go to next URL
 872                                 break
 873
 874                         if not suitable_found:
 875                                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 876
 877                 return self._download_retcode
 878
 879         def post_process(self, filename, ie_info):
 880                 """Run the postprocessing chain on the given file."""
 881                 info = dict(ie_info)
 882                 info['filepath'] = filename
 883                 for pp in self._pps:
 884                         info = pp.run(info)
 885                         if info is None:
 886                                 break
 887
 888         def _download_with_rtmpdump(self, filename, url, player_url):
 889                 self.report_destination(filename)
 890                 tmpfilename = self.temp_name(filename)
 891
 892                 # Check for rtmpdump first
 893                 try:
 894                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 895                 except (OSError, IOError):
 896                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 897                         return False
 898
 899                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 900                 # the connection was interrumpted and resuming appears to be
 901                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 902                 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 903                 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 904                 if self.params.get('verbose', False):
 905                         try:
 906                                 import pipes
 907                                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 908                         except ImportError:
 909                                 shell_quote = repr
 910                         self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 911                 retval = subprocess.call(args)
 912                 while retval == 2 or retval == 1:
 913                         prevsize = os.path.getsize(_encodeFilename(tmpfilename))
 914                         self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 915                         time.sleep(5.0) # This seems to be needed
 916                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 917                         cursize = os.path.getsize(_encodeFilename(tmpfilename))
 918                         if prevsize == cursize and retval == 1:
 919                                 break
 920                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 921                         if prevsize == cursize and retval == 2 and cursize > 1024:
 922                                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 923                                 retval = 0
 924                                 break
 925                 if retval == 0:
 926                         self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
 927                         self.try_rename(tmpfilename, filename)
 928                         return True
 929                 else:
 930                         self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 931                         return False
 932
 933         def _do_download(self, filename, info_dict):
 934                 url = info_dict['url']
 935                 player_url = info_dict.get('player_url', None)
 936
 937                 # Check file already present
 938                 if self.params.get('continuedl', False) and os.path.isfile(_encodeFilename(filename)) and not self.params.get('nopart', False):
 939                         self.report_file_already_downloaded(filename)
 940                         return True
 941
 942                 # Attempt to download using rtmpdump
 943                 if url.startswith('rtmp'):
 944                         return self._download_with_rtmpdump(filename, url, player_url)
 945
 946                 tmpfilename = self.temp_name(filename)
 947                 stream = None
 948
 949                 # Do not include the Accept-Encoding header
 950                 headers = {'Youtubedl-no-compression': 'True'}
 951                 basic_request = urllib2.Request(url, None, headers)
 952                 request = urllib2.Request(url, None, headers)
 953
 954                 # Establish possible resume length
 955                 if os.path.isfile(_encodeFilename(tmpfilename)):
 956                         resume_len = os.path.getsize(_encodeFilename(tmpfilename))
 957                 else:
 958                         resume_len = 0
 959
 960                 open_mode = 'wb'
 961                 if resume_len != 0:
 962                         if self.params.get('continuedl', False):
 963                                 self.report_resuming_byte(resume_len)
 964                                 request.add_header('Range','bytes=%d-' % resume_len)
 965                                 open_mode = 'ab'
 966                         else:
 967                                 resume_len = 0
 968
 969                 count = 0
 970                 retries = self.params.get('retries', 0)
 971                 while count <= retries:
 972                         # Establish connection
 973                         try:
 974                                 if count == 0 and 'urlhandle' in info_dict:
 975                                         data = info_dict['urlhandle']
 976                                 data = urllib2.urlopen(request)
 977                                 break
 978                         except (urllib2.HTTPError, ), err:
 979                                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 980                                         # Unexpected HTTP error
 981                                         raise
 982                                 elif err.code == 416:
 983                                         # Unable to resume (requested range not satisfiable)
 984                                         try:
 985                                                 # Open the connection again without the range header
 986                                                 data = urllib2.urlopen(basic_request)
 987                                                 content_length = data.info()['Content-Length']
 988                                         except (urllib2.HTTPError, ), err:
 989                                                 if err.code < 500 or err.code >= 600:
 990                                                         raise
 991                                         else:
 992                                                 # Examine the reported length
 993                                                 if (content_length is not None and
 994                                                                 (resume_len - 100 < long(content_length) < resume_len + 100)):
 995                                                         # The file had already been fully downloaded.
 996                                                         # Explanation to the above condition: in issue #175 it was revealed that
 997                                                         # YouTube sometimes adds or removes a few bytes from the end of the file,
 998                                                         # changing the file size slightly and causing problems for some users. So
 999                                                         # I decided to implement a suggested change and consider the file
1000                                                         # completely downloaded if the file size differs less than 100 bytes from
1001                                                         # the one in the hard drive.
1002                                                         self.report_file_already_downloaded(filename)
1003                                                         self.try_rename(tmpfilename, filename)
1004                                                         return True
1005                                                 else:
1006                                                         # The length does not match, we start the download over
1007                                                         self.report_unable_to_resume()
1008                                                         open_mode = 'wb'
1009                                                         break
1010                         # Retry
1011                         count += 1
1012                         if count <= retries:
1013                                 self.report_retry(count, retries)
1014
1015                 if count > retries:
1016                         self.trouble(u'ERROR: giving up after %s retries' % retries)
1017                         return False
1018
1019                 data_len = data.info().get('Content-length', None)
1020                 if data_len is not None:
1021                         data_len = long(data_len) + resume_len
1022                 data_len_str = self.format_bytes(data_len)
1023                 byte_counter = 0 + resume_len
1024                 block_size = 1024
1025                 start = time.time()
1026                 while True:
1027                         # Download and write
1028                         before = time.time()
1029                         data_block = data.read(block_size)
1030                         after = time.time()
1031                         if len(data_block) == 0:
1032                                 break
1033                         byte_counter += len(data_block)
1034
1035                         # Open file just in time
1036                         if stream is None:
1037                                 try:
1038                                         (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
1039                                         assert stream is not None
1040                                         filename = self.undo_temp_name(tmpfilename)
1041                                         self.report_destination(filename)
1042                                 except (OSError, IOError), err:
1043                                         self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
1044                                         return False
1045                         try:
1046                                 stream.write(data_block)
1047                         except (IOError, OSError), err:
1048                                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
1049                                 return False
1050                         block_size = self.best_block_size(after - before, len(data_block))
1051
1052                         # Progress message
1053                         speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
1054                         if data_len is None:
1055                                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
1056                         else:
1057                                 percent_str = self.calc_percent(byte_counter, data_len)
1058                                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
1059                                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
1060
1061                         # Apply rate limit
1062                         self.slow_down(start, byte_counter - resume_len)
1063
1064                 if stream is None:
1065                         self.trouble(u'\nERROR: Did not get any data blocks')
1066                         return False
1067                 stream.close()
1068                 self.report_finish()
1069                 if data_len is not None and byte_counter != data_len:
1070                         raise ContentTooShortError(byte_counter, long(data_len))
1071                 self.try_rename(tmpfilename, filename)
1072
1073                 # Update file modification time
1074                 if self.params.get('updatetime', True):
1075                         info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
1076
1077                 return True
1078
1079
1080 class InfoExtractor(object):
1081         """Information Extractor class.
1082
1083         Information extractors are the classes that, given a URL, extract
1084         information from the video (or videos) the URL refers to. This
1085         information includes the real video URL, the video title and simplified
1086         title, author and others. The information is stored in a dictionary
1087         which is then passed to the FileDownloader. The FileDownloader
1088         processes this information possibly downloading the video to the file
1089         system, among other possible outcomes. The dictionaries must include
1090         the following fields:
1091
1092         id:             Video identifier.
1093         url:            Final video URL.
1094         uploader:       Nickname of the video uploader.
1095         title:          Literal title.
1096         stitle:         Simplified title.
1097         ext:            Video filename extension.
1098         format:         Video format.
1099         player_url:     SWF Player URL (may be None).
1100
1101         The following fields are optional. Their primary purpose is to allow
1102         youtube-dl to serve as the backend for a video search function, such
1103         as the one in youtube2mp3.  They are only used when their respective
1104         forced printing functions are called:
1105
1106         thumbnail:      Full URL to a video thumbnail image.
1107         description:    One-line video description.
1108
1109         Subclasses of this one should re-define the _real_initialize() and
1110         _real_extract() methods and define a _VALID_URL regexp.
1111         Probably, they should also be added to the list of extractors.
1112         """
1113
1114         _ready = False
1115         _downloader = None
1116
1117         def __init__(self, downloader=None):
1118                 """Constructor. Receives an optional downloader."""
1119                 self._ready = False
1120                 self.set_downloader(downloader)
1121
1122         def suitable(self, url):
1123                 """Receives a URL and returns True if suitable for this IE."""
1124                 return re.match(self._VALID_URL, url) is not None
1125
1126         def initialize(self):
1127                 """Initializes an instance (authentication, etc)."""
1128                 if not self._ready:
1129                         self._real_initialize()
1130                         self._ready = True
1131
1132         def extract(self, url):
1133                 """Extracts URL information and returns it in list of dicts."""
1134                 self.initialize()
1135                 return self._real_extract(url)
1136
1137         def set_downloader(self, downloader):
1138                 """Sets the downloader for this IE."""
1139                 self._downloader = downloader
1140
1141         def _real_initialize(self):
1142                 """Real initialization process. Redefine in subclasses."""
1143                 pass
1144
1145         def _real_extract(self, url):
1146                 """Real extraction process. Redefine in subclasses."""
1147                 pass
1148
1149
1150 class YoutubeIE(InfoExtractor):
1151         """Information extractor for youtube.com."""
1152
1153         _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
1154         _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
1155         _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
1156         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
1157         _NETRC_MACHINE = 'youtube'
1158         # Listed in order of quality
1159         _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
1160         _available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
1161         _video_extensions = {
1162                 '13': '3gp',
1163                 '17': 'mp4',
1164                 '18': 'mp4',
1165                 '22': 'mp4',
1166                 '37': 'mp4',
1167                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
1168                 '43': 'webm',
1169                 '44': 'webm',
1170                 '45': 'webm',
1171         }
1172         _video_dimensions = {
1173                 '5': '240x400',
1174                 '6': '???',
1175                 '13': '???',
1176                 '17': '144x176',
1177                 '18': '360x640',
1178                 '22': '720x1280',
1179                 '34': '360x640',
1180                 '35': '480x854',
1181                 '37': '1080x1920',
1182                 '38': '3072x4096',
1183                 '43': '360x640',
1184                 '44': '480x854',
1185                 '45': '720x1280',
1186         }
1187         IE_NAME = u'youtube'
1188
1189         def report_lang(self):
1190                 """Report attempt to set language."""
1191                 self._downloader.to_screen(u'[youtube] Setting language')
1192
1193         def report_login(self):
1194                 """Report attempt to log in."""
1195                 self._downloader.to_screen(u'[youtube] Logging in')
1196
1197         def report_age_confirmation(self):
1198                 """Report attempt to confirm age."""
1199                 self._downloader.to_screen(u'[youtube] Confirming age')
1200
1201         def report_video_webpage_download(self, video_id):
1202                 """Report attempt to download video webpage."""
1203                 self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)
1204
1205         def report_video_info_webpage_download(self, video_id):
1206                 """Report attempt to download video info webpage."""
1207                 self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)
1208
1209         def report_information_extraction(self, video_id):
1210                 """Report attempt to extract video information."""
1211                 self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)
1212
1213         def report_unavailable_format(self, video_id, format):
1214                 """Report extracted video URL."""
1215                 self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))
1216
1217         def report_rtmp_download(self):
1218                 """Indicate the download will use the RTMP protocol."""
1219                 self._downloader.to_screen(u'[youtube] RTMP download detected')
1220
1221         def _print_formats(self, formats):
1222                 print 'Available formats:'
1223                 for x in formats:
1224                         print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))
1225
1226         def _real_initialize(self):
1227                 if self._downloader is None:
1228                         return
1229
1230                 username = None
1231                 password = None
1232                 downloader_params = self._downloader.params
1233
1234                 # Attempt to use provided username and password or .netrc data
1235                 if downloader_params.get('username', None) is not None:
1236                         username = downloader_params['username']
1237                         password = downloader_params['password']
1238                 elif downloader_params.get('usenetrc', False):
1239                         try:
1240                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
1241                                 if info is not None:
1242                                         username = info[0]
1243                                         password = info[2]
1244                                 else:
1245                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
1246                         except (IOError, netrc.NetrcParseError), err:
1247                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
1248                                 return
1249
1250                 # Set language
1251                 request = urllib2.Request(self._LANG_URL)
1252                 try:
1253                         self.report_lang()
1254                         urllib2.urlopen(request).read()
1255                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1256                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
1257                         return
1258
1259                 # No authentication to be performed
1260                 if username is None:
1261                         return
1262
1263                 # Log in
1264                 login_form = {
1265                                 'current_form': 'loginForm',
1266                                 'next':         '/',
1267                                 'action_login': 'Log In',
1268                                 'username':     username,
1269                                 'password':     password,
1270                                 }
1271                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
1272                 try:
1273                         self.report_login()
1274                         login_results = urllib2.urlopen(request).read()
1275                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
1276                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
1277                                 return
1278                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1279                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
1280                         return
1281
1282                 # Confirm age
1283                 age_form = {
1284                                 'next_url':             '/',
1285                                 'action_confirm':       'Confirm',
1286                                 }
1287                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
1288                 try:
1289                         self.report_age_confirmation()
1290                         age_results = urllib2.urlopen(request).read()
1291                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1292                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1293                         return
1294
1295         def _real_extract(self, url):
1296                 # Extract video id from URL
1297                 mobj = re.match(self._VALID_URL, url)
1298                 if mobj is None:
1299                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1300                         return
1301                 video_id = mobj.group(2)
1302
1303                 # Get video webpage
1304                 self.report_video_webpage_download(video_id)
1305                 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
1306                 try:
1307                         video_webpage = urllib2.urlopen(request).read()
1308                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1309                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
1310                         return
1311
1312                 # Attempt to extract SWF player URL
1313                 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
1314                 if mobj is not None:
1315                         player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
1316                 else:
1317                         player_url = None
1318
1319                 # Get video info
1320                 self.report_video_info_webpage_download(video_id)
1321                 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
1322                         video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
1323                                         % (video_id, el_type))
1324                         request = urllib2.Request(video_info_url)
1325                         try:
1326                                 video_info_webpage = urllib2.urlopen(request).read()
1327                                 video_info = parse_qs(video_info_webpage)
1328                                 if 'token' in video_info:
1329                                         break
1330                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1331                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
1332                                 return
1333                 if 'token' not in video_info:
1334                         if 'reason' in video_info:
1335                                 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
1336                         else:
1337                                 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
1338                         return
1339
1340                 # Start extracting information
1341                 self.report_information_extraction(video_id)
1342
1343                 # uploader
1344                 if 'author' not in video_info:
1345                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1346                         return
1347                 video_uploader = urllib.unquote_plus(video_info['author'][0])
1348
1349                 # title
1350                 if 'title' not in video_info:
1351                         self._downloader.trouble(u'ERROR: unable to extract video title')
1352                         return
1353                 video_title = urllib.unquote_plus(video_info['title'][0])
1354                 video_title = video_title.decode('utf-8')
1355                 video_title = sanitize_title(video_title)
1356
1357                 # simplified title
1358                 simple_title = _simplify_title(video_title)
1359
1360                 # thumbnail image
1361                 if 'thumbnail_url' not in video_info:
1362                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
1363                         video_thumbnail = ''
1364                 else:   # don't panic if we can't find it
1365                         video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
1366
1367                 # upload date
1368                 upload_date = u'NA'
1369                 mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
1370                 if mobj is not None:
1371                         upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
1372                         format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
1373                         for expression in format_expressions:
1374                                 try:
1375                                         upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
1376                                 except:
1377                                         pass
1378
1379                 # description
1380                 try:
1381                         lxml.etree
1382                 except NameError:
1383                         video_description = u'No description available.'
1384                         mobj = re.search(r'<meta name="description" content="(.*?)">', video_webpage)
1385                         if mobj is not None:
1386                                 video_description = mobj.group(1).decode('utf-8')
1387                 else:
1388                         html_parser = lxml.etree.HTMLParser(encoding='utf-8')
1389                         vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
1390                         video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
1391                         # TODO use another parser
1392
1393                 # token
1394                 video_token = urllib.unquote_plus(video_info['token'][0])
1395
1396                 # Decide which formats to download
1397                 req_format = self._downloader.params.get('format', None)
1398
1399                 if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
1400                         self.report_rtmp_download()
1401                         video_url_list = [(None, video_info['conn'][0])]
1402                 elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
1403                         url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
1404                         url_data = [parse_qs(uds) for uds in url_data_strs]
1405                         url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
1406                         url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)
1407
1408                         format_limit = self._downloader.params.get('format_limit', None)
1409                         available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
1410                         if format_limit is not None and format_limit in available_formats:
1411                                 format_list = available_formats[available_formats.index(format_limit):]
1412                         else:
1413                                 format_list = available_formats
1414                         existing_formats = [x for x in format_list if x in url_map]
1415                         if len(existing_formats) == 0:
1416                                 self._downloader.trouble(u'ERROR: no known formats available for video')
1417                                 return
1418                         if self._downloader.params.get('listformats', None):
1419                                 self._print_formats(existing_formats)
1420                                 return
1421                         if req_format is None or req_format == 'best':
1422                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
1423                         elif req_format == 'worst':
1424                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
1425                         elif req_format in ('-1', 'all'):
1426                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
1427                         else:
1428                                 # Specific formats. We pick the first in a slash-delimeted sequence.
1429                                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
1430                                 req_formats = req_format.split('/')
1431                                 video_url_list = None
1432                                 for rf in req_formats:
1433                                         if rf in url_map:
1434                                                 video_url_list = [(rf, url_map[rf])]
1435                                                 break
1436                                 if video_url_list is None:
1437                                         self._downloader.trouble(u'ERROR: requested format not available')
1438                                         return
1439                 else:
1440                         self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')
1441                         return
1442
1443                 for format_param, video_real_url in video_url_list:
1444                         # At this point we have a new video
1445                         self._downloader.increment_downloads()
1446
1447                         # Extension
1448                         video_extension = self._video_extensions.get(format_param, 'flv')
1449
1450                         try:
1451                                 # Process video information
1452                                 self._downloader.process_info({
1453                                         'id':           video_id.decode('utf-8'),
1454                                         'url':          video_real_url.decode('utf-8'),
1455                                         'uploader':     video_uploader.decode('utf-8'),
1456                                         'upload_date':  upload_date,
1457                                         'title':        video_title,
1458                                         'stitle':       simple_title,
1459                                         'ext':          video_extension.decode('utf-8'),
1460                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
1461                                         'thumbnail':    video_thumbnail.decode('utf-8'),
1462                                         'description':  video_description,
1463                                         'player_url':   player_url,
1464                                 })
1465                         except UnavailableVideoError, err:
1466                                 self._downloader.trouble(u'\nERROR: unable to download video')
1467
1468
1469 class MetacafeIE(InfoExtractor):
1470         """Information Extractor for metacafe.com."""
1471
1472         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1473         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1474         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1475         _youtube_ie = None
1476         IE_NAME = u'metacafe'
1477
1478         def __init__(self, youtube_ie, downloader=None):
1479                 InfoExtractor.__init__(self, downloader)
1480                 self._youtube_ie = youtube_ie
1481
1482         def report_disclaimer(self):
1483                 """Report disclaimer retrieval."""
1484                 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1485
1486         def report_age_confirmation(self):
1487                 """Report attempt to confirm age."""
1488                 self._downloader.to_screen(u'[metacafe] Confirming age')
1489
1490         def report_download_webpage(self, video_id):
1491                 """Report webpage download."""
1492                 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1493
1494         def report_extraction(self, video_id):
1495                 """Report information extraction."""
1496                 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1497
1498         def _real_initialize(self):
1499                 # Retrieve disclaimer
1500                 request = urllib2.Request(self._DISCLAIMER)
1501                 try:
1502                         self.report_disclaimer()
1503                         disclaimer = urllib2.urlopen(request).read()
1504                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1505                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1506                         return
1507
1508                 # Confirm age
1509                 disclaimer_form = {
1510                         'filters': '0',
1511                         'submit': "Continue - I'm over 18",
1512                         }
1513                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
1514                 try:
1515                         self.report_age_confirmation()
1516                         disclaimer = urllib2.urlopen(request).read()
1517                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1518                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1519                         return
1520
1521         def _real_extract(self, url):
1522                 # Extract id and simplified title from URL
1523                 mobj = re.match(self._VALID_URL, url)
1524                 if mobj is None:
1525                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1526                         return
1527
1528                 video_id = mobj.group(1)
1529
1530                 # Check if video comes from YouTube
1531                 mobj2 = re.match(r'^yt-(.*)$', video_id)
1532                 if mobj2 is not None:
1533                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1534                         return
1535
1536                 # At this point we have a new video
1537                 self._downloader.increment_downloads()
1538
1539                 simple_title = mobj.group(2).decode('utf-8')
1540
1541                 # Retrieve video webpage to extract further information
1542                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1543                 try:
1544                         self.report_download_webpage(video_id)
1545                         webpage = urllib2.urlopen(request).read()
1546                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1547                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1548                         return
1549
1550                 # Extract URL, uploader and title from webpage
1551                 self.report_extraction(video_id)
1552                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1553                 if mobj is not None:
1554                         mediaURL = urllib.unquote(mobj.group(1))
1555                         video_extension = mediaURL[-3:]
1556
1557                         # Extract gdaKey if available
1558                         mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1559                         if mobj is None:
1560                                 video_url = mediaURL
1561                         else:
1562                                 gdaKey = mobj.group(1)
1563                                 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1564                 else:
1565                         mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1566                         if mobj is None:
1567                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1568                                 return
1569                         vardict = parse_qs(mobj.group(1))
1570                         if 'mediaData' not in vardict:
1571                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1572                                 return
1573                         mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1574                         if mobj is None:
1575                                 self._downloader.trouble(u'ERROR: unable to extract media URL')
1576                                 return
1577                         mediaURL = mobj.group(1).replace('\\/', '/')
1578                         video_extension = mediaURL[-3:]
1579                         video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1580
1581                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1582                 if mobj is None:
1583                         self._downloader.trouble(u'ERROR: unable to extract title')
1584                         return
1585                 video_title = mobj.group(1).decode('utf-8')
1586                 video_title = sanitize_title(video_title)
1587
1588                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1589                 if mobj is None:
1590                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1591                         return
1592                 video_uploader = mobj.group(1)
1593
1594                 try:
1595                         # Process video information
1596                         self._downloader.process_info({
1597                                 'id':           video_id.decode('utf-8'),
1598                                 'url':          video_url.decode('utf-8'),
1599                                 'uploader':     video_uploader.decode('utf-8'),
1600                                 'upload_date':  u'NA',
1601                                 'title':        video_title,
1602                                 'stitle':       simple_title,
1603                                 'ext':          video_extension.decode('utf-8'),
1604                                 'format':       u'NA',
1605                                 'player_url':   None,
1606                         })
1607                 except UnavailableVideoError:
1608                         self._downloader.trouble(u'\nERROR: unable to download video')
1609
1610
1611 class DailymotionIE(InfoExtractor):
1612         """Information Extractor for Dailymotion"""
1613
1614         _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1615         IE_NAME = u'dailymotion'
1616
1617         def __init__(self, downloader=None):
1618                 InfoExtractor.__init__(self, downloader)
1619
1620         def report_download_webpage(self, video_id):
1621                 """Report webpage download."""
1622                 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1623
1624         def report_extraction(self, video_id):
1625                 """Report information extraction."""
1626                 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1627
1628         def _real_extract(self, url):
1629                 # Extract id and simplified title from URL
1630                 mobj = re.match(self._VALID_URL, url)
1631                 if mobj is None:
1632                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1633                         return
1634
1635                 # At this point we have a new video
1636                 self._downloader.increment_downloads()
1637                 video_id = mobj.group(1)
1638
1639                 video_extension = 'flv'
1640
1641                 # Retrieve video webpage to extract further information
1642                 request = urllib2.Request(url)
1643                 request.add_header('Cookie', 'family_filter=off')
1644                 try:
1645                         self.report_download_webpage(video_id)
1646                         webpage = urllib2.urlopen(request).read()
1647                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1648                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1649                         return
1650
1651                 # Extract URL, uploader and title from webpage
1652                 self.report_extraction(video_id)
1653                 mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
1654                 if mobj is None:
1655                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1656                         return
1657                 sequence = urllib.unquote(mobj.group(1))
1658                 mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
1659                 if mobj is None:
1660                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1661                         return
1662                 mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')
1663
1664                 # if needed add http://www.dailymotion.com/ if relative URL
1665
1666                 video_url = mediaURL
1667
1668                 mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
1669                 if mobj is None:
1670                         self._downloader.trouble(u'ERROR: unable to extract title')
1671                         return
1672                 video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
1673                 video_title = sanitize_title(video_title)
1674                 simple_title = _simplify_title(video_title)
1675
1676                 mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
1677                 if mobj is None:
1678                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1679                         return
1680                 video_uploader = mobj.group(1)
1681
1682                 try:
1683                         # Process video information
1684                         self._downloader.process_info({
1685                                 'id':           video_id.decode('utf-8'),
1686                                 'url':          video_url.decode('utf-8'),
1687                                 'uploader':     video_uploader.decode('utf-8'),
1688                                 'upload_date':  u'NA',
1689                                 'title':        video_title,
1690                                 'stitle':       simple_title,
1691                                 'ext':          video_extension.decode('utf-8'),
1692                                 'format':       u'NA',
1693                                 'player_url':   None,
1694                         })
1695                 except UnavailableVideoError:
1696                         self._downloader.trouble(u'\nERROR: unable to download video')
1697
1698
1699 class GoogleIE(InfoExtractor):
1700         """Information extractor for video.google.com."""
1701
1702         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1703         IE_NAME = u'video.google'
1704
1705         def __init__(self, downloader=None):
1706                 InfoExtractor.__init__(self, downloader)
1707
1708         def report_download_webpage(self, video_id):
1709                 """Report webpage download."""
1710                 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1711
1712         def report_extraction(self, video_id):
1713                 """Report information extraction."""
1714                 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1715
1716         def _real_extract(self, url):
1717                 # Extract id from URL
1718                 mobj = re.match(self._VALID_URL, url)
1719                 if mobj is None:
1720                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1721                         return
1722
1723                 # At this point we have a new video
1724                 self._downloader.increment_downloads()
1725                 video_id = mobj.group(1)
1726
1727                 video_extension = 'mp4'
1728
1729                 # Retrieve video webpage to extract further information
1730                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1731                 try:
1732                         self.report_download_webpage(video_id)
1733                         webpage = urllib2.urlopen(request).read()
1734                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1735                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1736                         return
1737
1738                 # Extract URL, uploader, and title from webpage
1739                 self.report_extraction(video_id)
1740                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1741                 if mobj is None:
1742                         video_extension = 'flv'
1743                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1744                 if mobj is None:
1745                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1746                         return
1747                 mediaURL = urllib.unquote(mobj.group(1))
1748                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1749                 mediaURL = mediaURL.replace('\\x26', '\x26')
1750
1751                 video_url = mediaURL
1752
1753                 mobj = re.search(r'<title>(.*)</title>', webpage)
1754                 if mobj is None:
1755                         self._downloader.trouble(u'ERROR: unable to extract title')
1756                         return
1757                 video_title = mobj.group(1).decode('utf-8')
1758                 video_title = sanitize_title(video_title)
1759                 simple_title = _simplify_title(video_title)
1760
1761                 # Extract video description
1762                 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1763                 if mobj is None:
1764                         self._downloader.trouble(u'ERROR: unable to extract video description')
1765                         return
1766                 video_description = mobj.group(1).decode('utf-8')
1767                 if not video_description:
1768                         video_description = 'No description available.'
1769
1770                 # Extract video thumbnail
1771                 if self._downloader.params.get('forcethumbnail', False):
1772                         request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1773                         try:
1774                                 webpage = urllib2.urlopen(request).read()
1775                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1776                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1777                                 return
1778                         mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1779                         if mobj is None:
1780                                 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1781                                 return
1782                         video_thumbnail = mobj.group(1)
1783                 else:   # we need something to pass to process_info
1784                         video_thumbnail = ''
1785
1786                 try:
1787                         # Process video information
1788                         self._downloader.process_info({
1789                                 'id':           video_id.decode('utf-8'),
1790                                 'url':          video_url.decode('utf-8'),
1791                                 'uploader':     u'NA',
1792                                 'upload_date':  u'NA',
1793                                 'title':        video_title,
1794                                 'stitle':       simple_title,
1795                                 'ext':          video_extension.decode('utf-8'),
1796                                 'format':       u'NA',
1797                                 'player_url':   None,
1798                         })
1799                 except UnavailableVideoError:
1800                         self._downloader.trouble(u'\nERROR: unable to download video')
1801
1802
1803 class PhotobucketIE(InfoExtractor):
1804         """Information extractor for photobucket.com."""
1805
1806         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1807         IE_NAME = u'photobucket'
1808
1809         def __init__(self, downloader=None):
1810                 InfoExtractor.__init__(self, downloader)
1811
1812         def report_download_webpage(self, video_id):
1813                 """Report webpage download."""
1814                 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1815
1816         def report_extraction(self, video_id):
1817                 """Report information extraction."""
1818                 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1819
1820         def _real_extract(self, url):
1821                 # Extract id from URL
1822                 mobj = re.match(self._VALID_URL, url)
1823                 if mobj is None:
1824                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1825                         return
1826
1827                 # At this point we have a new video
1828                 self._downloader.increment_downloads()
1829                 video_id = mobj.group(1)
1830
1831                 video_extension = 'flv'
1832
1833                 # Retrieve video webpage to extract further information
1834                 request = urllib2.Request(url)
1835                 try:
1836                         self.report_download_webpage(video_id)
1837                         webpage = urllib2.urlopen(request).read()
1838                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1839                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1840                         return
1841
1842                 # Extract URL, uploader, and title from webpage
1843                 self.report_extraction(video_id)
1844                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1845                 if mobj is None:
1846                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1847                         return
1848                 mediaURL = urllib.unquote(mobj.group(1))
1849
1850                 video_url = mediaURL
1851
1852                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1853                 if mobj is None:
1854                         self._downloader.trouble(u'ERROR: unable to extract title')
1855                         return
1856                 video_title = mobj.group(1).decode('utf-8')
1857                 video_title = sanitize_title(video_title)
1858                 simple_title = _simplify_title(vide_title)
1859
1860                 video_uploader = mobj.group(2).decode('utf-8')
1861
1862                 try:
1863                         # Process video information
1864                         self._downloader.process_info({
1865                                 'id':           video_id.decode('utf-8'),
1866                                 'url':          video_url.decode('utf-8'),
1867                                 'uploader':     video_uploader,
1868                                 'upload_date':  u'NA',
1869                                 'title':        video_title,
1870                                 'stitle':       simple_title,
1871                                 'ext':          video_extension.decode('utf-8'),
1872                                 'format':       u'NA',
1873                                 'player_url':   None,
1874                         })
1875                 except UnavailableVideoError:
1876                         self._downloader.trouble(u'\nERROR: unable to download video')
1877
1878
1879 class YahooIE(InfoExtractor):
1880         """Information extractor for video.yahoo.com."""
1881
1882         # _VALID_URL matches all Yahoo! Video URLs
1883         # _VPAGE_URL matches only the extractable '/watch/' URLs
1884         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1885         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1886         IE_NAME = u'video.yahoo'
1887
1888         def __init__(self, downloader=None):
1889                 InfoExtractor.__init__(self, downloader)
1890
1891         def report_download_webpage(self, video_id):
1892                 """Report webpage download."""
1893                 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1894
1895         def report_extraction(self, video_id):
1896                 """Report information extraction."""
1897                 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1898
1899         def _real_extract(self, url, new_video=True):
1900                 # Extract ID from URL
1901                 mobj = re.match(self._VALID_URL, url)
1902                 if mobj is None:
1903                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1904                         return
1905
1906                 # At this point we have a new video
1907                 self._downloader.increment_downloads()
1908                 video_id = mobj.group(2)
1909                 video_extension = 'flv'
1910
1911                 # Rewrite valid but non-extractable URLs as
1912                 # extractable English language /watch/ URLs
1913                 if re.match(self._VPAGE_URL, url) is None:
1914                         request = urllib2.Request(url)
1915                         try:
1916                                 webpage = urllib2.urlopen(request).read()
1917                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1918                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1919                                 return
1920
1921                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1922                         if mobj is None:
1923                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1924                                 return
1925                         yahoo_id = mobj.group(1)
1926
1927                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1928                         if mobj is None:
1929                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1930                                 return
1931                         yahoo_vid = mobj.group(1)
1932
1933                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1934                         return self._real_extract(url, new_video=False)
1935
1936                 # Retrieve video webpage to extract further information
1937                 request = urllib2.Request(url)
1938                 try:
1939                         self.report_download_webpage(video_id)
1940                         webpage = urllib2.urlopen(request).read()
1941                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1942                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1943                         return
1944
1945                 # Extract uploader and title from webpage
1946                 self.report_extraction(video_id)
1947                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1948                 if mobj is None:
1949                         self._downloader.trouble(u'ERROR: unable to extract video title')
1950                         return
1951                 video_title = mobj.group(1).decode('utf-8')
1952                 simple_title = _simplify_title(video_title)
1953
1954                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1955                 if mobj is None:
1956                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1957                         return
1958                 video_uploader = mobj.group(1).decode('utf-8')
1959
1960                 # Extract video thumbnail
1961                 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1962                 if mobj is None:
1963                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1964                         return
1965                 video_thumbnail = mobj.group(1).decode('utf-8')
1966
1967                 # Extract video description
1968                 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1969                 if mobj is None:
1970                         self._downloader.trouble(u'ERROR: unable to extract video description')
1971                         return
1972                 video_description = mobj.group(1).decode('utf-8')
1973                 if not video_description:
1974                         video_description = 'No description available.'
1975
1976                 # Extract video height and width
1977                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1978                 if mobj is None:
1979                         self._downloader.trouble(u'ERROR: unable to extract video height')
1980                         return
1981                 yv_video_height = mobj.group(1)
1982
1983                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1984                 if mobj is None:
1985                         self._downloader.trouble(u'ERROR: unable to extract video width')
1986                         return
1987                 yv_video_width = mobj.group(1)
1988
1989                 # Retrieve video playlist to extract media URL
1990                 # I'm not completely sure what all these options are, but we
1991                 # seem to need most of them, otherwise the server sends a 401.
1992                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1993                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1994                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1995                                 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1996                                 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1997                 try:
1998                         self.report_download_webpage(video_id)
1999                         webpage = urllib2.urlopen(request).read()
2000                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2001                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2002                         return
2003
2004                 # Extract media URL from playlist XML
2005                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
2006                 if mobj is None:
2007                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
2008                         return
2009                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
2010                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
2011
2012                 try:
2013                         # Process video information
2014                         self._downloader.process_info({
2015                                 'id':           video_id.decode('utf-8'),
2016                                 'url':          video_url,
2017                                 'uploader':     video_uploader,
2018                                 'upload_date':  u'NA',
2019                                 'title':        video_title,
2020                                 'stitle':       simple_title,
2021                                 'ext':          video_extension.decode('utf-8'),
2022                                 'thumbnail':    video_thumbnail.decode('utf-8'),
2023                                 'description':  video_description,
2024                                 'thumbnail':    video_thumbnail,
2025                                 'player_url':   None,
2026                         })
2027                 except UnavailableVideoError:
2028                         self._downloader.trouble(u'\nERROR: unable to download video')
2029
2030
2031 class VimeoIE(InfoExtractor):
2032         """Information extractor for vimeo.com."""
2033
2034         # _VALID_URL matches Vimeo URLs
2035         _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
2036         IE_NAME = u'vimeo'
2037
2038         def __init__(self, downloader=None):
2039                 InfoExtractor.__init__(self, downloader)
2040
2041         def report_download_webpage(self, video_id):
2042                 """Report webpage download."""
2043                 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
2044
2045         def report_extraction(self, video_id):
2046                 """Report information extraction."""
2047                 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
2048
2049         def _real_extract(self, url, new_video=True):
2050                 # Extract ID from URL
2051                 mobj = re.match(self._VALID_URL, url)
2052                 if mobj is None:
2053                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2054                         return
2055
2056                 # At this point we have a new video
2057                 self._downloader.increment_downloads()
2058                 video_id = mobj.group(1)
2059
2060                 # Retrieve video webpage to extract further information
2061                 request = urllib2.Request(url, None, std_headers)
2062                 try:
2063                         self.report_download_webpage(video_id)
2064                         webpage = urllib2.urlopen(request).read()
2065                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2066                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2067                         return
2068
2069                 # Now we begin extracting as much information as we can from what we
2070                 # retrieved. First we extract the information common to all extractors,
2071                 # and latter we extract those that are Vimeo specific.
2072                 self.report_extraction(video_id)
2073
2074                 # Extract the config JSON
2075                 config = webpage.split(' = {config:')[1].split(',assets:')[0]
2076                 try:
2077                         config = json.loads(config)
2078                 except:
2079                         self._downloader.trouble(u'ERROR: unable to extract info section')
2080                         return
2081
2082                 # Extract title
2083                 video_title = config["video"]["title"]
2084                 simple_title = _simplify_title(video_title)
2085
2086                 # Extract uploader
2087                 video_uploader = config["video"]["owner"]["name"]
2088
2089                 # Extract video thumbnail
2090                 video_thumbnail = config["video"]["thumbnail"]
2091
2092                 # Extract video description
2093                 try:
2094                         lxml.etree
2095                 except NameError:
2096                         video_description = u'No description available.'
2097                         mobj = re.search(r'<meta name="description" content="(.*?)" />', webpage, re.MULTILINE)
2098                         if mobj is not None:
2099                                 video_description = mobj.group(1)
2100                 else:
2101                         html_parser = lxml.etree.HTMLParser()
2102                         vwebpage_doc = lxml.etree.parse(StringIO.StringIO(webpage), html_parser)
2103                         video_description = u''.join(vwebpage_doc.xpath('id("description")//text()')).strip()
2104                         # TODO use another parser
2105
2106                 # Extract upload date
2107                 video_upload_date = u'NA'
2108                 mobj = re.search(r'<span id="clip-date" style="display:none">[^:]*: (.*?)( \([^\(]*\))?</span>', webpage)
2109                 if mobj is not None:
2110                         video_upload_date = mobj.group(1)
2111
2112                 # Vimeo specific: extract request signature and timestamp
2113                 sig = config['request']['signature']
2114                 timestamp = config['request']['timestamp']
2115
2116                 # Vimeo specific: extract video quality information
2117                 # TODO bind to format param
2118                 if 'hd' in config["video"]["files"]["h264"]: quality = 'hd'
2119                 else: quality = 'sd'
2120
2121                 video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=H264&type=moogaloop_local&embed_location=" \
2122                                         %(video_id, sig, timestamp, quality)
2123
2124                 try:
2125                         # Process video information
2126                         self._downloader.process_info({
2127                                 'id':           video_id,
2128                                 'url':          video_url,
2129                                 'uploader':     video_uploader,
2130                                 'upload_date':  video_upload_date,
2131                                 'title':        video_title,
2132                                 'stitle':       simple_title,
2133                                 'ext':          u'mp4',
2134                                 'thumbnail':    video_thumbnail,
2135                                 'description':  video_description,
2136                                 'player_url':   None,
2137                         })
2138                 except UnavailableVideoError:
2139                         self._downloader.trouble(u'ERROR: unable to download video')
2140
2141
2142 class GenericIE(InfoExtractor):
2143         """Generic last-resort information extractor."""
2144
2145         _VALID_URL = r'.*'
2146         IE_NAME = u'generic'
2147
2148         def __init__(self, downloader=None):
2149                 InfoExtractor.__init__(self, downloader)
2150
2151         def report_download_webpage(self, video_id):
2152                 """Report webpage download."""
2153                 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2154                 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2155
2156         def report_extraction(self, video_id):
2157                 """Report information extraction."""
2158                 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2159
2160         def _real_extract(self, url):
2161                 # At this point we have a new video
2162                 self._downloader.increment_downloads()
2163
2164                 video_id = url.split('/')[-1]
2165                 request = urllib2.Request(url)
2166                 try:
2167                         self.report_download_webpage(video_id)
2168                         webpage = urllib2.urlopen(request).read()
2169                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2170                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2171                         return
2172                 except ValueError, err:
2173                         # since this is the last-resort InfoExtractor, if
2174                         # this error is thrown, it'll be thrown here
2175                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2176                         return
2177
2178                 self.report_extraction(video_id)
2179                 # Start with something easy: JW Player in SWFObject
2180                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2181                 if mobj is None:
2182                         # Broaden the search a little bit
2183                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2184                 if mobj is None:
2185                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2186                         return
2187
2188                 # It's possible that one of the regexes
2189                 # matched, but returned an empty group:
2190                 if mobj.group(1) is None:
2191                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2192                         return
2193
2194                 video_url = urllib.unquote(mobj.group(1))
2195                 video_id = os.path.basename(video_url)
2196
2197                 # here's a fun little line of code for you:
2198                 video_extension = os.path.splitext(video_id)[1][1:]
2199                 video_id = os.path.splitext(video_id)[0]
2200
2201                 # it's tempting to parse this further, but you would
2202                 # have to take into account all the variations like
2203                 #   Video Title - Site Name
2204                 #   Site Name | Video Title
2205                 #   Video Title - Tagline | Site Name
2206                 # and so on and so forth; it's just not practical
2207                 mobj = re.search(r'<title>(.*)</title>', webpage)
2208                 if mobj is None:
2209                         self._downloader.trouble(u'ERROR: unable to extract title')
2210                         return
2211                 video_title = mobj.group(1).decode('utf-8')
2212                 video_title = sanitize_title(video_title)
2213                 simple_title = _simplify_title(video_title)
2214
2215                 # video uploader is domain name
2216                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2217                 if mobj is None:
2218                         self._downloader.trouble(u'ERROR: unable to extract title')
2219                         return
2220                 video_uploader = mobj.group(1).decode('utf-8')
2221
2222                 try:
2223                         # Process video information
2224                         self._downloader.process_info({
2225                                 'id':           video_id.decode('utf-8'),
2226                                 'url':          video_url.decode('utf-8'),
2227                                 'uploader':     video_uploader,
2228                                 'upload_date':  u'NA',
2229                                 'title':        video_title,
2230                                 'stitle':       simple_title,
2231                                 'ext':          video_extension.decode('utf-8'),
2232                                 'format':       u'NA',
2233                                 'player_url':   None,
2234                         })
2235                 except UnavailableVideoError, err:
2236                         self._downloader.trouble(u'\nERROR: unable to download video')
2237
2238
2239 class YoutubeSearchIE(InfoExtractor):
2240         """Information Extractor for YouTube search queries."""
2241         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
2242         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
2243         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2244         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2245         _youtube_ie = None
2246         _max_youtube_results = 1000
2247         IE_NAME = u'youtube:search'
2248
2249         def __init__(self, youtube_ie, downloader=None):
2250                 InfoExtractor.__init__(self, downloader)
2251                 self._youtube_ie = youtube_ie
2252
2253         def report_download_page(self, query, pagenum):
2254                 """Report attempt to download playlist page with given number."""
2255                 query = query.decode(preferredencoding())
2256                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2257
2258         def _real_initialize(self):
2259                 self._youtube_ie.initialize()
2260
2261         def _real_extract(self, query):
2262                 mobj = re.match(self._VALID_URL, query)
2263                 if mobj is None:
2264                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2265                         return
2266
2267                 prefix, query = query.split(':')
2268                 prefix = prefix[8:]
2269                 query = query.encode('utf-8')
2270                 if prefix == '':
2271                         self._download_n_results(query, 1)
2272                         return
2273                 elif prefix == 'all':
2274                         self._download_n_results(query, self._max_youtube_results)
2275                         return
2276                 else:
2277                         try:
2278                                 n = long(prefix)
2279                                 if n <= 0:
2280                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2281                                         return
2282                                 elif n > self._max_youtube_results:
2283                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2284                                         n = self._max_youtube_results
2285                                 self._download_n_results(query, n)
2286                                 return
2287                         except ValueError: # parsing prefix as integer fails
2288                                 self._download_n_results(query, 1)
2289                                 return
2290
2291         def _download_n_results(self, query, n):
2292                 """Downloads a specified number of results for a query"""
2293
2294                 video_ids = []
2295                 already_seen = set()
2296                 pagenum = 1
2297
2298                 while True:
2299                         self.report_download_page(query, pagenum)
2300                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2301                         request = urllib2.Request(result_url)
2302                         try:
2303                                 page = urllib2.urlopen(request).read()
2304                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2305                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2306                                 return
2307
2308                         # Extract video identifiers
2309                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2310                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2311                                 if video_id not in already_seen:
2312                                         video_ids.append(video_id)
2313                                         already_seen.add(video_id)
2314                                         if len(video_ids) == n:
2315                                                 # Specified n videos reached
2316                                                 for id in video_ids:
2317                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2318                                                 return
2319
2320                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2321                                 for id in video_ids:
2322                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2323                                 return
2324
2325                         pagenum = pagenum + 1
2326
2327
2328 class GoogleSearchIE(InfoExtractor):
2329         """Information Extractor for Google Video search queries."""
2330         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
2331         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
2332         _VIDEO_INDICATOR = r'<a href="http://video\.google\.com/videoplay\?docid=([^"\&]+)'
2333         _MORE_PAGES_INDICATOR = r'class="pn" id="pnnext"'
2334         _google_ie = None
2335         _max_google_results = 1000
2336         IE_NAME = u'video.google:search'
2337
2338         def __init__(self, google_ie, downloader=None):
2339                 InfoExtractor.__init__(self, downloader)
2340                 self._google_ie = google_ie
2341
2342         def report_download_page(self, query, pagenum):
2343                 """Report attempt to download playlist page with given number."""
2344                 query = query.decode(preferredencoding())
2345                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2346
2347         def _real_initialize(self):
2348                 self._google_ie.initialize()
2349
2350         def _real_extract(self, query):
2351                 mobj = re.match(self._VALID_URL, query)
2352                 if mobj is None:
2353                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2354                         return
2355
2356                 prefix, query = query.split(':')
2357                 prefix = prefix[8:]
2358                 query = query.encode('utf-8')
2359                 if prefix == '':
2360                         self._download_n_results(query, 1)
2361                         return
2362                 elif prefix == 'all':
2363                         self._download_n_results(query, self._max_google_results)
2364                         return
2365                 else:
2366                         try:
2367                                 n = long(prefix)
2368                                 if n <= 0:
2369                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2370                                         return
2371                                 elif n > self._max_google_results:
2372                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2373                                         n = self._max_google_results
2374                                 self._download_n_results(query, n)
2375                                 return
2376                         except ValueError: # parsing prefix as integer fails
2377                                 self._download_n_results(query, 1)
2378                                 return
2379
2380         def _download_n_results(self, query, n):
2381                 """Downloads a specified number of results for a query"""
2382
2383                 video_ids = []
2384                 pagenum = 0
2385
2386                 while True:
2387                         self.report_download_page(query, pagenum)
2388                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum*10)
2389                         request = urllib2.Request(result_url)
2390                         try:
2391                                 page = urllib2.urlopen(request).read()
2392                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2393                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2394                                 return
2395
2396                         # Extract video identifiers
2397                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2398                                 video_id = mobj.group(1)
2399                                 if video_id not in video_ids:
2400                                         video_ids.append(video_id)
2401                                         if len(video_ids) == n:
2402                                                 # Specified n videos reached
2403                                                 for id in video_ids:
2404                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2405                                                 return
2406
2407                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2408                                 for id in video_ids:
2409                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2410                                 return
2411
2412                         pagenum = pagenum + 1
2413
2414
2415 class YahooSearchIE(InfoExtractor):
2416         """Information Extractor for Yahoo! Video search queries."""
2417         _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
2418         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
2419         _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2420         _MORE_PAGES_INDICATOR = r'\s*Next'
2421         _yahoo_ie = None
2422         _max_yahoo_results = 1000
2423         IE_NAME = u'video.yahoo:search'
2424
2425         def __init__(self, yahoo_ie, downloader=None):
2426                 InfoExtractor.__init__(self, downloader)
2427                 self._yahoo_ie = yahoo_ie
2428
2429         def report_download_page(self, query, pagenum):
2430                 """Report attempt to download playlist page with given number."""
2431                 query = query.decode(preferredencoding())
2432                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2433
2434         def _real_initialize(self):
2435                 self._yahoo_ie.initialize()
2436
2437         def _real_extract(self, query):
2438                 mobj = re.match(self._VALID_URL, query)
2439                 if mobj is None:
2440                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2441                         return
2442
2443                 prefix, query = query.split(':')
2444                 prefix = prefix[8:]
2445                 query = query.encode('utf-8')
2446                 if prefix == '':
2447                         self._download_n_results(query, 1)
2448                         return
2449                 elif prefix == 'all':
2450                         self._download_n_results(query, self._max_yahoo_results)
2451                         return
2452                 else:
2453                         try:
2454                                 n = long(prefix)
2455                                 if n <= 0:
2456                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2457                                         return
2458                                 elif n > self._max_yahoo_results:
2459                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2460                                         n = self._max_yahoo_results
2461                                 self._download_n_results(query, n)
2462                                 return
2463                         except ValueError: # parsing prefix as integer fails
2464                                 self._download_n_results(query, 1)
2465                                 return
2466
2467         def _download_n_results(self, query, n):
2468                 """Downloads a specified number of results for a query"""
2469
2470                 video_ids = []
2471                 already_seen = set()
2472                 pagenum = 1
2473
2474                 while True:
2475                         self.report_download_page(query, pagenum)
2476                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2477                         request = urllib2.Request(result_url)
2478                         try:
2479                                 page = urllib2.urlopen(request).read()
2480                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2481                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2482                                 return
2483
2484                         # Extract video identifiers
2485                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2486                                 video_id = mobj.group(1)
2487                                 if video_id not in already_seen:
2488                                         video_ids.append(video_id)
2489                                         already_seen.add(video_id)
2490                                         if len(video_ids) == n:
2491                                                 # Specified n videos reached
2492                                                 for id in video_ids:
2493                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2494                                                 return
2495
2496                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2497                                 for id in video_ids:
2498                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2499                                 return
2500
2501                         pagenum = pagenum + 1
2502
2503
2504 class YoutubePlaylistIE(InfoExtractor):
2505         """Information Extractor for YouTube playlists."""
2506
2507         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2508         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2509         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2510         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2511         _youtube_ie = None
2512         IE_NAME = u'youtube:playlist'
2513
2514         def __init__(self, youtube_ie, downloader=None):
2515                 InfoExtractor.__init__(self, downloader)
2516                 self._youtube_ie = youtube_ie
2517
2518         def report_download_page(self, playlist_id, pagenum):
2519                 """Report attempt to download playlist page with given number."""
2520                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2521
2522         def _real_initialize(self):
2523                 self._youtube_ie.initialize()
2524
2525         def _real_extract(self, url):
2526                 # Extract playlist id
2527                 mobj = re.match(self._VALID_URL, url)
2528                 if mobj is None:
2529                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2530                         return
2531
2532                 # Single video case
2533                 if mobj.group(3) is not None:
2534                         self._youtube_ie.extract(mobj.group(3))
2535                         return
2536
2537                 # Download playlist pages
2538                 # prefix is 'p' as default for playlists but there are other types that need extra care
2539                 playlist_prefix = mobj.group(1)
2540                 if playlist_prefix == 'a':
2541                         playlist_access = 'artist'
2542                 else:
2543                         playlist_prefix = 'p'
2544                         playlist_access = 'view_play_list'
2545                 playlist_id = mobj.group(2)
2546                 video_ids = []
2547                 pagenum = 1
2548
2549                 while True:
2550                         self.report_download_page(playlist_id, pagenum)
2551                         url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
2552                         request = urllib2.Request(url)
2553                         try:
2554                                 page = urllib2.urlopen(request).read()
2555                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2556                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2557                                 return
2558
2559                         # Extract video identifiers
2560                         ids_in_page = []
2561                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2562                                 if mobj.group(1) not in ids_in_page:
2563                                         ids_in_page.append(mobj.group(1))
2564                         video_ids.extend(ids_in_page)
2565
2566                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2567                                 break
2568                         pagenum = pagenum + 1
2569
2570                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2571                 playlistend = self._downloader.params.get('playlistend', -1)
2572                 video_ids = video_ids[playliststart:playlistend]
2573
2574                 for id in video_ids:
2575                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2576                 return
2577
2578
2579 class YoutubeUserIE(InfoExtractor):
2580         """Information Extractor for YouTube users."""
2581
2582         _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2583         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2584         _GDATA_PAGE_SIZE = 50
2585         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2586         _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
2587         _youtube_ie = None
2588         IE_NAME = u'youtube:user'
2589
2590         def __init__(self, youtube_ie, downloader=None):
2591                 InfoExtractor.__init__(self, downloader)
2592                 self._youtube_ie = youtube_ie
2593
2594         def report_download_page(self, username, start_index):
2595                 """Report attempt to download user page."""
2596                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2597                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2598
2599         def _real_initialize(self):
2600                 self._youtube_ie.initialize()
2601
2602         def _real_extract(self, url):
2603                 # Extract username
2604                 mobj = re.match(self._VALID_URL, url)
2605                 if mobj is None:
2606                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2607                         return
2608
2609                 username = mobj.group(1)
2610
2611                 # Download video ids using YouTube Data API. Result size per
2612                 # query is limited (currently to 50 videos) so we need to query
2613                 # page by page until there are no video ids - it means we got
2614                 # all of them.
2615
2616                 video_ids = []
2617                 pagenum = 0
2618
2619                 while True:
2620                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2621                         self.report_download_page(username, start_index)
2622
2623                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2624
2625                         try:
2626                                 page = urllib2.urlopen(request).read()
2627                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2628                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2629                                 return
2630
2631                         # Extract video identifiers
2632                         ids_in_page = []
2633
2634                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2635                                 if mobj.group(1) not in ids_in_page:
2636                                         ids_in_page.append(mobj.group(1))
2637
2638                         video_ids.extend(ids_in_page)
2639
2640                         # A little optimization - if current page is not
2641                         # "full", ie. does not contain PAGE_SIZE video ids then
2642                         # we can assume that this page is the last one - there
2643                         # are no more ids on further pages - no need to query
2644                         # again.
2645
2646                         if len(ids_in_page) < self._GDATA_PAGE_SIZE:
2647                                 break
2648
2649                         pagenum += 1
2650
2651                 all_ids_count = len(video_ids)
2652                 playliststart = self._downloader.params.get('playliststart', 1) - 1
2653                 playlistend = self._downloader.params.get('playlistend', -1)
2654
2655                 if playlistend == -1:
2656                         video_ids = video_ids[playliststart:]
2657                 else:
2658                         video_ids = video_ids[playliststart:playlistend]
2659
2660                 self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2661                                 (username, all_ids_count, len(video_ids)))
2662
2663                 for video_id in video_ids:
2664                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2665
2666
2667 class DepositFilesIE(InfoExtractor):
2668         """Information extractor for depositfiles.com"""
2669
2670         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
2671         IE_NAME = u'DepositFiles'
2672
2673         def __init__(self, downloader=None):
2674                 InfoExtractor.__init__(self, downloader)
2675
2676         def report_download_webpage(self, file_id):
2677                 """Report webpage download."""
2678                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2679
2680         def report_extraction(self, file_id):
2681                 """Report information extraction."""
2682                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2683
2684         def _real_extract(self, url):
2685                 # At this point we have a new file
2686                 self._downloader.increment_downloads()
2687
2688                 file_id = url.split('/')[-1]
2689                 # Rebuild url in english locale
2690                 url = 'http://depositfiles.com/en/files/' + file_id
2691
2692                 # Retrieve file webpage with 'Free download' button pressed
2693                 free_download_indication = { 'gateway_result' : '1' }
2694                 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2695                 try:
2696                         self.report_download_webpage(file_id)
2697                         webpage = urllib2.urlopen(request).read()
2698                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2699                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2700                         return
2701
2702                 # Search for the real file URL
2703                 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2704                 if (mobj is None) or (mobj.group(1) is None):
2705                         # Try to figure out reason of the error.
2706                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2707                         if (mobj is not None) and (mobj.group(1) is not None):
2708                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2709                                 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2710                         else:
2711                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2712                         return
2713
2714                 file_url = mobj.group(1)
2715                 file_extension = os.path.splitext(file_url)[1][1:]
2716
2717                 # Search for file title
2718                 mobj = re.search(r'<b title="(.*?)">', webpage)
2719                 if mobj is None:
2720                         self._downloader.trouble(u'ERROR: unable to extract title')
2721                         return
2722                 file_title = mobj.group(1).decode('utf-8')
2723
2724                 try:
2725                         # Process file information
2726                         self._downloader.process_info({
2727                                 'id':           file_id.decode('utf-8'),
2728                                 'url':          file_url.decode('utf-8'),
2729                                 'uploader':     u'NA',
2730                                 'upload_date':  u'NA',
2731                                 'title':        file_title,
2732                                 'stitle':       file_title,
2733                                 'ext':          file_extension.decode('utf-8'),
2734                                 'format':       u'NA',
2735                                 'player_url':   None,
2736                         })
2737                 except UnavailableVideoError, err:
2738                         self._downloader.trouble(u'ERROR: unable to download file')
2739
2740
2741 class FacebookIE(InfoExtractor):
2742         """Information Extractor for Facebook"""
2743
2744         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2745         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2746         _NETRC_MACHINE = 'facebook'
2747         _available_formats = ['video', 'highqual', 'lowqual']
2748         _video_extensions = {
2749                 'video': 'mp4',
2750                 'highqual': 'mp4',
2751                 'lowqual': 'mp4',
2752         }
2753         IE_NAME = u'facebook'
2754
2755         def __init__(self, downloader=None):
2756                 InfoExtractor.__init__(self, downloader)
2757
2758         def _reporter(self, message):
2759                 """Add header and report message."""
2760                 self._downloader.to_screen(u'[facebook] %s' % message)
2761
2762         def report_login(self):
2763                 """Report attempt to log in."""
2764                 self._reporter(u'Logging in')
2765
2766         def report_video_webpage_download(self, video_id):
2767                 """Report attempt to download video webpage."""
2768                 self._reporter(u'%s: Downloading video webpage' % video_id)
2769
2770         def report_information_extraction(self, video_id):
2771                 """Report attempt to extract video information."""
2772                 self._reporter(u'%s: Extracting video information' % video_id)
2773
2774         def _parse_page(self, video_webpage):
2775                 """Extract video information from page"""
2776                 # General data
2777                 data = {'title': r'\("video_title", "(.*?)"\)',
2778                         'description': r'<div class="datawrap">(.*?)</div>',
2779                         'owner': r'\("video_owner_name", "(.*?)"\)',
2780                         'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2781                         }
2782                 video_info = {}
2783                 for piece in data.keys():
2784                         mobj = re.search(data[piece], video_webpage)
2785                         if mobj is not None:
2786                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2787
2788                 # Video urls
2789                 video_urls = {}
2790                 for fmt in self._available_formats:
2791                         mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2792                         if mobj is not None:
2793                                 # URL is in a Javascript segment inside an escaped Unicode format within
2794                                 # the generally utf-8 page
2795                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2796                 video_info['video_urls'] = video_urls
2797
2798                 return video_info
2799
2800         def _real_initialize(self):
2801                 if self._downloader is None:
2802                         return
2803
2804                 useremail = None
2805                 password = None
2806                 downloader_params = self._downloader.params
2807
2808                 # Attempt to use provided username and password or .netrc data
2809                 if downloader_params.get('username', None) is not None:
2810                         useremail = downloader_params['username']
2811                         password = downloader_params['password']
2812                 elif downloader_params.get('usenetrc', False):
2813                         try:
2814                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2815                                 if info is not None:
2816                                         useremail = info[0]
2817                                         password = info[2]
2818                                 else:
2819                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2820                         except (IOError, netrc.NetrcParseError), err:
2821                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2822                                 return
2823
2824                 if useremail is None:
2825                         return
2826
2827                 # Log in
2828                 login_form = {
2829                         'email': useremail,
2830                         'pass': password,
2831                         'login': 'Log+In'
2832                         }
2833                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2834                 try:
2835                         self.report_login()
2836                         login_results = urllib2.urlopen(request).read()
2837                         if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2838                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2839                                 return
2840                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2841                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2842                         return
2843
2844         def _real_extract(self, url):
2845                 mobj = re.match(self._VALID_URL, url)
2846                 if mobj is None:
2847                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2848                         return
2849                 video_id = mobj.group('ID')
2850
2851                 # Get video webpage
2852                 self.report_video_webpage_download(video_id)
2853                 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2854                 try:
2855                         page = urllib2.urlopen(request)
2856                         video_webpage = page.read()
2857                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2858                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2859                         return
2860
2861                 # Start extracting information
2862                 self.report_information_extraction(video_id)
2863
2864                 # Extract information
2865                 video_info = self._parse_page(video_webpage)
2866
2867                 # uploader
2868                 if 'owner' not in video_info:
2869                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2870                         return
2871                 video_uploader = video_info['owner']
2872
2873                 # title
2874                 if 'title' not in video_info:
2875                         self._downloader.trouble(u'ERROR: unable to extract video title')
2876                         return
2877                 video_title = video_info['title']
2878                 video_title = video_title.decode('utf-8')
2879                 video_title = sanitize_title(video_title)
2880
2881                 simple_title = _simplify_title(video_title)
2882
2883                 # thumbnail image
2884                 if 'thumbnail' not in video_info:
2885                         self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2886                         video_thumbnail = ''
2887                 else:
2888                         video_thumbnail = video_info['thumbnail']
2889
2890                 # upload date
2891                 upload_date = u'NA'
2892                 if 'upload_date' in video_info:
2893                         upload_time = video_info['upload_date']
2894                         timetuple = email.utils.parsedate_tz(upload_time)
2895                         if timetuple is not None:
2896                                 try:
2897                                         upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2898                                 except:
2899                                         pass
2900
2901                 # description
2902                 video_description = video_info.get('description', 'No description available.')
2903
2904                 url_map = video_info['video_urls']
2905                 if len(url_map.keys()) > 0:
2906                         # Decide which formats to download
2907                         req_format = self._downloader.params.get('format', None)
2908                         format_limit = self._downloader.params.get('format_limit', None)
2909
2910                         if format_limit is not None and format_limit in self._available_formats:
2911                                 format_list = self._available_formats[self._available_formats.index(format_limit):]
2912                         else:
2913                                 format_list = self._available_formats
2914                         existing_formats = [x for x in format_list if x in url_map]
2915                         if len(existing_formats) == 0:
2916                                 self._downloader.trouble(u'ERROR: no known formats available for video')
2917                                 return
2918                         if req_format is None:
2919                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2920                         elif req_format == 'worst':
2921                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
2922                         elif req_format == '-1':
2923                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2924                         else:
2925                                 # Specific format
2926                                 if req_format not in url_map:
2927                                         self._downloader.trouble(u'ERROR: requested format not available')
2928                                         return
2929                                 video_url_list = [(req_format, url_map[req_format])] # Specific format
2930
2931                 for format_param, video_real_url in video_url_list:
2932
2933                         # At this point we have a new video
2934                         self._downloader.increment_downloads()
2935
2936                         # Extension
2937                         video_extension = self._video_extensions.get(format_param, 'mp4')
2938
2939                         try:
2940                                 # Process video information
2941                                 self._downloader.process_info({
2942                                         'id':           video_id.decode('utf-8'),
2943                                         'url':          video_real_url.decode('utf-8'),
2944                                         'uploader':     video_uploader.decode('utf-8'),
2945                                         'upload_date':  upload_date,
2946                                         'title':        video_title,
2947                                         'stitle':       simple_title,
2948                                         'ext':          video_extension.decode('utf-8'),
2949                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
2950                                         'thumbnail':    video_thumbnail.decode('utf-8'),
2951                                         'description':  video_description.decode('utf-8'),
2952                                         'player_url':   None,
2953                                 })
2954                         except UnavailableVideoError, err:
2955                                 self._downloader.trouble(u'\nERROR: unable to download video')
2956
2957 class BlipTVIE(InfoExtractor):
2958         """Information extractor for blip.tv"""
2959
2960         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
2961         _URL_EXT = r'^.*\.([a-z0-9]+)$'
2962         IE_NAME = u'blip.tv'
2963
2964         def report_extraction(self, file_id):
2965                 """Report information extraction."""
2966                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
2967
2968         def report_direct_download(self, title):
2969                 """Report information extraction."""
2970                 self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
2971
2972         def _real_extract(self, url):
2973                 mobj = re.match(self._VALID_URL, url)
2974                 if mobj is None:
2975                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2976                         return
2977
2978                 if '?' in url:
2979                         cchar = '&'
2980                 else:
2981                         cchar = '?'
2982                 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
2983                 request = urllib2.Request(json_url)
2984                 self.report_extraction(mobj.group(1))
2985                 info = None
2986                 try:
2987                         urlh = urllib2.urlopen(request)
2988                         if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
2989                                 basename = url.split('/')[-1]
2990                                 title,ext = os.path.splitext(basename)
2991                                 title = title.decode('UTF-8')
2992                                 ext = ext.replace('.', '')
2993                                 self.report_direct_download(title)
2994                                 info = {
2995                                         'id': title,
2996                                         'url': url,
2997                                         'title': title,
2998                                         'stitle': _simplify_title(title),
2999                                         'ext': ext,
3000                                         'urlhandle': urlh
3001                                 }
3002                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3003                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
3004                         return
3005                 if info is None: # Regular URL
3006                         try:
3007                                 json_code = urlh.read()
3008                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3009                                 self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
3010                                 return
3011
3012                         try:
3013                                 json_data = json.loads(json_code)
3014                                 if 'Post' in json_data:
3015                                         data = json_data['Post']
3016                                 else:
3017                                         data = json_data
3018
3019                                 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
3020                                 video_url = data['media']['url']
3021                                 umobj = re.match(self._URL_EXT, video_url)
3022                                 if umobj is None:
3023                                         raise ValueError('Can not determine filename extension')
3024                                 ext = umobj.group(1)
3025
3026                                 info = {
3027                                         'id': data['item_id'],
3028                                         'url': video_url,
3029                                         'uploader': data['display_name'],
3030                                         'upload_date': upload_date,
3031                                         'title': data['title'],
3032                                         'stitle': _simplify_title(data['title']),
3033                                         'ext': ext,
3034                                         'format': data['media']['mimeType'],
3035                                         'thumbnail': data['thumbnailUrl'],
3036                                         'description': data['description'],
3037                                         'player_url': data['embedUrl']
3038                                 }
3039                         except (ValueError,KeyError), err:
3040                                 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
3041                                 return
3042
3043                 self._downloader.increment_downloads()
3044
3045                 try:
3046                         self._downloader.process_info(info)
3047                 except UnavailableVideoError, err:
3048                         self._downloader.trouble(u'\nERROR: unable to download video')
3049
3050
3051 class MyVideoIE(InfoExtractor):
3052         """Information Extractor for myvideo.de."""
3053
3054         _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
3055         IE_NAME = u'myvideo'
3056
3057         def __init__(self, downloader=None):
3058                 InfoExtractor.__init__(self, downloader)
3059
3060         def report_download_webpage(self, video_id):
3061                 """Report webpage download."""
3062                 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
3063
3064         def report_extraction(self, video_id):
3065                 """Report information extraction."""
3066                 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
3067
3068         def _real_extract(self,url):
3069                 mobj = re.match(self._VALID_URL, url)
3070                 if mobj is None:
3071                         self._download.trouble(u'ERROR: invalid URL: %s' % url)
3072                         return
3073
3074                 video_id = mobj.group(1)
3075
3076                 # Get video webpage
3077                 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
3078                 try:
3079                         self.report_download_webpage(video_id)
3080                         webpage = urllib2.urlopen(request).read()
3081                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3082                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3083                         return
3084
3085                 self.report_extraction(video_id)
3086                 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3087                                  webpage)
3088                 if mobj is None:
3089                         self._downloader.trouble(u'ERROR: unable to extract media URL')
3090                         return
3091                 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3092
3093                 mobj = re.search('<title>([^<]+)</title>', webpage)
3094                 if mobj is None:
3095                         self._downloader.trouble(u'ERROR: unable to extract title')
3096                         return
3097
3098                 video_title = mobj.group(1)
3099                 video_title = sanitize_title(video_title)
3100
3101                 simple_title = _simplify_title(video_title)
3102
3103                 try:
3104                         self._downloader.process_info({
3105                                 'id':           video_id,
3106                                 'url':          video_url,
3107                                 'uploader':     u'NA',
3108                                 'upload_date':  u'NA',
3109                                 'title':        video_title,
3110                                 'stitle':       simple_title,
3111                                 'ext':          u'flv',
3112                                 'format':       u'NA',
3113                                 'player_url':   None,
3114                         })
3115                 except UnavailableVideoError:
3116                         self._downloader.trouble(u'\nERROR: Unable to download video')
3117
3118 class ComedyCentralIE(InfoExtractor):
3119         """Information extractor for The Daily Show and Colbert Report """
3120
3121         _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3122         IE_NAME = u'comedycentral'
3123
3124         def report_extraction(self, episode_id):
3125                 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3126
3127         def report_config_download(self, episode_id):
3128                 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3129
3130         def report_index_download(self, episode_id):
3131                 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3132
3133         def report_player_url(self, episode_id):
3134                 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3135
3136         def _real_extract(self, url):
3137                 mobj = re.match(self._VALID_URL, url)
3138                 if mobj is None:
3139                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3140                         return
3141
3142                 if mobj.group('shortname'):
3143                         if mobj.group('shortname') in ('tds', 'thedailyshow'):
3144                                 url = u'http://www.thedailyshow.com/full-episodes/'
3145                         else:
3146                                 url = u'http://www.colbertnation.com/full-episodes/'
3147                         mobj = re.match(self._VALID_URL, url)
3148                         assert mobj is not None
3149
3150                 dlNewest = not mobj.group('episode')
3151                 if dlNewest:
3152                         epTitle = mobj.group('showname')
3153                 else:
3154                         epTitle = mobj.group('episode')
3155
3156                 req = urllib2.Request(url)
3157                 self.report_extraction(epTitle)
3158                 try:
3159                         htmlHandle = urllib2.urlopen(req)
3160                         html = htmlHandle.read()
3161                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3162                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3163                         return
3164                 if dlNewest:
3165                         url = htmlHandle.geturl()
3166                         mobj = re.match(self._VALID_URL, url)
3167                         if mobj is None:
3168                                 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3169                                 return
3170                         if mobj.group('episode') == '':
3171                                 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3172                                 return
3173                         epTitle = mobj.group('episode')
3174
3175                 mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
3176                 if len(mMovieParams) == 0:
3177                         self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3178                         return
3179
3180                 playerUrl_raw = mMovieParams[0][0]
3181                 self.report_player_url(epTitle)
3182                 try:
3183                         urlHandle = urllib2.urlopen(playerUrl_raw)
3184                         playerUrl = urlHandle.geturl()
3185                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3186                         self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3187                         return
3188
3189                 uri = mMovieParams[0][1]
3190                 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3191                 self.report_index_download(epTitle)
3192                 try:
3193                         indexXml = urllib2.urlopen(indexUrl).read()
3194                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3195                         self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
3196                         return
3197
3198                 idoc = xml.etree.ElementTree.fromstring(indexXml)
3199                 itemEls = idoc.findall('.//item')
3200                 for itemEl in itemEls:
3201                         mediaId = itemEl.findall('./guid')[0].text
3202                         shortMediaId = mediaId.split(':')[-1]
3203                         showId = mediaId.split(':')[-2].replace('.com', '')
3204                         officialTitle = itemEl.findall('./title')[0].text
3205                         officialDate = itemEl.findall('./pubDate')[0].text
3206
3207                         configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3208                                                 urllib.urlencode({'uri': mediaId}))
3209                         configReq = urllib2.Request(configUrl)
3210                         self.report_config_download(epTitle)
3211                         try:
3212                                 configXml = urllib2.urlopen(configReq).read()
3213                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3214                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3215                                 return
3216
3217                         cdoc = xml.etree.ElementTree.fromstring(configXml)
3218                         turls = []
3219                         for rendition in cdoc.findall('.//rendition'):
3220                                 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3221                                 turls.append(finfo)
3222
3223                         if len(turls) == 0:
3224                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
3225                                 continue
3226
3227                         # For now, just pick the highest bitrate
3228                         format,video_url = turls[-1]
3229
3230                         self._downloader.increment_downloads()
3231
3232                         effTitle = showId + u'-' + epTitle
3233                         info = {
3234                                 'id': shortMediaId,
3235                                 'url': video_url,
3236                                 'uploader': showId,
3237                                 'upload_date': officialDate,
3238                                 'title': effTitle,
3239                                 'stitle': _simplify_title(effTitle),
3240                                 'ext': 'mp4',
3241                                 'format': format,
3242                                 'thumbnail': None,
3243                                 'description': officialTitle,
3244                                 'player_url': playerUrl
3245                         }
3246
3247                         try:
3248                                 self._downloader.process_info(info)
3249                         except UnavailableVideoError, err:
3250                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
3251                                 continue
3252
3253
3254 class EscapistIE(InfoExtractor):
3255         """Information extractor for The Escapist """
3256
3257         _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
3258         IE_NAME = u'escapist'
3259
3260         def report_extraction(self, showName):
3261                 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3262
3263         def report_config_download(self, showName):
3264                 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3265
3266         def _real_extract(self, url):
3267                 htmlParser = HTMLParser.HTMLParser()
3268
3269                 mobj = re.match(self._VALID_URL, url)
3270                 if mobj is None:
3271                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3272                         return
3273                 showName = mobj.group('showname')
3274                 videoId = mobj.group('episode')
3275
3276                 self.report_extraction(showName)
3277                 try:
3278                         webPage = urllib2.urlopen(url).read()
3279                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3280                         self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
3281                         return
3282
3283                 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3284                 description = htmlParser.unescape(descMatch.group(1))
3285                 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3286                 imgUrl = htmlParser.unescape(imgMatch.group(1))
3287                 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3288                 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3289                 configUrlMatch = re.search('config=(.*)$', playerUrl)
3290                 configUrl = urllib2.unquote(configUrlMatch.group(1))
3291
3292                 self.report_config_download(showName)
3293                 try:
3294                         configJSON = urllib2.urlopen(configUrl).read()
3295                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3296                         self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3297                         return
3298
3299                 # Technically, it's JavaScript, not JSON
3300                 configJSON = configJSON.replace("'", '"')
3301
3302                 try:
3303                         config = json.loads(configJSON)
3304                 except (ValueError,), err:
3305                         self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
3306                         return
3307
3308                 playlist = config['playlist']
3309                 videoUrl = playlist[1]['url']
3310
3311                 self._downloader.increment_downloads()
3312                 info = {
3313                         'id': videoId,
3314                         'url': videoUrl,
3315                         'uploader': showName,
3316                         'upload_date': None,
3317                         'title': showName,
3318                         'stitle': _simplify_title(showName),
3319                         'ext': 'flv',
3320                         'format': 'flv',
3321                         'thumbnail': imgUrl,
3322                         'description': description,
3323                         'player_url': playerUrl,
3324                 }
3325
3326                 try:
3327                         self._downloader.process_info(info)
3328                 except UnavailableVideoError, err:
3329                         self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
3330
3331
3332 class CollegeHumorIE(InfoExtractor):
3333         """Information extractor for collegehumor.com"""
3334
3335         _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
3336         IE_NAME = u'collegehumor'
3337
3338         def report_webpage(self, video_id):
3339                 """Report information extraction."""
3340                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3341
3342         def report_extraction(self, video_id):
3343                 """Report information extraction."""
3344                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3345
3346         def _real_extract(self, url):
3347                 htmlParser = HTMLParser.HTMLParser()
3348
3349                 mobj = re.match(self._VALID_URL, url)
3350                 if mobj is None:
3351                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3352                         return
3353                 video_id = mobj.group('videoid')
3354
3355                 self.report_webpage(video_id)
3356                 request = urllib2.Request(url)
3357                 try:
3358                         webpage = urllib2.urlopen(request).read()
3359                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3360                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3361                         return
3362
3363                 m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
3364                 if m is None:
3365                         self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
3366                         return
3367                 internal_video_id = m.group('internalvideoid')
3368
3369                 info = {
3370                         'id': video_id,
3371                         'internal_id': internal_video_id,
3372                 }
3373
3374                 self.report_extraction(video_id)
3375                 xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
3376                 try:
3377                         metaXml = urllib2.urlopen(xmlUrl).read()
3378                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3379                         self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
3380                         return
3381
3382                 mdoc = xml.etree.ElementTree.fromstring(metaXml)
3383                 try:
3384                         videoNode = mdoc.findall('./video')[0]
3385                         info['description'] = videoNode.findall('./description')[0].text
3386                         info['title'] = videoNode.findall('./caption')[0].text
3387                         info['stitle'] = _simplify_title(info['title'])
3388                         info['url'] = videoNode.findall('./file')[0].text
3389                         info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
3390                         info['ext'] = info['url'].rpartition('.')[2]
3391                         info['format'] = info['ext']
3392                 except IndexError:
3393                         self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3394                         return
3395
3396                 self._downloader.increment_downloads()
3397
3398                 try:
3399                         self._downloader.process_info(info)
3400                 except UnavailableVideoError, err:
3401                         self._downloader.trouble(u'\nERROR: unable to download video')
3402
3403
3404 class XVideosIE(InfoExtractor):
3405         """Information extractor for xvideos.com"""
3406
3407         _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
3408         IE_NAME = u'xvideos'
3409
3410         def report_webpage(self, video_id):
3411                 """Report information extraction."""
3412                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3413
3414         def report_extraction(self, video_id):
3415                 """Report information extraction."""
3416                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3417
3418         def _real_extract(self, url):
3419                 htmlParser = HTMLParser.HTMLParser()
3420
3421                 mobj = re.match(self._VALID_URL, url)
3422                 if mobj is None:
3423                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3424                         return
3425                 video_id = mobj.group(1).decode('utf-8')
3426
3427                 self.report_webpage(video_id)
3428
3429                 request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
3430                 try:
3431                         webpage = urllib2.urlopen(request).read()
3432                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3433                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3434                         return
3435
3436                 self.report_extraction(video_id)
3437
3438
3439                 # Extract video URL
3440                 mobj = re.search(r'flv_url=(.+?)&', webpage)
3441                 if mobj is None:
3442                         self._downloader.trouble(u'ERROR: unable to extract video url')
3443                         return
3444                 video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
3445
3446
3447                 # Extract title
3448                 mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
3449                 if mobj is None:
3450                         self._downloader.trouble(u'ERROR: unable to extract video title')
3451                         return
3452                 video_title = mobj.group(1).decode('utf-8')
3453
3454
3455                 # Extract video thumbnail
3456                 mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
3457                 if mobj is None:
3458                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
3459                         return
3460                 video_thumbnail = mobj.group(1).decode('utf-8')
3461
3462
3463
3464                 self._downloader.increment_downloads()
3465                 info = {
3466                         'id': video_id,
3467                         'url': video_url,
3468                         'uploader': None,
3469                         'upload_date': None,
3470                         'title': video_title,
3471                         'stitle': _simplify_title(video_title),
3472                         'ext': 'flv',
3473                         'format': 'flv',
3474                         'thumbnail': video_thumbnail,
3475                         'description': None,
3476                         'player_url': None,
3477                 }
3478
3479                 try:
3480                         self._downloader.process_info(info)
3481                 except UnavailableVideoError, err:
3482                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
3483
3484
3485 class SoundcloudIE(InfoExtractor):
3486         """Information extractor for soundcloud.com
3487            To access the media, the uid of the song and a stream token
3488            must be extracted from the page source and the script must make
3489            a request to media.soundcloud.com/crossdomain.xml. Then
3490            the media can be grabbed by requesting from an url composed
3491            of the stream token and uid
3492          """
3493
3494         _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
3495         IE_NAME = u'soundcloud'
3496
3497         def __init__(self, downloader=None):
3498                 InfoExtractor.__init__(self, downloader)
3499
3500         def report_webpage(self, video_id):
3501                 """Report information extraction."""
3502                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3503
3504         def report_extraction(self, video_id):
3505                 """Report information extraction."""
3506                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3507
3508         def _real_extract(self, url):
3509                 htmlParser = HTMLParser.HTMLParser()
3510
3511                 mobj = re.match(self._VALID_URL, url)
3512                 if mobj is None:
3513                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3514                         return
3515
3516                 # extract uploader (which is in the url)
3517                 uploader = mobj.group(1).decode('utf-8')
3518                 # extract simple title (uploader + slug of song title)
3519                 slug_title =  mobj.group(2).decode('utf-8')
3520                 simple_title = uploader + '-' + slug_title
3521
3522                 self.report_webpage('%s/%s' % (uploader, slug_title))
3523
3524                 request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
3525                 try:
3526                         webpage = urllib2.urlopen(request).read()
3527                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3528                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3529                         return
3530
3531                 self.report_extraction('%s/%s' % (uploader, slug_title))
3532
3533                 # extract uid and stream token that soundcloud hands out for access
3534                 mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
3535                 if mobj:
3536                         video_id = mobj.group(1)
3537                         stream_token = mobj.group(2)
3538
3539                 # extract unsimplified title
3540                 mobj = re.search('"title":"(.*?)",', webpage)
3541                 if mobj:
3542                         title = mobj.group(1)
3543
3544                 # construct media url (with uid/token)
3545                 mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
3546                 mediaURL = mediaURL % (video_id, stream_token)
3547
3548                 # description
3549                 description = u'No description available'
3550                 mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
3551                 if mobj:
3552                         description = mobj.group(1)
3553
3554                 # upload date
3555                 upload_date = None
3556                 mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
3557                 if mobj:
3558                         try:
3559                                 upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
3560                         except Exception, e:
3561                                 print str(e)
3562
3563                 # for soundcloud, a request to a cross domain is required for cookies
3564                 request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
3565
3566                 try:
3567                         self._downloader.process_info({
3568                                 'id':           video_id.decode('utf-8'),
3569                                 'url':          mediaURL,
3570                                 'uploader':     uploader.decode('utf-8'),
3571                                 'upload_date':  upload_date,
3572                                 'title':        simple_title.decode('utf-8'),
3573                                 'stitle':       simple_title.decode('utf-8'),
3574                                 'ext':          u'mp3',
3575                                 'format':       u'NA',
3576                                 'player_url':   None,
3577                                 'description': description.decode('utf-8')
3578                         })
3579                 except UnavailableVideoError:
3580                         self._downloader.trouble(u'\nERROR: unable to download video')
3581
3582
3583 class InfoQIE(InfoExtractor):
3584         """Information extractor for infoq.com"""
3585
3586         _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
3587         IE_NAME = u'infoq'
3588
3589         def report_webpage(self, video_id):
3590                 """Report information extraction."""
3591                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3592
3593         def report_extraction(self, video_id):
3594                 """Report information extraction."""
3595                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3596
3597         def _real_extract(self, url):
3598                 htmlParser = HTMLParser.HTMLParser()
3599
3600                 mobj = re.match(self._VALID_URL, url)
3601                 if mobj is None:
3602                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3603                         return
3604
3605                 self.report_webpage(url)
3606
3607                 request = urllib2.Request(url)
3608                 try:
3609                         webpage = urllib2.urlopen(request).read()
3610                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3611                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3612                         return
3613
3614                 self.report_extraction(url)
3615
3616
3617                 # Extract video URL
3618                 mobj = re.search(r"jsclassref='([^']*)'", webpage)
3619                 if mobj is None:
3620                         self._downloader.trouble(u'ERROR: unable to extract video url')
3621                         return
3622                 video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
3623
3624
3625                 # Extract title
3626                 mobj = re.search(r'contentTitle = "(.*?)";', webpage)
3627                 if mobj is None:
3628                         self._downloader.trouble(u'ERROR: unable to extract video title')
3629                         return
3630                 video_title = mobj.group(1).decode('utf-8')
3631
3632                 # Extract description
3633                 video_description = u'No description available.'
3634                 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
3635                 if mobj is not None:
3636                         video_description = mobj.group(1).decode('utf-8')
3637
3638                 video_filename = video_url.split('/')[-1]
3639                 video_id, extension = video_filename.split('.')
3640
3641                 self._downloader.increment_downloads()
3642                 info = {
3643                         'id': video_id,
3644                         'url': video_url,
3645                         'uploader': None,
3646                         'upload_date': None,
3647                         'title': video_title,
3648                         'stitle': _simplify_title(video_title),
3649                         'ext': extension,
3650                         'format': extension, # Extension is always(?) mp4, but seems to be flv
3651                         'thumbnail': None,
3652                         'description': video_description,
3653                         'player_url': None,
3654                 }
3655
3656                 try:
3657                         self._downloader.process_info(info)
3658                 except UnavailableVideoError, err:
3659                         self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
3660
3661 class MixcloudIE(InfoExtractor):
3662         """Information extractor for www.mixcloud.com"""
3663         _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
3664         IE_NAME = u'mixcloud'
3665
3666         def __init__(self, downloader=None):
3667                 InfoExtractor.__init__(self, downloader)
3668
3669         def report_download_json(self, file_id):
3670                 """Report JSON download."""
3671                 self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
3672
3673         def report_extraction(self, file_id):
3674                 """Report information extraction."""
3675                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
3676
3677         def get_urls(self, jsonData, fmt, bitrate='best'):
3678                 """Get urls from 'audio_formats' section in json"""
3679                 file_url = None
3680                 try:
3681                         bitrate_list = jsonData[fmt]
3682                         if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
3683                                 bitrate = max(bitrate_list) # select highest
3684
3685                         url_list = jsonData[fmt][bitrate]
3686                 except TypeError: # we have no bitrate info.
3687                         url_list = jsonData[fmt]
3688
3689                 return url_list
3690
3691         def check_urls(self, url_list):
3692                 """Returns 1st active url from list"""
3693                 for url in url_list:
3694                         try:
3695                                 urllib2.urlopen(url)
3696                                 return url
3697                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3698                                 url = None
3699
3700                 return None
3701
3702         def _print_formats(self, formats):
3703                 print 'Available formats:'
3704                 for fmt in formats.keys():
3705                         for b in formats[fmt]:
3706                                 try:
3707                                         ext = formats[fmt][b][0]
3708                                         print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
3709                                 except TypeError: # we have no bitrate info
3710                                         ext = formats[fmt][0]
3711                                         print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
3712                                         break
3713
3714         def _real_extract(self, url):
3715                 mobj = re.match(self._VALID_URL, url)
3716                 if mobj is None:
3717                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3718                         return
3719                 # extract uploader & filename from url
3720                 uploader = mobj.group(1).decode('utf-8')
3721                 file_id = uploader + "-" + mobj.group(2).decode('utf-8')
3722
3723                 # construct API request
3724                 file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
3725                 # retrieve .json file with links to files
3726                 request = urllib2.Request(file_url)
3727                 try:
3728                         self.report_download_json(file_url)
3729                         jsonData = urllib2.urlopen(request).read()
3730                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3731                         self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
3732                         return
3733
3734                 # parse JSON
3735                 json_data = json.loads(jsonData)
3736                 player_url = json_data['player_swf_url']
3737                 formats = dict(json_data['audio_formats'])
3738
3739                 req_format = self._downloader.params.get('format', None)
3740                 bitrate = None
3741
3742                 if self._downloader.params.get('listformats', None):
3743                         self._print_formats(formats)
3744                         return
3745
3746                 if req_format is None or req_format == 'best':
3747                         for format_param in formats.keys():
3748                                 url_list = self.get_urls(formats, format_param)
3749                                 # check urls
3750                                 file_url = self.check_urls(url_list)
3751                                 if file_url is not None:
3752                                         break # got it!
3753                 else:
3754                         if req_format not in formats.keys():
3755                                 self._downloader.trouble(u'ERROR: format is not available')
3756                                 return
3757
3758                         url_list = self.get_urls(formats, req_format)
3759                         file_url = self.check_urls(url_list)
3760                         format_param = req_format
3761
3762                 # We have audio
3763                 self._downloader.increment_downloads()
3764                 try:
3765                         # Process file information
3766                         self._downloader.process_info({
3767                                 'id': file_id.decode('utf-8'),
3768                                 'url': file_url.decode('utf-8'),
3769                                 'uploader':     uploader.decode('utf-8'),
3770                                 'upload_date': u'NA',
3771                                 'title': json_data['name'],
3772                                 'stitle': _simplify_title(json_data['name']),
3773                                 'ext': file_url.split('.')[-1].decode('utf-8'),
3774                                 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
3775                                 'thumbnail': json_data['thumbnail_url'],
3776                                 'description': json_data['description'],
3777                                 'player_url': player_url.decode('utf-8'),
3778                         })
3779                 except UnavailableVideoError, err:
3780                         self._downloader.trouble(u'ERROR: unable to download file')
3781
3782 class StanfordOpenClassroomIE(InfoExtractor):
3783         """Information extractor for Stanford's Open ClassRoom"""
3784
3785         _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
3786         IE_NAME = u'stanfordoc'
3787
3788         def report_download_webpage(self, objid):
3789                 """Report information extraction."""
3790                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid))
3791
3792         def report_extraction(self, video_id):
3793                 """Report information extraction."""
3794                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3795
3796         def _real_extract(self, url):
3797                 mobj = re.match(self._VALID_URL, url)
3798                 if mobj is None:
3799                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3800                         return
3801
3802                 if mobj.group('course') and mobj.group('video'): # A specific video
3803                         course = mobj.group('course')
3804                         video = mobj.group('video')
3805                         info = {
3806                                 'id': _simplify_title(course + '_' + video),
3807                         }
3808
3809                         self.report_extraction(info['id'])
3810                         baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
3811                         xmlUrl = baseUrl + video + '.xml'
3812                         try:
3813                                 metaXml = urllib2.urlopen(xmlUrl).read()
3814                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3815                                 self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
3816                                 return
3817                         mdoc = xml.etree.ElementTree.fromstring(metaXml)
3818                         try:
3819                                 info['title'] = mdoc.findall('./title')[0].text
3820                                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
3821                         except IndexError:
3822                                 self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3823                                 return
3824                         info['stitle'] = _simplify_title(info['title'])
3825                         info['ext'] = info['url'].rpartition('.')[2]
3826                         info['format'] = info['ext']
3827                         self._downloader.increment_downloads()
3828                         try:
3829                                 self._downloader.process_info(info)
3830                         except UnavailableVideoError, err:
3831                                 self._downloader.trouble(u'\nERROR: unable to download video')
3832                 elif mobj.group('course'): # A course page
3833                         unescapeHTML = HTMLParser.HTMLParser().unescape
3834
3835                         course = mobj.group('course')
3836                         info = {
3837                                 'id': _simplify_title(course),
3838                                 'type': 'playlist',
3839                         }
3840
3841                         self.report_download_webpage(info['id'])
3842                         try:
3843                                 coursepage = urllib2.urlopen(url).read()
3844                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3845                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
3846                                 return
3847
3848                         m = re.search('<h1>([^<]+)</h1>', coursepage)
3849                         if m:
3850                                 info['title'] = unescapeHTML(m.group(1))
3851                         else:
3852                                 info['title'] = info['id']
3853                         info['stitle'] = _simplify_title(info['title'])
3854
3855                         m = re.search('<description>([^<]+)</description>', coursepage)
3856                         if m:
3857                                 info['description'] = unescapeHTML(m.group(1))
3858
3859                         links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
3860                         info['list'] = [
3861                                 {
3862                                         'type': 'reference',
3863                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
3864                                 }
3865                                         for vpage in links]
3866
3867                         for entry in info['list']:
3868                                 assert entry['type'] == 'reference'
3869                                 self.extract(entry['url'])
3870                 else: # Root page
3871                         unescapeHTML = HTMLParser.HTMLParser().unescape
3872
3873                         info = {
3874                                 'id': 'Stanford OpenClassroom',
3875                                 'type': 'playlist',
3876                         }
3877
3878                         self.report_download_webpage(info['id'])
3879                         rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
3880                         try:
3881                                 rootpage = urllib2.urlopen(rootURL).read()
3882                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3883                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
3884                                 return
3885
3886                         info['title'] = info['id']
3887                         info['stitle'] = _simplify_title(info['title'])
3888
3889                         links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
3890                         info['list'] = [
3891                                 {
3892                                         'type': 'reference',
3893                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
3894                                 }
3895                                         for cpage in links]
3896
3897                         for entry in info['list']:
3898                                 assert entry['type'] == 'reference'
3899                                 self.extract(entry['url'])
3900
3901 class MTVIE(InfoExtractor):
3902         """Information extractor for MTV.com"""
3903
3904         _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
3905         IE_NAME = u'mtv'
3906
3907         def report_webpage(self, video_id):
3908                 """Report information extraction."""
3909                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3910
3911         def report_extraction(self, video_id):
3912                 """Report information extraction."""
3913                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3914
3915         def _real_extract(self, url):
3916                 mobj = re.match(self._VALID_URL, url)
3917                 if mobj is None:
3918                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3919                         return
3920                 if not mobj.group('proto'):
3921                         url = 'http://' + url
3922                 video_id = mobj.group('videoid')
3923                 self.report_webpage(video_id)
3924
3925                 request = urllib2.Request(url)
3926                 try:
3927                         webpage = urllib2.urlopen(request).read()
3928                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3929                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3930                         return
3931
3932                 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
3933                 if mobj is None:
3934                         self._downloader.trouble(u'ERROR: unable to extract song name')
3935                         return
3936                 song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3937                 mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
3938                 if mobj is None:
3939                         self._downloader.trouble(u'ERROR: unable to extract performer')
3940                         return
3941                 performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3942                 video_title = performer + ' - ' + song_name
3943
3944                 mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
3945                 if mobj is None:
3946                         self._downloader.trouble(u'ERROR: unable to mtvn_uri')
3947                         return
3948                 mtvn_uri = mobj.group(1)
3949
3950                 mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
3951                 if mobj is None:
3952                         self._downloader.trouble(u'ERROR: unable to extract content id')
3953                         return
3954                 content_id = mobj.group(1)
3955
3956                 videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
3957                 self.report_extraction(video_id)
3958                 request = urllib2.Request(videogen_url)
3959                 try:
3960                         metadataXml = urllib2.urlopen(request).read()
3961                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3962                         self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
3963                         return
3964
3965                 mdoc = xml.etree.ElementTree.fromstring(metadataXml)
3966                 renditions = mdoc.findall('.//rendition')
3967
3968                 # For now, always pick the highest quality.
3969                 rendition = renditions[-1]
3970
3971                 try:
3972                         _,_,ext = rendition.attrib['type'].partition('/')
3973                         format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
3974                         video_url = rendition.find('./src').text
3975                 except KeyError:
3976                         self._downloader.trouble('Invalid rendition field.')
3977                         return
3978
3979                 self._downloader.increment_downloads()
3980                 info = {
3981                         'id': video_id,
3982                         'url': video_url,
3983                         'uploader': performer,
3984                         'title': video_title,
3985                         'stitle': _simplify_title(video_title),
3986                         'ext': ext,
3987                         'format': format,
3988                 }
3989
3990                 try:
3991                         self._downloader.process_info(info)
3992                 except UnavailableVideoError, err:
3993                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
3994
3995
3996 class PostProcessor(object):
3997         """Post Processor class.
3998
3999         PostProcessor objects can be added to downloaders with their
4000         add_post_processor() method. When the downloader has finished a
4001         successful download, it will take its internal chain of PostProcessors
4002         and start calling the run() method on each one of them, first with
4003         an initial argument and then with the returned value of the previous
4004         PostProcessor.
4005
4006         The chain will be stopped if one of them ever returns None or the end
4007         of the chain is reached.
4008
4009         PostProcessor objects follow a "mutual registration" process similar
4010         to InfoExtractor objects.
4011         """
4012
4013         _downloader = None
4014
4015         def __init__(self, downloader=None):
4016                 self._downloader = downloader
4017
4018         def set_downloader(self, downloader):
4019                 """Sets the downloader for this PP."""
4020                 self._downloader = downloader
4021
4022         def run(self, information):
4023                 """Run the PostProcessor.
4024
4025                 The "information" argument is a dictionary like the ones
4026                 composed by InfoExtractors. The only difference is that this
4027                 one has an extra field called "filepath" that points to the
4028                 downloaded file.
4029
4030                 When this method returns None, the postprocessing chain is
4031                 stopped. However, this method may return an information
4032                 dictionary that will be passed to the next postprocessing
4033                 object in the chain. It can be the one it received after
4034                 changing some fields.
4035
4036                 In addition, this method may raise a PostProcessingError
4037                 exception that will be taken into account by the downloader
4038                 it was called from.
4039                 """
4040                 return information # by default, do nothing
4041
4042 class AudioConversionError(BaseException):
4043         def __init__(self, message):
4044                 self.message = message
4045
4046 class FFmpegExtractAudioPP(PostProcessor):
4047
4048         def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
4049                 PostProcessor.__init__(self, downloader)
4050                 if preferredcodec is None:
4051                         preferredcodec = 'best'
4052                 self._preferredcodec = preferredcodec
4053                 self._preferredquality = preferredquality
4054                 self._keepvideo = keepvideo
4055
4056         @staticmethod
4057         def get_audio_codec(path):
4058                 try:
4059                         cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
4060                         handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
4061                         output = handle.communicate()[0]
4062                         if handle.wait() != 0:
4063                                 return None
4064                 except (IOError, OSError):
4065                         return None
4066                 audio_codec = None
4067                 for line in output.split('\n'):
4068                         if line.startswith('codec_name='):
4069                                 audio_codec = line.split('=')[1].strip()
4070                         elif line.strip() == 'codec_type=audio' and audio_codec is not None:
4071                                 return audio_codec
4072                 return None
4073
4074         @staticmethod
4075         def run_ffmpeg(path, out_path, codec, more_opts):
4076                 if codec is None:
4077                         acodec_opts = []
4078                 else:
4079                         acodec_opts = ['-acodec', codec]
4080                 cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
4081                 try:
4082                         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
4083                         stdout,stderr = p.communicate()
4084                 except (IOError, OSError):
4085                         e = sys.exc_info()[1]
4086                         if isinstance(e, OSError) and e.errno == 2:
4087                                 raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
4088                         else:
4089                                 raise e
4090                 if p.returncode != 0:
4091                         msg = stderr.strip().split('\n')[-1]
4092                         raise AudioConversionError(msg)
4093
4094         def run(self, information):
4095                 path = information['filepath']
4096
4097                 filecodec = self.get_audio_codec(path)
4098                 if filecodec is None:
4099                         self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
4100                         return None
4101
4102                 more_opts = []
4103                 if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
4104                         if self._preferredcodec == 'm4a' and filecodec == 'aac':
4105                                 # Lossless, but in another container
4106                                 acodec = 'copy'
4107                                 extension = self._preferredcodec
4108                                 more_opts = ['-absf', 'aac_adtstoasc']
4109                         elif filecodec in ['aac', 'mp3', 'vorbis']:
4110                                 # Lossless if possible
4111                                 acodec = 'copy'
4112                                 extension = filecodec
4113                                 if filecodec == 'aac':
4114                                         more_opts = ['-f', 'adts']
4115                                 if filecodec == 'vorbis':
4116                                         extension = 'ogg'
4117                         else:
4118                                 # MP3 otherwise.
4119                                 acodec = 'libmp3lame'
4120                                 extension = 'mp3'
4121                                 more_opts = []
4122                                 if self._preferredquality is not None:
4123                                         more_opts += ['-ab', self._preferredquality]
4124                 else:
4125                         # We convert the audio (lossy)
4126                         acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
4127                         extension = self._preferredcodec
4128                         more_opts = []
4129                         if self._preferredquality is not None:
4130                                 more_opts += ['-ab', self._preferredquality]
4131                         if self._preferredcodec == 'aac':
4132                                 more_opts += ['-f', 'adts']
4133                         if self._preferredcodec == 'm4a':
4134                                 more_opts += ['-absf', 'aac_adtstoasc']
4135                         if self._preferredcodec == 'vorbis':
4136                                 extension = 'ogg'
4137                         if self._preferredcodec == 'wav':
4138                                 extension = 'wav'
4139                                 more_opts += ['-f', 'wav']
4140
4141                 prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
4142                 new_path = prefix + sep + extension
4143                 self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
4144                 try:
4145                         self.run_ffmpeg(path, new_path, acodec, more_opts)
4146                 except:
4147                         etype,e,tb = sys.exc_info()
4148                         if isinstance(e, AudioConversionError):
4149                                 self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
4150                         else:
4151                                 self._downloader.to_stderr(u'ERROR: error running ffmpeg')
4152                         return None
4153
4154                 # Try to update the date time for extracted audio file.
4155                 if information.get('filetime') is not None:
4156                         try:
4157                                 os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
4158                         except:
4159                                 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
4160
4161                 if not self._keepvideo:
4162                         try:
4163                                 os.remove(_encodeFilename(path))
4164                         except (IOError, OSError):
4165                                 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
4166                                 return None
4167
4168                 information['filepath'] = new_path
4169                 return information
4170
4171
4172 def updateSelf(downloader, filename):
4173         ''' Update the program file with the latest version from the repository '''
4174         # Note: downloader only used for options
4175         if not os.access(filename, os.W_OK):
4176                 sys.exit('ERROR: no write permissions on %s' % filename)
4177
4178         downloader.to_screen(u'Updating to latest version...')
4179
4180         try:
4181                 try:
4182                         urlh = urllib.urlopen(UPDATE_URL)
4183                         newcontent = urlh.read()
4184
4185                         vmatch = re.search("__version__ = '([^']+)'", newcontent)
4186                         if vmatch is not None and vmatch.group(1) == __version__:
4187                                 downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
4188                                 return
4189                 finally:
4190                         urlh.close()
4191         except (IOError, OSError), err:
4192                 sys.exit('ERROR: unable to download latest version')
4193
4194         try:
4195                 outf = open(filename, 'wb')
4196                 try:
4197                         outf.write(newcontent)
4198                 finally:
4199                         outf.close()
4200         except (IOError, OSError), err:
4201                 sys.exit('ERROR: unable to overwrite current version')
4202
4203         downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
4204
4205 def parseOpts():
4206         def _readOptions(filename_bytes):
4207                 try:
4208                         optionf = open(filename_bytes)
4209                 except IOError:
4210                         return [] # silently skip if file is not present
4211                 try:
4212                         res = []
4213                         for l in optionf:
4214                                 res += shlex.split(l, comments=True)
4215                 finally:
4216                         optionf.close()
4217                 return res
4218
4219         def _format_option_string(option):
4220                 ''' ('-o', '--option') -> -o, --format METAVAR'''
4221
4222                 opts = []
4223
4224                 if option._short_opts: opts.append(option._short_opts[0])
4225                 if option._long_opts: opts.append(option._long_opts[0])
4226                 if len(opts) > 1: opts.insert(1, ', ')
4227
4228                 if option.takes_value(): opts.append(' %s' % option.metavar)
4229
4230                 return "".join(opts)
4231
4232         def _find_term_columns():
4233                 columns = os.environ.get('COLUMNS', None)
4234                 if columns:
4235                         return int(columns)
4236
4237                 try:
4238                         sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
4239                         out,err = sp.communicate()
4240                         return int(out.split()[1])
4241                 except:
4242                         pass
4243                 return None
4244
4245         max_width = 80
4246         max_help_position = 80
4247
4248         # No need to wrap help messages if we're on a wide console
4249         columns = _find_term_columns()
4250         if columns: max_width = columns
4251
4252         fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4253         fmt.format_option_strings = _format_option_string
4254
4255         kw = {
4256                 'version'   : __version__,
4257                 'formatter' : fmt,
4258                 'usage' : '%prog [options] url [url...]',
4259                 'conflict_handler' : 'resolve',
4260         }
4261
4262         parser = optparse.OptionParser(**kw)
4263
4264         # option groups
4265         general        = optparse.OptionGroup(parser, 'General Options')
4266         selection      = optparse.OptionGroup(parser, 'Video Selection')
4267         authentication = optparse.OptionGroup(parser, 'Authentication Options')
4268         video_format   = optparse.OptionGroup(parser, 'Video Format Options')
4269         postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
4270         filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
4271         verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
4272
4273         general.add_option('-h', '--help',
4274                         action='help', help='print this help text and exit')
4275         general.add_option('-v', '--version',
4276                         action='version', help='print program version and exit')
4277         general.add_option('-U', '--update',
4278                         action='store_true', dest='update_self', help='update this program to latest version')
4279         general.add_option('-i', '--ignore-errors',
4280                         action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
4281         general.add_option('-r', '--rate-limit',
4282                         dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
4283         general.add_option('-R', '--retries',
4284                         dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4285         general.add_option('--dump-user-agent',
4286                         action='store_true', dest='dump_user_agent',
4287                         help='display the current browser identification', default=False)
4288         general.add_option('--list-extractors',
4289                         action='store_true', dest='list_extractors',
4290                         help='List all supported extractors and the URLs they would handle', default=False)
4291
4292         selection.add_option('--playlist-start',
4293                         dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
4294         selection.add_option('--playlist-end',
4295                         dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
4296         selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
4297         selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
4298         selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
4299
4300         authentication.add_option('-u', '--username',
4301                         dest='username', metavar='USERNAME', help='account username')
4302         authentication.add_option('-p', '--password',
4303                         dest='password', metavar='PASSWORD', help='account password')
4304         authentication.add_option('-n', '--netrc',
4305                         action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
4306
4307
4308         video_format.add_option('-f', '--format',
4309                         action='store', dest='format', metavar='FORMAT', help='video format code')
4310         video_format.add_option('--all-formats',
4311                         action='store_const', dest='format', help='download all available video formats', const='all')
4312         video_format.add_option('--prefer-free-formats',
4313                         action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
4314         video_format.add_option('--max-quality',
4315                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
4316         video_format.add_option('-F', '--list-formats',
4317                         action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
4318
4319
4320         verbosity.add_option('-q', '--quiet',
4321                         action='store_true', dest='quiet', help='activates quiet mode', default=False)
4322         verbosity.add_option('-s', '--simulate',
4323                         action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
4324         verbosity.add_option('--skip-download',
4325                         action='store_true', dest='skip_download', help='do not download the video', default=False)
4326         verbosity.add_option('-g', '--get-url',
4327                         action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
4328         verbosity.add_option('-e', '--get-title',
4329                         action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
4330         verbosity.add_option('--get-thumbnail',
4331                         action='store_true', dest='getthumbnail',
4332                         help='simulate, quiet but print thumbnail URL', default=False)
4333         verbosity.add_option('--get-description',
4334                         action='store_true', dest='getdescription',
4335                         help='simulate, quiet but print video description', default=False)
4336         verbosity.add_option('--get-filename',
4337                         action='store_true', dest='getfilename',
4338                         help='simulate, quiet but print output filename', default=False)
4339         verbosity.add_option('--get-format',
4340                         action='store_true', dest='getformat',
4341                         help='simulate, quiet but print output format', default=False)
4342         verbosity.add_option('--no-progress',
4343                         action='store_true', dest='noprogress', help='do not print progress bar', default=False)
4344         verbosity.add_option('--console-title',
4345                         action='store_true', dest='consoletitle',
4346                         help='display progress in console titlebar', default=False)
4347         verbosity.add_option('-v', '--verbose',
4348                         action='store_true', dest='verbose', help='print various debugging information', default=False)
4349
4350
4351         filesystem.add_option('-t', '--title',
4352                         action='store_true', dest='usetitle', help='use title in file name', default=False)
4353         filesystem.add_option('-l', '--literal',
4354                         action='store_true', dest='useliteral', help='use literal title in file name', default=False)
4355         filesystem.add_option('-A', '--auto-number',
4356                         action='store_true', dest='autonumber',
4357                         help='number downloaded files starting from 00000', default=False)
4358         filesystem.add_option('-o', '--output',
4359                         dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
4360         filesystem.add_option('-a', '--batch-file',
4361                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
4362         filesystem.add_option('-w', '--no-overwrites',
4363                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
4364         filesystem.add_option('-c', '--continue',
4365                         action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
4366         filesystem.add_option('--no-continue',
4367                         action='store_false', dest='continue_dl',
4368                         help='do not resume partially downloaded files (restart from beginning)')
4369         filesystem.add_option('--cookies',
4370                         dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
4371         filesystem.add_option('--no-part',
4372                         action='store_true', dest='nopart', help='do not use .part files', default=False)
4373         filesystem.add_option('--no-mtime',
4374                         action='store_false', dest='updatetime',
4375                         help='do not use the Last-modified header to set the file modification time', default=True)
4376         filesystem.add_option('--write-description',
4377                         action='store_true', dest='writedescription',
4378                         help='write video description to a .description file', default=False)
4379         filesystem.add_option('--write-info-json',
4380                         action='store_true', dest='writeinfojson',
4381                         help='write video metadata to a .info.json file', default=False)
4382
4383
4384         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
4385                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
4386         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
4387                         help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
4388         postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
4389                         help='ffmpeg audio bitrate specification, 128k by default')
4390         postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
4391                         help='keeps the video file on disk after the post-processing; the video is erased by default')
4392
4393
4394         parser.add_option_group(general)
4395         parser.add_option_group(selection)
4396         parser.add_option_group(filesystem)
4397         parser.add_option_group(verbosity)
4398         parser.add_option_group(video_format)
4399         parser.add_option_group(authentication)
4400         parser.add_option_group(postproc)
4401
4402         xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
4403         if xdg_config_home:
4404                 userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
4405         else:
4406                 userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
4407         argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
4408         opts, args = parser.parse_args(argv)
4409
4410         return parser, opts, args
4411
4412 def gen_extractors():
4413         """ Return a list of an instance of every supported extractor.
4414         The order does matter; the first extractor matched is the one handling the URL.
4415         """
4416         youtube_ie = YoutubeIE()
4417         google_ie = GoogleIE()
4418         yahoo_ie = YahooIE()
4419         return [
4420                 YoutubePlaylistIE(youtube_ie),
4421                 YoutubeUserIE(youtube_ie),
4422                 YoutubeSearchIE(youtube_ie),
4423                 youtube_ie,
4424                 MetacafeIE(youtube_ie),
4425                 DailymotionIE(),
4426                 google_ie,
4427                 GoogleSearchIE(google_ie),
4428                 PhotobucketIE(),
4429                 yahoo_ie,
4430                 YahooSearchIE(yahoo_ie),
4431                 DepositFilesIE(),
4432                 FacebookIE(),
4433                 BlipTVIE(),
4434                 VimeoIE(),
4435                 MyVideoIE(),
4436                 ComedyCentralIE(),
4437                 EscapistIE(),
4438                 CollegeHumorIE(),
4439                 XVideosIE(),
4440                 SoundcloudIE(),
4441                 InfoQIE(),
4442                 MixcloudIE(),
4443                 StanfordOpenClassroomIE(),
4444                 MTVIE(),
4445
4446                 GenericIE()
4447         ]
4448
4449 def _real_main():
4450         parser, opts, args = parseOpts()
4451
4452         # Open appropriate CookieJar
4453         if opts.cookiefile is None:
4454                 jar = cookielib.CookieJar()
4455         else:
4456                 try:
4457                         jar = cookielib.MozillaCookieJar(opts.cookiefile)
4458                         if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
4459                                 jar.load()
4460                 except (IOError, OSError), err:
4461                         sys.exit(u'ERROR: unable to open cookie file')
4462
4463         # Dump user agent
4464         if opts.dump_user_agent:
4465                 print std_headers['User-Agent']
4466                 sys.exit(0)
4467
4468         # Batch file verification
4469         batchurls = []
4470         if opts.batchfile is not None:
4471                 try:
4472                         if opts.batchfile == '-':
4473                                 batchfd = sys.stdin
4474                         else:
4475                                 batchfd = open(opts.batchfile, 'r')
4476                         batchurls = batchfd.readlines()
4477                         batchurls = [x.strip() for x in batchurls]
4478                         batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
4479                 except IOError:
4480                         sys.exit(u'ERROR: batch file could not be read')
4481         all_urls = batchurls + args
4482
4483         # General configuration
4484         cookie_processor = urllib2.HTTPCookieProcessor(jar)
4485         proxy_handler = urllib2.ProxyHandler()
4486         opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
4487         urllib2.install_opener(opener)
4488         socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
4489
4490         if opts.verbose:
4491                 print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
4492
4493         extractors = gen_extractors()
4494
4495         if opts.list_extractors:
4496                 for ie in extractors:
4497                         print(ie.IE_NAME)
4498                         matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
4499                         all_urls = filter(lambda url: url not in matchedUrls, all_urls)
4500                         for mu in matchedUrls:
4501                                 print(u'  ' + mu)
4502                 sys.exit(0)
4503
4504         # Conflicting, missing and erroneous options
4505         if opts.usenetrc and (opts.username is not None or opts.password is not None):
4506                 parser.error(u'using .netrc conflicts with giving username/password')
4507         if opts.password is not None and opts.username is None:
4508                 parser.error(u'account username missing')
4509         if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
4510                 parser.error(u'using output template conflicts with using title, literal title or auto number')
4511         if opts.usetitle and opts.useliteral:
4512                 parser.error(u'using title conflicts with using literal title')
4513         if opts.username is not None and opts.password is None:
4514                 opts.password = getpass.getpass(u'Type account password and press return:')
4515         if opts.ratelimit is not None:
4516                 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
4517                 if numeric_limit is None:
4518                         parser.error(u'invalid rate limit specified')
4519                 opts.ratelimit = numeric_limit
4520         if opts.retries is not None:
4521                 try:
4522                         opts.retries = long(opts.retries)
4523                 except (TypeError, ValueError), err:
4524                         parser.error(u'invalid retry count specified')
4525         try:
4526                 opts.playliststart = int(opts.playliststart)
4527                 if opts.playliststart <= 0:
4528                         raise ValueError(u'Playlist start must be positive')
4529         except (TypeError, ValueError), err:
4530                 parser.error(u'invalid playlist start number specified')
4531         try:
4532                 opts.playlistend = int(opts.playlistend)
4533                 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
4534                         raise ValueError(u'Playlist end must be greater than playlist start')
4535         except (TypeError, ValueError), err:
4536                 parser.error(u'invalid playlist end number specified')
4537         if opts.extractaudio:
4538                 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
4539                         parser.error(u'invalid audio format specified')
4540
4541         # File downloader
4542         fd = FileDownloader({
4543                 'usenetrc': opts.usenetrc,
4544                 'username': opts.username,
4545                 'password': opts.password,
4546                 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4547                 'forceurl': opts.geturl,
4548                 'forcetitle': opts.gettitle,
4549                 'forcethumbnail': opts.getthumbnail,
4550                 'forcedescription': opts.getdescription,
4551                 'forcefilename': opts.getfilename,
4552                 'forceformat': opts.getformat,
4553                 'simulate': opts.simulate,
4554                 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4555                 'format': opts.format,
4556                 'format_limit': opts.format_limit,
4557                 'listformats': opts.listformats,
4558                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
4559                         or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
4560                         or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
4561                         or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
4562                         or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
4563                         or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
4564                         or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
4565                         or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
4566                         or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
4567                         or u'%(id)s.%(ext)s'),
4568                 'ignoreerrors': opts.ignoreerrors,
4569                 'ratelimit': opts.ratelimit,
4570                 'nooverwrites': opts.nooverwrites,
4571                 'retries': opts.retries,
4572                 'continuedl': opts.continue_dl,
4573                 'noprogress': opts.noprogress,
4574                 'playliststart': opts.playliststart,
4575                 'playlistend': opts.playlistend,
4576                 'logtostderr': opts.outtmpl == '-',
4577                 'consoletitle': opts.consoletitle,
4578                 'nopart': opts.nopart,
4579                 'updatetime': opts.updatetime,
4580                 'writedescription': opts.writedescription,
4581                 'writeinfojson': opts.writeinfojson,
4582                 'matchtitle': opts.matchtitle,
4583                 'rejecttitle': opts.rejecttitle,
4584                 'max_downloads': opts.max_downloads,
4585                 'prefer_free_formats': opts.prefer_free_formats,
4586                 'verbose': opts.verbose,
4587                 })
4588         for extractor in extractors:
4589                 fd.add_info_extractor(extractor)
4590
4591         # PostProcessors
4592         if opts.extractaudio:
4593                 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
4594
4595         # Update version
4596         if opts.update_self:
4597                 updateSelf(fd, sys.argv[0])
4598
4599         # Maybe do nothing
4600         if len(all_urls) < 1:
4601                 if not opts.update_self:
4602                         parser.error(u'you must provide at least one URL')
4603                 else:
4604                         sys.exit()
4605
4606         try:
4607                 retcode = fd.download(all_urls)
4608         except MaxDownloadsReached:
4609                 fd.to_screen(u'--max-download limit reached, aborting.')
4610                 retcode = 101
4611
4612         # Dump cookie jar if requested
4613         if opts.cookiefile is not None:
4614                 try:
4615                         jar.save()
4616                 except (IOError, OSError), err:
4617                         sys.exit(u'ERROR: unable to save cookie jar')
4618
4619         sys.exit(retcode)
4620
4621 def main():
4622         try:
4623                 _real_main()
4624         except DownloadError:
4625                 sys.exit(1)
4626         except SameFileError:
4627                 sys.exit(u'ERROR: fixed output name but more than one file to download')
4628         except KeyboardInterrupt:
4629                 sys.exit(u'\nERROR: Interrupted by user')
4630
4631 if __name__ == '__main__':
4632         main()
4633
4634 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: