2 # -*- coding: utf-8 -*-
5 'Ricardo Garcia Gonzalez',
13 'Philipp Hagemeister',
20 __license__ = 'Public Domain'
21 __version__ = '2012.01.08b'
23 UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl'
52 except ImportError: # Python 2.4
55 import cStringIO as StringIO
59 # parse_qs was moved from the cgi module to the urlparse module recently.
61 from urlparse import parse_qs
63 from cgi import parse_qs
71 import xml.etree.ElementTree
72 except ImportError: # Python<2.5: Not officially supported, but let it slip
73 warnings.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.')
76 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1',
77 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
78 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
79 'Accept-Encoding': 'gzip, deflate',
80 'Accept-Language': 'en-us,en;q=0.5',
85 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson):
91 def raiseError(msg, i):
92 raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]))
93 def skipSpace(i, expectMore=True):
94 while i < len(s) and s[i] in ' \t\r\n':
98 raiseError('Premature end', i)
100 def decodeEscape(match):
116 return unichr(int(esc[1:5], 16))
117 if len(esc) == 5+6 and esc[5:7] == '\\u':
118 hi = int(esc[1:5], 16)
119 low = int(esc[7:11], 16)
120 return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000)
121 raise ValueError('Unknown escape ' + str(esc))
128 while s[e-bslashes-1] == '\\':
130 if bslashes % 2 == 1:
134 rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)')
135 stri = rexp.sub(decodeEscape, s[i:e])
141 if s[i] == '}': # Empty dictionary
145 raiseError('Expected a string object key', i)
146 i,key = parseString(i)
148 if i >= len(s) or s[i] != ':':
149 raiseError('Expected a colon', i)
156 raiseError('Expected comma or closing curly brace', i)
161 if s[i] == ']': # Empty array
166 i = skipSpace(i) # Raise exception if premature end
170 raiseError('Expected a comma or closing bracket', i)
172 def parseDiscrete(i):
173 for k,v in {'true': True, 'false': False, 'null': None}.items():
174 if s.startswith(k, i):
176 raiseError('Not a boolean (or null)', i)
178 mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:])
180 raiseError('Not a number', i)
182 if '.' in nums or 'e' in nums or 'E' in nums:
183 return (i+len(nums), float(nums))
184 return (i+len(nums), int(nums))
185 CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete}
188 i,res = CHARMAP.get(s[i], parseNumber)(i)
189 i = skipSpace(i, False)
193 raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')')
196 def preferredencoding():
197 """Get preferred encoding.
199 Returns the best encoding scheme for the system, based on
200 locale.getpreferredencoding() and some further tweaks.
202 def yield_preferredencoding():
204 pref = locale.getpreferredencoding()
210 return yield_preferredencoding().next()
213 def htmlentity_transform(matchobj):
214 """Transforms an HTML entity to a Unicode character.
216 This function receives a match object and is intended to be used with
217 the re.sub() function.
219 entity = matchobj.group(1)
221 # Known non-numeric HTML entity
222 if entity in htmlentitydefs.name2codepoint:
223 return unichr(htmlentitydefs.name2codepoint[entity])
226 mobj = re.match(ur'(?u)#(x?\d+)', entity)
228 numstr = mobj.group(1)
229 if numstr.startswith(u'x'):
231 numstr = u'0%s' % numstr
234 return unichr(long(numstr, base))
236 # Unknown entity in name, return its literal representation
237 return (u'&%s;' % entity)
def sanitize_title(utitle):
    """Sanitizes a video title so it could be used as part of a filename.

    HTML entities in the title are decoded through htmlentity_transform(),
    and every occurrence of the platform path separator is replaced with
    u'%' so the title cannot introduce extra path components.
    """
    # u'(?u)&(.+?);' has exactly the same string value as the original
    # Python-2-only literal ur'(?u)&(.+?);' (the pattern contains no
    # backslashes), but the u'' form also parses on Python 3.3+.
    utitle = re.sub(u'(?u)&(.+?);', htmlentity_transform, utitle)
    return utitle.replace(unicode(os.sep), u'%')
246 def sanitize_open(filename, open_mode):
247 """Try to open the given filename, and slightly tweak it if this fails.
249 Attempts to open the given filename. If this fails, it tries to change
250 the filename slightly, step by step, until it's either able to open it
251 or it fails and raises a final exception, like the standard open()
254 It returns the tuple (stream, definitive_file_name).
258 if sys.platform == 'win32':
260 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
261 return (sys.stdout, filename)
262 stream = open(_encodeFilename(filename), open_mode)
263 return (stream, filename)
264 except (IOError, OSError), err:
265 # In case of error, try to remove win32 forbidden chars
266 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
268 # An exception here should be caught in the caller
269 stream = open(_encodeFilename(filename), open_mode)
270 return (stream, filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    # parsedate_tz returns None when the date string cannot be parsed,
    # so only convert to a timestamp when parsing succeeded.
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    # NOTE(review): the remainder of this function (initialising and
    # returning `timestamp`) is outside the visible excerpt.
281 def _simplify_title(title):
282 expr = re.compile(ur'[^\w\d_\-]+', flags=re.UNICODE)
283 return expr.sub(u'_', title).strip(u'_')
285 def _orderedSet(iterable):
286 """ Remove all duplicates from the input iterable """
293 def _unescapeHTML(s):
295 @param s a string (of type unicode)
297 assert type(s) == type(u'')
299 htmlParser = HTMLParser.HTMLParser()
300 return htmlParser.unescape(s)
302 def _encodeFilename(s):
304 @param s The name of the file (of type unicode)
307 assert type(s) == type(u'')
309 if sys.platform == 'win32' and sys.getwindowsversion().major >= 5:
310 # Pass u'' directly to use Unicode APIs on Windows 2000 and up
311 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
312 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
315 return s.encode(sys.getfilesystemencoding(), 'ignore')
317 class DownloadError(Exception):
318 """Download Error exception.
320 This exception may be thrown by FileDownloader objects if they are not
321 configured to continue on errors. They will contain the appropriate
327 class SameFileError(Exception):
328 """Same File exception.
330 This exception will be thrown by FileDownloader objects if they detect
331 multiple files would have to be downloaded to the same file on disk.
336 class PostProcessingError(Exception):
337 """Post Processing exception.
339 This exception may be raised by PostProcessor's .run() method to
340 indicate an error in the postprocessing task.
344 class MaxDownloadsReached(Exception):
345 """ --max-downloads limit has been reached. """
349 class UnavailableVideoError(Exception):
350 """Unavailable Format exception.
352 This exception will be thrown when a video is requested
353 in a format that is not available for that video.
358 class ContentTooShortError(Exception):
359 """Content Too Short exception.
361 This exception may be raised by FileDownloader objects when a file they
362 download is too small for what the server announced first, indicating
363 the connection was probably interrupted.
def __init__(self, downloaded, expected):
    # Both byte counts are stored so the caller can report the mismatch
    # (see the 'content too short' error message in process_info).
    self.downloaded = downloaded
    self.expected = expected
374 class YoutubeDLHandler(urllib2.HTTPHandler):
375 """Handler for HTTP requests and responses.
377 This class, when installed with an OpenerDirector, automatically adds
378 the standard headers to every HTTP request and handles gzipped and
379 deflated responses from web servers. If compression is to be avoided in
380 a particular request, the original request in the program code only has
381 to include the HTTP header "Youtubedl-No-Compression", which will be
382 removed before making the real request.
384 Part of this code was copied from:
386 http://techknack.net/python-urllib2-handlers/
388 Andrew Rowls, the author of that code, agreed to release it to the
395 return zlib.decompress(data, -zlib.MAX_WBITS)
397 return zlib.decompress(data)
400 def addinfourl_wrapper(stream, headers, url, code):
401 if hasattr(urllib2.addinfourl, 'getcode'):
402 return urllib2.addinfourl(stream, headers, url, code)
403 ret = urllib2.addinfourl(stream, headers, url)
407 def http_request(self, req):
408 for h in std_headers:
411 req.add_header(h, std_headers[h])
412 if 'Youtubedl-no-compression' in req.headers:
413 if 'Accept-encoding' in req.headers:
414 del req.headers['Accept-encoding']
415 del req.headers['Youtubedl-no-compression']
418 def http_response(self, req, resp):
421 if resp.headers.get('Content-encoding', '') == 'gzip':
422 gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
423 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
424 resp.msg = old_resp.msg
426 if resp.headers.get('Content-encoding', '') == 'deflate':
427 gz = StringIO.StringIO(self.deflate(resp.read()))
428 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
429 resp.msg = old_resp.msg
433 class FileDownloader(object):
434 """File Downloader class.
436 File downloader objects are the ones responsible of downloading the
437 actual video file and writing it to disk if the user has requested
438 it, among some other tasks. In most cases there should be one per
439 program. As, given a video URL, the downloader doesn't know how to
440 extract all the needed information, task that InfoExtractors do, it
441 has to pass the URL to one of them.
443 For this, file downloader objects have a method that allows
444 InfoExtractors to be registered in a given order. When it is passed
445 a URL, the file downloader handles it to the first InfoExtractor it
446 finds that reports being able to handle it. The InfoExtractor extracts
447 all the information about the video or videos the URL refers to, and
448 asks the FileDownloader to process the video information, possibly
449 downloading the video.
451 File downloaders accept a lot of parameters. In order not to saturate
452 the object constructor with arguments, it receives a dictionary of
453 options instead. These options are available through the params
454 attribute for the InfoExtractors to use. The FileDownloader also
455 registers itself as the downloader in charge for the InfoExtractors
456 that are added to it, so this is a "mutual registration".
460 username: Username for authentication purposes.
461 password: Password for authentication purposes.
462 usenetrc: Use netrc for authentication instead.
463 quiet: Do not print messages to stdout.
464 forceurl: Force printing final URL.
465 forcetitle: Force printing title.
466 forcethumbnail: Force printing thumbnail URL.
467 forcedescription: Force printing description.
468 forcefilename: Force printing final filename.
469 simulate: Do not download the video files.
470 format: Video format code.
471 format_limit: Highest quality format to try.
472 outtmpl: Template for output names.
473 ignoreerrors: Do not stop on download errors.
474 ratelimit: Download speed limit, in bytes/sec.
475 nooverwrites: Prevent overwriting files.
476 retries: Number of times to retry for HTTP error 5xx
477 continuedl: Try to continue downloads if possible.
478 noprogress: Do not print the progress bar.
479 playliststart: Playlist item to start at.
480 playlistend: Playlist item to end at.
481 matchtitle: Download only matching titles.
482 rejecttitle: Reject downloads for matching titles.
483 logtostderr: Log messages to stderr instead of stdout.
484 consoletitle: Display progress in console window's titlebar.
485 nopart: Do not use temporary .part files.
486 updatetime: Use the Last-modified header to set output file timestamps.
487 writedescription: Write the video description to a .description file
488 writeinfojson: Write the video description to a .info.json file
494 _download_retcode = None
495 _num_downloads = None
498 def __init__(self, params):
499 """Create a FileDownloader object with the given options."""
502 self._download_retcode = 0
503 self._num_downloads = 0
504 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
508 def format_bytes(bytes):
511 if type(bytes) is str:
516 exponent = long(math.log(bytes, 1024.0))
517 suffix = 'bkMGTPEZY'[exponent]
518 converted = float(bytes) / float(1024 ** exponent)
519 return '%.2f%s' % (converted, suffix)
522 def calc_percent(byte_counter, data_len):
525 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
528 def calc_eta(start, now, total, current):
532 if current == 0 or dif < 0.001: # One millisecond
534 rate = float(current) / dif
535 eta = long((float(total) - float(current)) / rate)
536 (eta_mins, eta_secs) = divmod(eta, 60)
539 return '%02d:%02d' % (eta_mins, eta_secs)
542 def calc_speed(start, now, bytes):
544 if bytes == 0 or dif < 0.001: # One millisecond
545 return '%10s' % '---b/s'
546 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
549 def best_block_size(elapsed_time, bytes):
550 new_min = max(bytes / 2.0, 1.0)
551 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
552 if elapsed_time < 0.001:
554 rate = bytes / elapsed_time
562 def parse_bytes(bytestr):
563 """Parse a string indicating a byte quantity into a long integer."""
564 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
567 number = float(matchobj.group(1))
568 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
569 return long(round(number * multiplier))
571 def add_info_extractor(self, ie):
572 """Add an InfoExtractor object to the end of the list."""
574 ie.set_downloader(self)
576 def add_post_processor(self, pp):
577 """Add a PostProcessor object to the end of the chain."""
579 pp.set_downloader(self)
581 def to_screen(self, message, skip_eol=False):
582 """Print message to stdout if not in quiet mode."""
583 assert type(message) == type(u'')
584 if not self.params.get('quiet', False):
585 terminator = [u'\n', u''][skip_eol]
586 output = message + terminator
588 if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
589 output = output.encode(preferredencoding(), 'ignore')
590 self._screen_file.write(output)
591 self._screen_file.flush()
def to_stderr(self, message):
    """Print message to stderr."""
    # Encode explicitly: under Python 2 a unicode message may fail to
    # print when stderr's encoding is unset (e.g. redirected output).
    print >>sys.stderr, message.encode(preferredencoding())
597 def to_cons_title(self, message):
598 """Set console/terminal window title to message."""
599 if not self.params.get('consoletitle', False):
601 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
602 # c_wchar_p() might not be necessary if `message` is
603 # already of type unicode()
604 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
605 elif 'TERM' in os.environ:
606 sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
def fixed_template(self):
    """Checks if the output template is fixed.

    Returns True when the 'outtmpl' parameter contains no %(...)s
    substitution fields, i.e. every download would be written to the
    very same file (download() raises SameFileError in that case for
    multi-URL runs).
    """
    # u'(?u)%\\(.+?\\)s' has exactly the same string value as the
    # original Python-2-only literal ur'(?u)%\(.+?\)s'.
    return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
612 def trouble(self, message=None):
613 """Determine action to take when a download problem appears.
615 Depending on if the downloader has been configured to ignore
616 download errors or not, this method may throw an exception or
617 not when errors are found, after printing the message.
619 if message is not None:
620 self.to_stderr(message)
621 if not self.params.get('ignoreerrors', False):
622 raise DownloadError(message)
623 self._download_retcode = 1
625 def slow_down(self, start_time, byte_counter):
626 """Sleep if the download speed is over the rate limit."""
627 rate_limit = self.params.get('ratelimit', None)
628 if rate_limit is None or byte_counter == 0:
631 elapsed = now - start_time
634 speed = float(byte_counter) / elapsed
635 if speed > rate_limit:
636 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
638 def temp_name(self, filename):
639 """Returns a temporary filename for the given filename."""
640 if self.params.get('nopart', False) or filename == u'-' or \
641 (os.path.exists(_encodeFilename(filename)) and not os.path.isfile(_encodeFilename(filename))):
643 return filename + u'.part'
645 def undo_temp_name(self, filename):
646 if filename.endswith(u'.part'):
647 return filename[:-len(u'.part')]
650 def try_rename(self, old_filename, new_filename):
652 if old_filename == new_filename:
654 os.rename(_encodeFilename(old_filename), _encodeFilename(new_filename))
655 except (IOError, OSError), err:
656 self.trouble(u'ERROR: unable to rename file')
658 def try_utime(self, filename, last_modified_hdr):
659 """Try to set the last-modified time of the given file."""
660 if last_modified_hdr is None:
662 if not os.path.isfile(_encodeFilename(filename)):
664 timestr = last_modified_hdr
667 filetime = timeconvert(timestr)
671 os.utime(filename, (time.time(), filetime))
def report_writedescription(self, descfn):
    """Announce that the video description is being written to *descfn*."""
    message = u'[info] Writing video description to: ' + descfn
    self.to_screen(message)
def report_writeinfojson(self, infofn):
    """Announce that the JSON metadata file *infofn* is being written."""
    message = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(message)
def report_destination(self, filename):
    """Show the destination filename of the current download."""
    message = u'[download] Destination: ' + filename
    self.to_screen(message)
688 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
689 """Report download progress."""
690 if self.params.get('noprogress', False):
692 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
693 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
694 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
695 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
def report_resuming_byte(self, resume_len):
    """Announce that the download resumes at byte offset *resume_len*."""
    message = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
def report_retry(self, count, retries):
    """Announce a retry (attempt *count* of *retries*) after a server-side HTTP error."""
    message = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
    self.to_screen(message)
705 def report_file_already_downloaded(self, file_name):
706 """Report file has already been fully downloaded."""
708 self.to_screen(u'[download] %s has already been downloaded' % file_name)
709 except (UnicodeEncodeError), err:
710 self.to_screen(u'[download] The file has already been downloaded')
def report_unable_to_resume(self):
    """Announce that resuming the partial download was not possible."""
    message = u'[download] Unable to resume'
    self.to_screen(message)
def report_finish(self):
    """Report download finished."""
    # When 'noprogress' is set there was no progress line to terminate,
    # so print an explicit completion message instead.
    if self.params.get('noprogress', False):
        self.to_screen(u'[download] Download completed')
    # NOTE(review): the else-branch of this method is outside the
    # visible excerpt.
def increment_downloads(self):
    """Advance the ordinal used to number downloaded files (autonumber)."""
    self._num_downloads = self._num_downloads + 1
727 def prepare_filename(self, info_dict):
728 """Generate the output filename."""
730 template_dict = dict(info_dict)
731 template_dict['epoch'] = unicode(long(time.time()))
732 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
733 filename = self.params['outtmpl'] % template_dict
735 except (ValueError, KeyError), err:
736 self.trouble(u'ERROR: invalid system charset or erroneous output template')
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded """
    title = info_dict['title']
    # 'matchtitle' param: download only titles matching this pattern
    # (case-insensitive regex search).
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
        return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
    # 'rejecttitle' param: skip titles matching this pattern.
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
        return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    # NOTE(review): the trailing 'return None' is outside the visible
    # excerpt; falling off the end yields None regardless.
751 def process_info(self, info_dict):
752 """Process a single dictionary returned by an InfoExtractor."""
754 reason = self._match_entry(info_dict)
755 if reason is not None:
756 self.to_screen(u'[download] ' + reason)
759 max_downloads = self.params.get('max_downloads')
760 if max_downloads is not None:
761 if self._num_downloads > int(max_downloads):
762 raise MaxDownloadsReached()
764 filename = self.prepare_filename(info_dict)
767 if self.params.get('forcetitle', False):
768 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
769 if self.params.get('forceurl', False):
770 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
771 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
772 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
773 if self.params.get('forcedescription', False) and 'description' in info_dict:
774 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
775 if self.params.get('forcefilename', False) and filename is not None:
776 print filename.encode(preferredencoding(), 'xmlcharrefreplace')
777 if self.params.get('forceformat', False):
778 print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')
780 # Do nothing else if in simulate mode
781 if self.params.get('simulate', False):
788 dn = os.path.dirname(_encodeFilename(filename))
789 if dn != '' and not os.path.exists(dn): # dn is already encoded
791 except (OSError, IOError), err:
792 self.trouble(u'ERROR: unable to create directory ' + unicode(err))
795 if self.params.get('writedescription', False):
797 descfn = filename + u'.description'
798 self.report_writedescription(descfn)
799 descfile = open(_encodeFilename(descfn), 'wb')
801 descfile.write(info_dict['description'].encode('utf-8'))
804 except (OSError, IOError):
805 self.trouble(u'ERROR: Cannot write description file ' + descfn)
808 if self.params.get('writeinfojson', False):
809 infofn = filename + u'.info.json'
810 self.report_writeinfojson(infofn)
813 except (NameError,AttributeError):
814 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
817 infof = open(_encodeFilename(infofn), 'wb')
819 json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
820 json.dump(json_info_dict, infof)
823 except (OSError, IOError):
824 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
827 if not self.params.get('skip_download', False):
828 if self.params.get('nooverwrites', False) and os.path.exists(_encodeFilename(filename)):
832 success = self._do_download(filename, info_dict)
833 except (OSError, IOError), err:
834 raise UnavailableVideoError
835 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
836 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
838 except (ContentTooShortError, ), err:
839 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
844 self.post_process(filename, info_dict)
845 except (PostProcessingError), err:
846 self.trouble(u'ERROR: postprocessing: %s' % str(err))
849 def download(self, url_list):
850 """Download a given list of URLs."""
851 if len(url_list) > 1 and self.fixed_template():
852 raise SameFileError(self.params['outtmpl'])
855 suitable_found = False
857 # Go to next InfoExtractor if not suitable
858 if not ie.suitable(url):
861 # Suitable InfoExtractor found
862 suitable_found = True
864 # Extract information from URL and process it
867 # Suitable InfoExtractor had been found; go to next URL
870 if not suitable_found:
871 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
873 return self._download_retcode
875 def post_process(self, filename, ie_info):
876 """Run the postprocessing chain on the given file."""
878 info['filepath'] = filename
884 def _download_with_rtmpdump(self, filename, url, player_url):
885 self.report_destination(filename)
886 tmpfilename = self.temp_name(filename)
888 # Check for rtmpdump first
890 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
891 except (OSError, IOError):
892 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
895 # Download using rtmpdump. rtmpdump returns exit code 2 when
896 # the connection was interrumpted and resuming appears to be
897 # possible. This is part of rtmpdump's normal usage, AFAIK.
898 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
899 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
900 while retval == 2 or retval == 1:
901 prevsize = os.path.getsize(_encodeFilename(tmpfilename))
902 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
903 time.sleep(5.0) # This seems to be needed
904 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
905 cursize = os.path.getsize(_encodeFilename(tmpfilename))
906 if prevsize == cursize and retval == 1:
908 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
909 if prevsize == cursize and retval == 2 and cursize > 1024:
910 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
914 self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
915 self.try_rename(tmpfilename, filename)
918 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
921 def _do_download(self, filename, info_dict):
922 url = info_dict['url']
923 player_url = info_dict.get('player_url', None)
925 # Check file already present
926 if self.params.get('continuedl', False) and os.path.isfile(_encodeFilename(filename)) and not self.params.get('nopart', False):
927 self.report_file_already_downloaded(filename)
930 # Attempt to download using rtmpdump
931 if url.startswith('rtmp'):
932 return self._download_with_rtmpdump(filename, url, player_url)
934 tmpfilename = self.temp_name(filename)
937 # Do not include the Accept-Encoding header
938 headers = {'Youtubedl-no-compression': 'True'}
939 basic_request = urllib2.Request(url, None, headers)
940 request = urllib2.Request(url, None, headers)
942 # Establish possible resume length
943 if os.path.isfile(_encodeFilename(tmpfilename)):
944 resume_len = os.path.getsize(_encodeFilename(tmpfilename))
950 if self.params.get('continuedl', False):
951 self.report_resuming_byte(resume_len)
952 request.add_header('Range','bytes=%d-' % resume_len)
958 retries = self.params.get('retries', 0)
959 while count <= retries:
960 # Establish connection
962 if count == 0 and 'urlhandle' in info_dict:
963 data = info_dict['urlhandle']
964 data = urllib2.urlopen(request)
966 except (urllib2.HTTPError, ), err:
967 if (err.code < 500 or err.code >= 600) and err.code != 416:
968 # Unexpected HTTP error
970 elif err.code == 416:
971 # Unable to resume (requested range not satisfiable)
973 # Open the connection again without the range header
974 data = urllib2.urlopen(basic_request)
975 content_length = data.info()['Content-Length']
976 except (urllib2.HTTPError, ), err:
977 if err.code < 500 or err.code >= 600:
980 # Examine the reported length
981 if (content_length is not None and
982 (resume_len - 100 < long(content_length) < resume_len + 100)):
983 # The file had already been fully downloaded.
984 # Explanation to the above condition: in issue #175 it was revealed that
985 # YouTube sometimes adds or removes a few bytes from the end of the file,
986 # changing the file size slightly and causing problems for some users. So
987 # I decided to implement a suggested change and consider the file
988 # completely downloaded if the file size differs less than 100 bytes from
989 # the one in the hard drive.
990 self.report_file_already_downloaded(filename)
991 self.try_rename(tmpfilename, filename)
994 # The length does not match, we start the download over
995 self.report_unable_to_resume()
1000 if count <= retries:
1001 self.report_retry(count, retries)
1004 self.trouble(u'ERROR: giving up after %s retries' % retries)
1007 data_len = data.info().get('Content-length', None)
1008 if data_len is not None:
1009 data_len = long(data_len) + resume_len
1010 data_len_str = self.format_bytes(data_len)
1011 byte_counter = 0 + resume_len
1015 # Download and write
1016 before = time.time()
1017 data_block = data.read(block_size)
1019 if len(data_block) == 0:
1021 byte_counter += len(data_block)
1023 # Open file just in time
1026 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
1027 assert stream is not None
1028 filename = self.undo_temp_name(tmpfilename)
1029 self.report_destination(filename)
1030 except (OSError, IOError), err:
1031 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
1034 stream.write(data_block)
1035 except (IOError, OSError), err:
1036 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
1038 block_size = self.best_block_size(after - before, len(data_block))
1041 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
1042 if data_len is None:
1043 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
1045 percent_str = self.calc_percent(byte_counter, data_len)
1046 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
1047 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
1050 self.slow_down(start, byte_counter - resume_len)
1053 self.trouble(u'\nERROR: Did not get any data blocks')
1056 self.report_finish()
1057 if data_len is not None and byte_counter != data_len:
1058 raise ContentTooShortError(byte_counter, long(data_len))
1059 self.try_rename(tmpfilename, filename)
1061 # Update file modification time
1062 if self.params.get('updatetime', True):
1063 info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
1068 class InfoExtractor(object):
1069 """Information Extractor class.
1071 Information extractors are the classes that, given a URL, extract
1072 information from the video (or videos) the URL refers to. This
1073 information includes the real video URL, the video title and simplified
1074 title, author and others. The information is stored in a dictionary
1075 which is then passed to the FileDownloader. The FileDownloader
1076 processes this information possibly downloading the video to the file
1077 system, among other possible outcomes. The dictionaries must include
1078 the following fields:
1080 id: Video identifier.
1081 url: Final video URL.
1082 uploader: Nickname of the video uploader.
1083 title: Literal title.
1084 stitle: Simplified title.
1085 ext: Video filename extension.
1086 format: Video format.
1087 player_url: SWF Player URL (may be None).
1089 The following fields are optional. Their primary purpose is to allow
1090 youtube-dl to serve as the backend for a video search function, such
1091 as the one in youtube2mp3. They are only used when their respective
1092 forced printing functions are called:
1094 thumbnail: Full URL to a video thumbnail image.
1095 description: One-line video description.
1097 Subclasses of this one should re-define the _real_initialize() and
1098 _real_extract() methods and define a _VALID_URL regexp.
1099 Probably, they should also be added to the list of extractors.
1105 def __init__(self, downloader=None):
1106 """Constructor. Receives an optional downloader."""
1108 self.set_downloader(downloader)
def suitable(self, url):
    """Return True when *url* can be handled by this InfoExtractor."""
    match = re.match(self._VALID_URL, url)
    return match is not None
1114 def initialize(self):
1115 """Initializes an instance (authentication, etc)."""
1117 self._real_initialize()
1120 def extract(self, url):
1121 """Extracts URL information and returns it in list of dicts."""
1123 return self._real_extract(url)
def set_downloader(self, downloader):
    """Sets the downloader for this IE."""
    # Stored so extraction code and the report_* helpers can emit
    # messages and trigger downloads through the FileDownloader.
    self._downloader = downloader
    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        # Intentionally left as a no-op in the base class.
    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        # Intentionally left as a no-op in the base class.
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com.

    Handles language selection, optional login (explicit credentials or
    .netrc), age-gate confirmation, and extraction of the real media URL
    for the requested format(s).

    NOTE(review): this chunk appears to have lines elided (several 'try:',
    'if mobj is None:', 'return' and dict-literal opening lines are
    missing); comments below document intent, not the literal control flow.
    """

    _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality (YouTube itag codes, best first)
    _available_formats = ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13']
    _available_formats_prefer_free = ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13']
    # itag -> file extension map (NOTE(review): most entries elided here)
    _video_extensions = {
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
    # itag -> "WxH" resolution map (NOTE(review): entries elided here)
    _video_dimensions = {
    IE_NAME = u'youtube'

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_screen(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_screen(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_screen(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_screen(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report extracted video URL."""
        self._downloader.to_screen(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_screen(u'[youtube] RTMP download detected')

    def _print_formats(self, formats):
        # Print one "itag : extension [WxH]" line per available format.
        # NOTE(review): the enclosing 'for x in formats:' loop appears to
        # be elided from this chunk.
        print 'Available formats:'
        print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))

    def _real_initialize(self):
        # Sets YouTube language to English, then (optionally) logs in and
        # confirms the age gate. Warnings are non-fatal; only a failed age
        # confirmation is reported through trouble().
        if self._downloader is None:

        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            # Look up stored credentials for the 'youtube' machine entry.
            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
            if info is not None:
                raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))

        # Force English so date/metadata scraping regexes match.
        request = urllib2.Request(self._LANG_URL)
        urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))

        # No authentication to be performed
        if username is None:

        # NOTE(review): the opening of the login_form dict literal appears
        # to be elided from this chunk.
        'current_form': 'loginForm',
        'action_login': 'Log In',
        'username': username,
        'password': password,
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
        login_results = urllib2.urlopen(request).read()
        # If the login form is still present, credentials were rejected.
        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
            self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))

        # Confirm the age gate (required for age-restricted videos).
        'action_confirm': 'Confirm',
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form))
        self.report_age_confirmation()
        age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))

    def _real_extract(self, url):
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
        video_id = mobj.group(2)

        # Download the watch page (with age-gate bypass parameter).
        self.report_video_webpage_download(video_id)
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id)
        video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
        if mobj is not None:
            # Unescape the JS-escaped URL (\/ -> /).
            player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))

        # Try several 'el' variants of get_video_info until one yields a
        # 'token' parameter.
        self.report_video_info_webpage_download(video_id)
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            request = urllib2.Request(video_info_url)
            video_info_webpage = urllib2.urlopen(request).read()
            video_info = parse_qs(video_info_webpage)
            if 'token' in video_info:
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
        if 'token' not in video_info:
            if 'reason' in video_info:
                # YouTube supplied a human-readable failure reason.
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
            self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        simple_title = _simplify_title(video_title)

        # thumbnail image
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            video_thumbnail = ''
        else:	# don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # upload date: scraped from the watch page and normalized to
        # YYYYMMDD by trying several date formats.
        mobj = re.search(r'id="eow-date.*?>(.*?)</span>', video_webpage, re.DOTALL)
        if mobj is not None:
            upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
            format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
            for expression in format_expressions:
                upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')

        # description: only fetched when the caller asked for it.
        video_description = u'No description available.'
        if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False):
            mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
            if mobj is not None:
                video_description = mobj.group(1).decode('utf-8')
                html_parser = lxml.etree.HTMLParser(encoding='utf-8')
                vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser)
                video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()'))
                # TODO use another parser

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        req_format = self._downloader.params.get('format', None)

        if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1:
            # Build an itag -> URL map from the comma-separated stream map.
            url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',')
            url_data = [parse_qs(uds) for uds in url_data_strs]
            url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data)
            url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data)

            format_limit = self._downloader.params.get('format_limit', None)
            available_formats = self._available_formats_prefer_free if self._downloader.params.get('prefer_free_formats', False) else self._available_formats
            if format_limit is not None and format_limit in available_formats:
                # Cap quality at format_limit: keep only it and worse.
                format_list = available_formats[available_formats.index(format_limit):]
                format_list = available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
            if self._downloader.params.get('listformats', None):
                self._print_formats(existing_formats)
            if req_format is None or req_format == 'best':
                video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
            elif req_format == 'worst':
                video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
            elif req_format in ('-1', 'all'):
                video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
                # Specific formats. We pick the first in a slash-delimited sequence.
                # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'.
                req_formats = req_format.split('/')
                video_url_list = None
                for rf in req_formats:
                    video_url_list = [(rf, url_map[rf])]
                if video_url_list is None:
                    self._downloader.trouble(u'ERROR: requested format not available')
            self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info')

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension
            video_extension = self._video_extensions.get(format_param, 'flv')

            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_real_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'upload_date': upload_date,
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
                'thumbnail': video_thumbnail.decode('utf-8'),
                'description': video_description,
                'player_url': player_url,
            except UnavailableVideoError, err:
                self._downloader.trouble(u'\nERROR: unable to download video')
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com.

    Bypasses the family filter on startup, delegates yt-prefixed IDs to
    the YouTube extractor, and otherwise scrapes the media URL (with
    optional gdaKey signature) from the watch page or its flashvars.

    NOTE(review): this chunk appears to have lines elided (several 'try:',
    'if mobj is None:', 'return' and dict-literal opening lines are
    missing); comments below document intent, not the literal control flow.
    """

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
    IE_NAME = u'metacafe'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Kept so yt-prefixed Metacafe IDs can be delegated to YouTube.
        self._youtube_ie = youtube_ie

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_screen(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER)
        self.report_disclaimer()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))

        # Confirm age: POST the family-filter form.
        # NOTE(review): the opening of the disclaimer_form dict literal
        # appears to be elided from this chunk.
        'submit': "Continue - I'm over 18",
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form))
        self.report_age_confirmation()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))

    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            # Delegate extraction to the YouTube IE.
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        if mobj is not None:
            # Direct mediaURL found in the page.
            mediaURL = urllib.unquote(mobj.group(1))
            video_extension = mediaURL[-3:]

            # Extract gdaKey if available
            mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
            video_url = mediaURL
            gdaKey = mobj.group(1)
            video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

        # Fallback: parse the flashvars blob for mediaData.
        mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        vardict = parse_qs(mobj.group(1))
        if 'mediaData' not in vardict:
            self._downloader.trouble(u'ERROR: unable to extract media URL')
        mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        # Unescape JSON's \/ and append the signature key.
        mediaURL = mobj.group(1).replace('\\/', '/')
        video_extension = mediaURL[-3:]
        video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion.

    Disables the family filter via a cookie, then scrapes the SD media URL
    out of the player's 'sequence' flash variable.

    NOTE(review): this chunk appears to have lines elided (several 'try:',
    'if mobj is None:' and 'return' lines are missing); comments below
    document intent, not the literal control flow.
    """

    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
    IE_NAME = u'dailymotion'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        # Turn off the family filter so restricted videos are reachable.
        request.add_header('Cookie', 'family_filter=off')
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        sequence = urllib.unquote(mobj.group(1))
        mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '')

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        mobj = re.search(r'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = _unescapeHTML(mobj.group('title').decode('utf-8'))
        video_title = sanitize_title(video_title)
        simple_title = _simplify_title(video_title)

        mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com.

    Prefers the mp4 download_url; falls back to the flv videoUrl embedded
    (hex-escaped) in the page. Thumbnail lookup only happens when the
    caller forces it.

    NOTE(review): this chunk appears to have lines elided (several 'try:',
    'if mobj is None:' and 'return' lines are missing); comments below
    document intent, not the literal control flow.
    """

    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
    IE_NAME = u'video.google'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)

    def _real_extract(self, url):
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        # Fallback: flv URL hex-escaped in the page source.
        video_extension = 'flv'
        mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))
        # Undo the \xNN escaping used in the embedded JS ('=' and '&').
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = _simplify_title(video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail
        if self._downloader.params.get('forcethumbnail', False):
            # Thumbnail is only available through a search-page scrape.
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
            video_thumbnail = mobj.group(1)
        else:	# we need something to pass to process_info
            video_thumbnail = ''

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'upload_date': u'NA',
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'\nERROR: unable to download video')
1792 class PhotobucketIE(InfoExtractor):
1793 """Information extractor for photobucket.com."""
1795 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1796 IE_NAME = u'photobucket'
1798 def __init__(self, downloader=None):
1799 InfoExtractor.__init__(self, downloader)
1801 def report_download_webpage(self, video_id):
1802 """Report webpage download."""
1803 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1805 def report_extraction(self, video_id):
1806 """Report information extraction."""
1807 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1809 def _real_extract(self, url):
1810 # Extract id from URL
1811 mobj = re.match(self._VALID_URL, url)
1813 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1816 # At this point we have a new video
1817 self._downloader.increment_downloads()
1818 video_id = mobj.group(1)
1820 video_extension = 'flv'
1822 # Retrieve video webpage to extract further information
1823 request = urllib2.Request(url)
1825 self.report_download_webpage(video_id)
1826 webpage = urllib2.urlopen(request).read()
1827 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1828 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1831 # Extract URL, uploader, and title from webpage
1832 self.report_extraction(video_id)
1833 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1835 self._downloader.trouble(u'ERROR: unable to extract media URL')
1837 mediaURL = urllib.unquote(mobj.group(1))
1839 video_url = mediaURL
1841 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1843 self._downloader.trouble(u'ERROR: unable to extract title')
1845 video_title = mobj.group(1).decode('utf-8')
1846 video_title = sanitize_title(video_title)
1847 simple_title = _simplify_title(vide_title)
1849 video_uploader = mobj.group(2).decode('utf-8')
1852 # Process video information
1853 self._downloader.process_info({
1854 'id': video_id.decode('utf-8'),
1855 'url': video_url.decode('utf-8'),
1856 'uploader': video_uploader,
1857 'upload_date': u'NA',
1858 'title': video_title,
1859 'stitle': simple_title,
1860 'ext': video_extension.decode('utf-8'),
1864 except UnavailableVideoError:
1865 self._downloader.trouble(u'\nERROR: unable to download video')
1868 class YahooIE(InfoExtractor):
1869 """Information extractor for video.yahoo.com."""
1871 # _VALID_URL matches all Yahoo! Video URLs
1872 # _VPAGE_URL matches only the extractable '/watch/' URLs
1873 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1874 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1875 IE_NAME = u'video.yahoo'
1877 def __init__(self, downloader=None):
1878 InfoExtractor.__init__(self, downloader)
1880 def report_download_webpage(self, video_id):
1881 """Report webpage download."""
1882 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1884 def report_extraction(self, video_id):
1885 """Report information extraction."""
1886 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1888 def _real_extract(self, url, new_video=True):
1889 # Extract ID from URL
1890 mobj = re.match(self._VALID_URL, url)
1892 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1895 # At this point we have a new video
1896 self._downloader.increment_downloads()
1897 video_id = mobj.group(2)
1898 video_extension = 'flv'
1900 # Rewrite valid but non-extractable URLs as
1901 # extractable English language /watch/ URLs
1902 if re.match(self._VPAGE_URL, url) is None:
1903 request = urllib2.Request(url)
1905 webpage = urllib2.urlopen(request).read()
1906 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1907 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1910 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1912 self._downloader.trouble(u'ERROR: Unable to extract id field')
1914 yahoo_id = mobj.group(1)
1916 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1918 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1920 yahoo_vid = mobj.group(1)
1922 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1923 return self._real_extract(url, new_video=False)
1925 # Retrieve video webpage to extract further information
1926 request = urllib2.Request(url)
1928 self.report_download_webpage(video_id)
1929 webpage = urllib2.urlopen(request).read()
1930 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1931 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1934 # Extract uploader and title from webpage
1935 self.report_extraction(video_id)
1936 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1938 self._downloader.trouble(u'ERROR: unable to extract video title')
1940 video_title = mobj.group(1).decode('utf-8')
1941 simple_title = _simplify_title(video_title)
1943 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1945 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1947 video_uploader = mobj.group(1).decode('utf-8')
1949 # Extract video thumbnail
1950 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1952 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1954 video_thumbnail = mobj.group(1).decode('utf-8')
1956 # Extract video description
1957 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1959 self._downloader.trouble(u'ERROR: unable to extract video description')
1961 video_description = mobj.group(1).decode('utf-8')
1962 if not video_description:
1963 video_description = 'No description available.'
1965 # Extract video height and width
1966 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1968 self._downloader.trouble(u'ERROR: unable to extract video height')
1970 yv_video_height = mobj.group(1)
1972 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1974 self._downloader.trouble(u'ERROR: unable to extract video width')
1976 yv_video_width = mobj.group(1)
1978 # Retrieve video playlist to extract media URL
1979 # I'm not completely sure what all these options are, but we
1980 # seem to need most of them, otherwise the server sends a 401.
1981 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1982 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1983 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1984 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1985 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1987 self.report_download_webpage(video_id)
1988 webpage = urllib2.urlopen(request).read()
1989 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1990 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1993 # Extract media URL from playlist XML
1994 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1996 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1998 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1999 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
2002 # Process video information
2003 self._downloader.process_info({
2004 'id': video_id.decode('utf-8'),
2006 'uploader': video_uploader,
2007 'upload_date': u'NA',
2008 'title': video_title,
2009 'stitle': simple_title,
2010 'ext': video_extension.decode('utf-8'),
2011 'thumbnail': video_thumbnail.decode('utf-8'),
2012 'description': video_description,
2013 'thumbnail': video_thumbnail,
2016 except UnavailableVideoError:
2017 self._downloader.trouble(u'\nERROR: unable to download video')
2020 class VimeoIE(InfoExtractor):
2021 """Information extractor for vimeo.com."""
2023 # _VALID_URL matches Vimeo URLs
2024 _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)'
    def __init__(self, downloader=None):
        """Constructor. Delegates to the base class, which stores *downloader*."""
        InfoExtractor.__init__(self, downloader)
2030 def report_download_webpage(self, video_id):
2031 """Report webpage download."""
2032 self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id)
2034 def report_extraction(self, video_id):
2035 """Report information extraction."""
2036 self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id)
2038 def _real_extract(self, url, new_video=True):
2039 # Extract ID from URL
2040 mobj = re.match(self._VALID_URL, url)
2042 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2045 # At this point we have a new video
2046 self._downloader.increment_downloads()
2047 video_id = mobj.group(1)
2049 # Retrieve video webpage to extract further information
2050 request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers)
2052 self.report_download_webpage(video_id)
2053 webpage = urllib2.urlopen(request).read()
2054 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2055 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2058 # Now we begin extracting as much information as we can from what we
2059 # retrieved. First we extract the information common to all extractors,
2060 # and latter we extract those that are Vimeo specific.
2061 self.report_extraction(video_id)
2064 mobj = re.search(r'<caption>(.*?)</caption>', webpage)
2066 self._downloader.trouble(u'ERROR: unable to extract video title')
2068 video_title = mobj.group(1).decode('utf-8')
2069 simple_title = _simplify_title(video_title)
2072 mobj = re.search(r'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage)
2074 self._downloader.trouble(u'ERROR: unable to extract video uploader')
2076 video_uploader = mobj.group(1).decode('utf-8')
2078 # Extract video thumbnail
2079 mobj = re.search(r'<thumbnail>(.*?)</thumbnail>', webpage)
2081 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
2083 video_thumbnail = mobj.group(1).decode('utf-8')
2085 # # Extract video description
2086 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage)
2088 # self._downloader.trouble(u'ERROR: unable to extract video description')
2090 # video_description = mobj.group(1).decode('utf-8')
2091 # if not video_description: video_description = 'No description available.'
2092 video_description = 'Foo.'
2094 # Vimeo specific: extract request signature
2095 mobj = re.search(r'<request_signature>(.*?)</request_signature>', webpage)
2097 self._downloader.trouble(u'ERROR: unable to extract request signature')
2099 sig = mobj.group(1).decode('utf-8')
2101 # Vimeo specific: extract video quality information
2102 mobj = re.search(r'<isHD>(\d+)</isHD>', webpage)
2104 self._downloader.trouble(u'ERROR: unable to extract video quality information')
2106 quality = mobj.group(1).decode('utf-8')
2108 if int(quality) == 1:
2113 # Vimeo specific: Extract request signature expiration
2114 mobj = re.search(r'<request_signature_expires>(.*?)</request_signature_expires>', webpage)
2116 self._downloader.trouble(u'ERROR: unable to extract request signature expiration')
2118 sig_exp = mobj.group(1).decode('utf-8')
2120 video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id, sig, sig_exp, quality)
2123 # Process video information
2124 self._downloader.process_info({
2125 'id': video_id.decode('utf-8'),
2127 'uploader': video_uploader,
2128 'upload_date': u'NA',
2129 'title': video_title,
2130 'stitle': simple_title,
2132 'thumbnail': video_thumbnail.decode('utf-8'),
2133 'description': video_description,
2134 'thumbnail': video_thumbnail,
2135 'description': video_description,
2138 except UnavailableVideoError:
2139 self._downloader.trouble(u'ERROR: unable to download video')
2142 class GenericIE(InfoExtractor):
2143 """Generic last-resort information extractor."""
2146 IE_NAME = u'generic'
2148 def __init__(self, downloader=None):
2149 InfoExtractor.__init__(self, downloader)
2151 def report_download_webpage(self, video_id):
2152 """Report webpage download."""
2153 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
2154 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
2156 def report_extraction(self, video_id):
2157 """Report information extraction."""
2158 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2160 def _real_extract(self, url):
2161 # At this point we have a new video
2162 self._downloader.increment_downloads()
2164 video_id = url.split('/')[-1]
2165 request = urllib2.Request(url)
2167 self.report_download_webpage(video_id)
2168 webpage = urllib2.urlopen(request).read()
2169 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2170 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
2172 except ValueError, err:
2173 # since this is the last-resort InfoExtractor, if
2174 # this error is thrown, it'll be thrown here
2175 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2178 self.report_extraction(video_id)
2179 # Start with something easy: JW Player in SWFObject
2180 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
2182 # Broaden the search a little bit
2183 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
2185 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2188 # It's possible that one of the regexes
2189 # matched, but returned an empty group:
2190 if mobj.group(1) is None:
2191 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
2194 video_url = urllib.unquote(mobj.group(1))
2195 video_id = os.path.basename(video_url)
2197 # here's a fun little line of code for you:
2198 video_extension = os.path.splitext(video_id)[1][1:]
2199 video_id = os.path.splitext(video_id)[0]
2201 # it's tempting to parse this further, but you would
2202 # have to take into account all the variations like
2203 # Video Title - Site Name
2204 # Site Name | Video Title
2205 # Video Title - Tagline | Site Name
2206 # and so on and so forth; it's just not practical
2207 mobj = re.search(r'<title>(.*)</title>', webpage)
2209 self._downloader.trouble(u'ERROR: unable to extract title')
2211 video_title = mobj.group(1).decode('utf-8')
2212 video_title = sanitize_title(video_title)
2213 simple_title = _simplify_title(video_title)
2215 # video uploader is domain name
2216 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
2218 self._downloader.trouble(u'ERROR: unable to extract title')
2220 video_uploader = mobj.group(1).decode('utf-8')
2223 # Process video information
2224 self._downloader.process_info({
2225 'id': video_id.decode('utf-8'),
2226 'url': video_url.decode('utf-8'),
2227 'uploader': video_uploader,
2228 'upload_date': u'NA',
2229 'title': video_title,
2230 'stitle': simple_title,
2231 'ext': video_extension.decode('utf-8'),
2235 except UnavailableVideoError, err:
2236 self._downloader.trouble(u'\nERROR: unable to download video')
# NOTE(review): this numbered listing drops some structural lines (guards,
# try:/return, loop headers); code is kept byte-identical, comments only.
2239 class YoutubeSearchIE(InfoExtractor):
2240 """Information Extractor for YouTube search queries."""
# Query scheme: ytsearch:<terms>, ytsearchN:<terms>, ytsearchall:<terms>.
2241 _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+'
2242 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
# Matches watch links in the HTML results page; the id is cut out of the
# match text below rather than captured by a group.
2243 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
2244 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2246 _max_youtube_results = 1000
2247 IE_NAME = u'youtube:search'
# Delegates actual extraction to a YoutubeIE instance.
2249 def __init__(self, youtube_ie, downloader=None):
2250 InfoExtractor.__init__(self, downloader)
2251 self._youtube_ie = youtube_ie
2253 def report_download_page(self, query, pagenum):
2254 """Report attempt to download playlist page with given number."""
2255 query = query.decode(preferredencoding())
2256 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
2258 def _real_initialize(self):
2259 self._youtube_ie.initialize()
# Parses the prefix to decide how many results to fetch: empty -> 1,
# 'all' -> the hard cap, a number -> that many (clamped to the cap).
2261 def _real_extract(self, query):
2262 mobj = re.match(self._VALID_URL, query)
2264 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2267 prefix, query = query.split(':')
2269 query = query.encode('utf-8')
2271 self._download_n_results(query, 1)
2273 elif prefix == 'all':
2274 self._download_n_results(query, self._max_youtube_results)
2280 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2282 elif n > self._max_youtube_results:
2283 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
2284 n = self._max_youtube_results
2285 self._download_n_results(query, n)
2287 except ValueError: # parsing prefix as integer fails
2288 self._download_n_results(query, 1)
2291 def _download_n_results(self, query, n):
2292 """Downloads a specified number of results for a query"""
2295 already_seen = set()
2299 self.report_download_page(query, pagenum)
2300 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2301 request = urllib2.Request(result_url)
2303 page = urllib2.urlopen(request).read()
2304 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2305 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2308 # Extract video identifiers
# The match text is href="/watch?v=ID": split on '=' takes the id plus the
# closing quote ([2]), and [:-1] drops the quote.
2309 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2310 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
2311 if video_id not in already_seen:
2312 video_ids.append(video_id)
2313 already_seen.add(video_id)
# Once n distinct ids are collected, hand each to the YouTube extractor.
2314 if len(video_ids) == n:
2315 # Specified n videos reached
2316 for id in video_ids:
2317 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# No "Next" link: last results page — extract whatever was collected.
2320 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2321 for id in video_ids:
2322 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2325 pagenum = pagenum + 1
# NOTE(review): numbered listing with elided structural lines; code kept
# byte-identical, comments only. Mirrors YoutubeSearchIE's structure.
2328 class GoogleSearchIE(InfoExtractor):
2329 """Information Extractor for Google Video search queries."""
# Query scheme: gvsearch[:N|:all]:<terms>.
2330 _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+'
2331 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
# Unlike YoutubeSearchIE, the id is captured directly by group(1).
2332 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
2333 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
2335 _max_google_results = 1000
2336 IE_NAME = u'video.google:search'
# Delegates actual extraction to a GoogleIE instance.
2338 def __init__(self, google_ie, downloader=None):
2339 InfoExtractor.__init__(self, downloader)
2340 self._google_ie = google_ie
2342 def report_download_page(self, query, pagenum):
2343 """Report attempt to download playlist page with given number."""
2344 query = query.decode(preferredencoding())
2345 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
2347 def _real_initialize(self):
2348 self._google_ie.initialize()
# Prefix parsing: empty -> 1 result, 'all' -> cap, number -> clamped n.
2350 def _real_extract(self, query):
2351 mobj = re.match(self._VALID_URL, query)
2353 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2356 prefix, query = query.split(':')
2358 query = query.encode('utf-8')
2360 self._download_n_results(query, 1)
2362 elif prefix == 'all':
2363 self._download_n_results(query, self._max_google_results)
2369 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2371 elif n > self._max_google_results:
2372 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
2373 n = self._max_google_results
2374 self._download_n_results(query, n)
2376 except ValueError: # parsing prefix as integer fails
2377 self._download_n_results(query, 1)
2380 def _download_n_results(self, query, n):
2381 """Downloads a specified number of results for a query"""
2384 already_seen = set()
2388 self.report_download_page(query, pagenum)
2389 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2390 request = urllib2.Request(result_url)
2392 page = urllib2.urlopen(request).read()
2393 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2394 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2397 # Extract video identifiers
2398 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2399 video_id = mobj.group(1)
2400 if video_id not in already_seen:
2401 video_ids.append(video_id)
2402 already_seen.add(video_id)
# Stop as soon as n distinct docids are collected.
2403 if len(video_ids) == n:
2404 # Specified n videos reached
2405 for id in video_ids:
2406 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
# No "Next" span: last page — extract what was collected.
2409 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2410 for id in video_ids:
2411 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
2414 pagenum = pagenum + 1
# NOTE(review): numbered listing with elided structural lines; code kept
# byte-identical, comments only. Third copy of the search-IE pattern.
2417 class YahooSearchIE(InfoExtractor):
2418 """Information Extractor for Yahoo! Video search queries."""
# Query scheme: yvsearch[:N|:all]:<terms>.
2419 _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+'
2420 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
# Captures the two-part Yahoo id ("<num>/<num>") in group(1).
2421 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
2422 _MORE_PAGES_INDICATOR = r'\s*Next'
2424 _max_yahoo_results = 1000
2425 IE_NAME = u'video.yahoo:search'
# Delegates actual extraction to a YahooIE instance.
2427 def __init__(self, yahoo_ie, downloader=None):
2428 InfoExtractor.__init__(self, downloader)
2429 self._yahoo_ie = yahoo_ie
2431 def report_download_page(self, query, pagenum):
2432 """Report attempt to download playlist page with given number."""
2433 query = query.decode(preferredencoding())
2434 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
2436 def _real_initialize(self):
2437 self._yahoo_ie.initialize()
# Prefix parsing: empty -> 1 result, 'all' -> cap, number -> clamped n.
2439 def _real_extract(self, query):
2440 mobj = re.match(self._VALID_URL, query)
2442 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
2445 prefix, query = query.split(':')
2447 query = query.encode('utf-8')
2449 self._download_n_results(query, 1)
2451 elif prefix == 'all':
2452 self._download_n_results(query, self._max_yahoo_results)
2458 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
2460 elif n > self._max_yahoo_results:
2461 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
2462 n = self._max_yahoo_results
2463 self._download_n_results(query, n)
2465 except ValueError: # parsing prefix as integer fails
2466 self._download_n_results(query, 1)
2469 def _download_n_results(self, query, n):
2470 """Downloads a specified number of results for a query"""
2473 already_seen = set()
2477 self.report_download_page(query, pagenum)
2478 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
2479 request = urllib2.Request(result_url)
2481 page = urllib2.urlopen(request).read()
2482 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2483 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2486 # Extract video identifiers
2487 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2488 video_id = mobj.group(1)
2489 if video_id not in already_seen:
2490 video_ids.append(video_id)
2491 already_seen.add(video_id)
# Stop as soon as n distinct ids are collected.
2492 if len(video_ids) == n:
2493 # Specified n videos reached
2494 for id in video_ids:
2495 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
# No "Next" link: last page — extract what was collected.
2498 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2499 for id in video_ids:
2500 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
2503 pagenum = pagenum + 1
2506 class YoutubePlaylistIE(InfoExtractor):
2507 """Information Extractor for YouTube playlists."""
2509 _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
2510 _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
2511 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
2512 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
2514 IE_NAME = u'youtube:playlist'
2516 def __init__(self, youtube_ie, downloader=None):
2517 InfoExtractor.__init__(self, downloader)
2518 self._youtube_ie = youtube_ie
2520 def report_download_page(self, playlist_id, pagenum):
2521 """Report attempt to download playlist page with given number."""
2522 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
2524 def _real_initialize(self):
2525 self._youtube_ie.initialize()
2527 def _real_extract(self, url):
2528 # Extract playlist id
2529 mobj = re.match(self._VALID_URL, url)
2531 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2535 if mobj.group(3) is not None:
2536 self._youtube_ie.extract(mobj.group(3))
2539 # Download playlist pages
2540 # prefix is 'p' as default for playlists but there are other types that need extra care
2541 playlist_prefix = mobj.group(1)
2542 if playlist_prefix == 'a':
2543 playlist_access = 'artist'
2545 playlist_prefix = 'p'
2546 playlist_access = 'view_play_list'
2547 playlist_id = mobj.group(2)
2552 self.report_download_page(playlist_id, pagenum)
2553 url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)
2554 request = urllib2.Request(url)
2556 page = urllib2.urlopen(request).read()
2557 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2558 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2561 # Extract video identifiers
2563 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2564 if mobj.group(1) not in ids_in_page:
2565 ids_in_page.append(mobj.group(1))
2566 video_ids.extend(ids_in_page)
2568 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2570 pagenum = pagenum + 1
2572 playliststart = self._downloader.params.get('playliststart', 1) - 1
2573 playlistend = self._downloader.params.get('playlistend', -1)
2574 video_ids = video_ids[playliststart:playlistend]
2576 for id in video_ids:
2577 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# NOTE(review): numbered listing with elided structural lines (loop header,
# try:, return); code kept byte-identical, comments only.
2581 class YoutubeUserIE(InfoExtractor):
2582 """Information Extractor for YouTube users."""
# Accepts either a youtube.com/user/<name> URL or the ytuser:<name> scheme.
2584 _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)'
2585 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
# GData API caps results per request, so uploads are fetched page by page.
2586 _GDATA_PAGE_SIZE = 50
2587 _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d'
2588 _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]'
2590 IE_NAME = u'youtube:user'
# Delegates actual extraction to a YoutubeIE instance.
2592 def __init__(self, youtube_ie, downloader=None):
2593 InfoExtractor.__init__(self, downloader)
2594 self._youtube_ie = youtube_ie
2596 def report_download_page(self, username, start_index):
2597 """Report attempt to download user page."""
2598 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' %
2599 (username, start_index, start_index + self._GDATA_PAGE_SIZE))
2601 def _real_initialize(self):
2602 self._youtube_ie.initialize()
2604 def _real_extract(self, url):
2606 mobj = re.match(self._VALID_URL, url)
2608 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2611 username = mobj.group(1)
2613 # Download video ids using YouTube Data API. Result size per
2614 # query is limited (currently to 50 videos) so we need to query
2615 # page by page until there are no video ids - it means we got
# GData start-index is 1-based, hence the +1.
2622 start_index = pagenum * self._GDATA_PAGE_SIZE + 1
2623 self.report_download_page(username, start_index)
2625 request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index))
2628 page = urllib2.urlopen(request).read()
2629 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2630 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2633 # Extract video identifiers
2636 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2637 if mobj.group(1) not in ids_in_page:
2638 ids_in_page.append(mobj.group(1))
2640 video_ids.extend(ids_in_page)
2642 # A little optimization - if current page is not
2643 # "full", ie. does not contain PAGE_SIZE video ids then
2644 # we can assume that this page is the last one - there
2645 # are no more ids on further pages - no need to query
2648 if len(ids_in_page) < self._GDATA_PAGE_SIZE:
# Apply the --playlist-start/--playlist-end window; -1 means "to the end"
# and is handled explicitly so the last video is not dropped.
2653 all_ids_count = len(video_ids)
2654 playliststart = self._downloader.params.get('playliststart', 1) - 1
2655 playlistend = self._downloader.params.get('playlistend', -1)
2657 if playlistend == -1:
2658 video_ids = video_ids[playliststart:]
2660 video_ids = video_ids[playliststart:playlistend]
2662 self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %
2663 (username, all_ids_count, len(video_ids)))
2665 for video_id in video_ids:
2666 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
2669 class DepositFilesIE(InfoExtractor):
2670 """Information extractor for depositfiles.com"""
2672 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)'
2673 IE_NAME = u'DepositFiles'
2675 def __init__(self, downloader=None):
2676 InfoExtractor.__init__(self, downloader)
2678 def report_download_webpage(self, file_id):
2679 """Report webpage download."""
2680 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2682 def report_extraction(self, file_id):
2683 """Report information extraction."""
2684 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2686 def _real_extract(self, url):
2687 # At this point we have a new file
2688 self._downloader.increment_downloads()
2690 file_id = url.split('/')[-1]
2691 # Rebuild url in english locale
2692 url = 'http://depositfiles.com/en/files/' + file_id
2694 # Retrieve file webpage with 'Free download' button pressed
2695 free_download_indication = { 'gateway_result' : '1' }
2696 request = urllib2.Request(url, urllib.urlencode(free_download_indication))
2698 self.report_download_webpage(file_id)
2699 webpage = urllib2.urlopen(request).read()
2700 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2701 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2704 # Search for the real file URL
2705 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2706 if (mobj is None) or (mobj.group(1) is None):
2707 # Try to figure out reason of the error.
2708 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2709 if (mobj is not None) and (mobj.group(1) is not None):
2710 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2711 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2713 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2716 file_url = mobj.group(1)
2717 file_extension = os.path.splitext(file_url)[1][1:]
2719 # Search for file title
2720 mobj = re.search(r'<b title="(.*?)">', webpage)
2722 self._downloader.trouble(u'ERROR: unable to extract title')
2724 file_title = mobj.group(1).decode('utf-8')
2727 # Process file information
2728 self._downloader.process_info({
2729 'id': file_id.decode('utf-8'),
2730 'url': file_url.decode('utf-8'),
2732 'upload_date': u'NA',
2733 'title': file_title,
2734 'stitle': file_title,
2735 'ext': file_extension.decode('utf-8'),
2739 except UnavailableVideoError, err:
2740 self._downloader.trouble(u'ERROR: unable to download file')
# NOTE(review): numbered listing with elided structural lines (login form
# body, try:/return, else branches); code kept byte-identical, comments only.
2743 class FacebookIE(InfoExtractor):
2744 """Information Extractor for Facebook"""
# Matches video/photo pages with a v=<id> query parameter; id in group 'ID'.
2746 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
2747 _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
2748 _NETRC_MACHINE = 'facebook'
# Ordered best-first; format selection below relies on this ordering.
2749 _available_formats = ['video', 'highqual', 'lowqual']
2750 _video_extensions = {
2755 IE_NAME = u'facebook'
2757 def __init__(self, downloader=None):
2758 InfoExtractor.__init__(self, downloader)
2760 def _reporter(self, message):
2761 """Add header and report message."""
2762 self._downloader.to_screen(u'[facebook] %s' % message)
2764 def report_login(self):
2765 """Report attempt to log in."""
2766 self._reporter(u'Logging in')
2768 def report_video_webpage_download(self, video_id):
2769 """Report attempt to download video webpage."""
2770 self._reporter(u'%s: Downloading video webpage' % video_id)
2772 def report_information_extraction(self, video_id):
2773 """Report attempt to extract video information."""
2774 self._reporter(u'%s: Extracting video information' % video_id)
# Scrapes fields out of inline Javascript via the regex table below and
# returns a dict; missing fields are simply absent from the result.
2776 def _parse_page(self, video_webpage):
2777 """Extract video information from page"""
2779 data = {'title': r'\("video_title", "(.*?)"\)',
2780 'description': r'<div class="datawrap">(.*?)</div>',
2781 'owner': r'\("video_owner_name", "(.*?)"\)',
2782 'thumbnail': r'\("thumb_url", "(?P<THUMB>.*?)"\)',
2785 for piece in data.keys():
2786 mobj = re.search(data[piece], video_webpage)
2787 if mobj is not None:
# Values are URL-quoted inside an escaped-Unicode JS segment.
2788 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
# Collect one media URL per known format name, best-first.
2792 for fmt in self._available_formats:
2793 mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
2794 if mobj is not None:
2795 # URL is in a Javascript segment inside an escaped Unicode format within
2796 # the generally utf-8 page
2797 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
2798 video_info['video_urls'] = video_urls
# Login: credentials come from --username/--password or from .netrc.
2802 def _real_initialize(self):
2803 if self._downloader is None:
2808 downloader_params = self._downloader.params
2810 # Attempt to use provided username and password or .netrc data
2811 if downloader_params.get('username', None) is not None:
2812 useremail = downloader_params['username']
2813 password = downloader_params['password']
2814 elif downloader_params.get('usenetrc', False):
2816 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
2817 if info is not None:
2821 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
2822 except (IOError, netrc.NetrcParseError), err:
2823 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
2826 if useremail is None:
# Login failure is a warning, not an error: public videos still work.
2835 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
2838 login_results = urllib2.urlopen(request).read()
# If the response still contains a login form, authentication failed.
2839 if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
2840 self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
2842 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2843 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
2846 def _real_extract(self, url):
2847 mobj = re.match(self._VALID_URL, url)
2849 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
2851 video_id = mobj.group('ID')
2854 self.report_video_webpage_download(video_id)
2855 request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
2857 page = urllib2.urlopen(request)
2858 video_webpage = page.read()
2859 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2860 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
2863 # Start extracting information
2864 self.report_information_extraction(video_id)
2866 # Extract information
2867 video_info = self._parse_page(video_webpage)
2870 if 'owner' not in video_info:
2871 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
2873 video_uploader = video_info['owner']
2876 if 'title' not in video_info:
2877 self._downloader.trouble(u'ERROR: unable to extract video title')
2879 video_title = video_info['title']
2880 video_title = video_title.decode('utf-8')
2881 video_title = sanitize_title(video_title)
2883 simple_title = _simplify_title(video_title)
# Thumbnail is optional: warn and fall back to an empty string.
2886 if 'thumbnail' not in video_info:
2887 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
2888 video_thumbnail = ''
2890 video_thumbnail = video_info['thumbnail']
# Upload date: RFC 2822 string parsed to YYYYMMDD when available.
2894 if 'upload_date' in video_info:
2895 upload_time = video_info['upload_date']
2896 timetuple = email.utils.parsedate_tz(upload_time)
2897 if timetuple is not None:
2899 upload_date = time.strftime('%Y%m%d', timetuple[0:9])
2904 video_description = video_info.get('description', 'No description available.')
2906 url_map = video_info['video_urls']
2907 if len(url_map.keys()) > 0:
2908 # Decide which formats to download
2909 req_format = self._downloader.params.get('format', None)
2910 format_limit = self._downloader.params.get('format_limit', None)
# format_limit caps quality by slicing the best-first format list.
2912 if format_limit is not None and format_limit in self._available_formats:
2913 format_list = self._available_formats[self._available_formats.index(format_limit):]
2915 format_list = self._available_formats
2916 existing_formats = [x for x in format_list if x in url_map]
2917 if len(existing_formats) == 0:
2918 self._downloader.trouble(u'ERROR: no known formats available for video')
# Default: best available; 'worst': last entry; '-1': every format.
2920 if req_format is None:
2921 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
2922 elif req_format == 'worst':
2923 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
2924 elif req_format == '-1':
2925 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
2928 if req_format not in url_map:
2929 self._downloader.trouble(u'ERROR: requested format not available')
2931 video_url_list = [(req_format, url_map[req_format])] # Specific format
2933 for format_param, video_real_url in video_url_list:
2935 # At this point we have a new video
2936 self._downloader.increment_downloads()
2939 video_extension = self._video_extensions.get(format_param, 'mp4')
2942 # Process video information
2943 self._downloader.process_info({
2944 'id': video_id.decode('utf-8'),
2945 'url': video_real_url.decode('utf-8'),
2946 'uploader': video_uploader.decode('utf-8'),
2947 'upload_date': upload_date,
2948 'title': video_title,
2949 'stitle': simple_title,
2950 'ext': video_extension.decode('utf-8'),
2951 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
2952 'thumbnail': video_thumbnail.decode('utf-8'),
2953 'description': video_description.decode('utf-8'),
2956 except UnavailableVideoError, err:
2957 self._downloader.trouble(u'\nERROR: unable to download video')
# NOTE(review): numbered listing with elided structural lines; code kept
# byte-identical, comments only.
2959 class BlipTVIE(InfoExtractor):
2960 """Information extractor for blip.tv"""
2962 _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
# Used to pull the filename extension off the media URL.
2963 _URL_EXT = r'^.*\.([a-z0-9]+)$'
2964 IE_NAME = u'blip.tv'
2966 def report_extraction(self, file_id):
2967 """Report information extraction."""
2968 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
2970 def report_direct_download(self, title):
2971 """Report information extraction."""
2972 self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))
2974 def _real_extract(self, url):
2975 mobj = re.match(self._VALID_URL, url)
2977 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# Asks blip.tv for a JSON description of the page instead of HTML.
# (cchar is the query separator, chosen in lines elided from this listing —
# presumably '&' when the URL already has a query string, '?' otherwise.)
2984 json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
2985 request = urllib2.Request(json_url)
2986 self.report_extraction(mobj.group(1))
2989 urlh = urllib2.urlopen(request)
# If the server answers with a video/* body, the URL is already the media
# file: build a minimal info dict from the URL itself, no JSON parsing.
2990 if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
2991 basename = url.split('/')[-1]
2992 title,ext = os.path.splitext(basename)
2993 title = title.decode('UTF-8')
2994 ext = ext.replace('.', '')
2995 self.report_direct_download(title)
3000 'stitle': _simplify_title(title),
3004 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3005 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
3007 if info is None: # Regular URL
3009 json_code = urlh.read()
3010 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3011 self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
3015 json_data = json.loads(json_code)
# The payload nests the record under 'Post' for post pages.
3016 if 'Post' in json_data:
3017 data = json_data['Post']
# NOTE(review): '%H:%M%p' mixes 24-hour %H with AM/PM %p — %I would be the
# matching 12-hour directive; verify against actual blip.tv datestamps.
3021 upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
3022 video_url = data['media']['url']
3023 umobj = re.match(self._URL_EXT, video_url)
3025 raise ValueError('Can not determine filename extension')
3026 ext = umobj.group(1)
3029 'id': data['item_id'],
3031 'uploader': data['display_name'],
3032 'upload_date': upload_date,
3033 'title': data['title'],
3034 'stitle': _simplify_title(data['title']),
3036 'format': data['media']['mimeType'],
3037 'thumbnail': data['thumbnailUrl'],
3038 'description': data['description'],
3039 'player_url': data['embedUrl']
# Both JSON shape problems and missing keys are reported, not raised.
3041 except (ValueError,KeyError), err:
3042 self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
3045 self._downloader.increment_downloads()
3048 self._downloader.process_info(info)
3049 except UnavailableVideoError, err:
3050 self._downloader.trouble(u'\nERROR: unable to download video')
3053 class MyVideoIE(InfoExtractor):
3054 """Information Extractor for myvideo.de."""
3056 _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
3057 IE_NAME = u'myvideo'
3059 def __init__(self, downloader=None):
3060 InfoExtractor.__init__(self, downloader)
3062 def report_download_webpage(self, video_id):
3063 """Report webpage download."""
3064 self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id)
3066 def report_extraction(self, video_id):
3067 """Report information extraction."""
3068 self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id)
3070 def _real_extract(self,url):
3071 mobj = re.match(self._VALID_URL, url)
3073 self._download.trouble(u'ERROR: invalid URL: %s' % url)
3076 video_id = mobj.group(1)
3079 request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id)
3081 self.report_download_webpage(video_id)
3082 webpage = urllib2.urlopen(request).read()
3083 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3084 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
3087 self.report_extraction(video_id)
3088 mobj = re.search(r'<link rel=\'image_src\' href=\'(http://is[0-9].myvideo\.de/de/movie[0-9]+/[a-f0-9]+)/thumbs/[^.]+\.jpg\' />',
3091 self._downloader.trouble(u'ERROR: unable to extract media URL')
3093 video_url = mobj.group(1) + ('/%s.flv' % video_id)
3095 mobj = re.search('<title>([^<]+)</title>', webpage)
3097 self._downloader.trouble(u'ERROR: unable to extract title')
3100 video_title = mobj.group(1)
3101 video_title = sanitize_title(video_title)
3103 simple_title = _simplify_title(video_title)
3106 self._downloader.process_info({
3110 'upload_date': u'NA',
3111 'title': video_title,
3112 'stitle': simple_title,
3117 except UnavailableVideoError:
3118 self._downloader.trouble(u'\nERROR: Unable to download video')
# Extractor for Comedy Central full-episode pages (The Daily Show / Colbert
# Report). Resolves 'tds'/'cr'-style shortcuts to the show front page,
# follows the redirect to a concrete episode, locates the Flash player URI,
# downloads the MRSS show index, then per item fetches the mediaGen config
# and picks a rendition.
# NOTE(review): this listing is truncated — guard clauses, try: lines and
# return statements are missing between the numbered lines below.
3120 class ComedyCentralIE(InfoExtractor):
3121 """Information extractor for The Daily Show and Colbert Report """
3123 _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
3124 IE_NAME = u'comedycentral'
3126 def report_extraction(self, episode_id):
3127 self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)
3129 def report_config_download(self, episode_id):
3130 self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)
3132 def report_index_download(self, episode_id):
3133 self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)
3135 def report_player_url(self, episode_id):
3136 self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)
3138 def _real_extract(self, url):
3139 mobj = re.match(self._VALID_URL, url)
3141 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# Shortname forms (":tds", ":colbert", ...) are rewritten to the show's
# full-episodes page and re-matched against _VALID_URL.
3144 if mobj.group('shortname'):
3145 if mobj.group('shortname') in ('tds', 'thedailyshow'):
3146 url = u'http://www.thedailyshow.com/full-episodes/'
3148 url = u'http://www.colbertnation.com/full-episodes/'
3149 mobj = re.match(self._VALID_URL, url)
3150 assert mobj is not None
# No explicit episode in the URL means "download the newest episode".
3152 dlNewest = not mobj.group('episode')
3154 epTitle = mobj.group('showname')
3156 epTitle = mobj.group('episode')
3158 req = urllib2.Request(url)
3159 self.report_extraction(epTitle)
3161 htmlHandle = urllib2.urlopen(req)
3162 html = htmlHandle.read()
3163 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3164 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
# The front page redirects to the latest episode; geturl() exposes the
# final (episode-specific) URL after redirects.
3167 url = htmlHandle.geturl()
3168 mobj = re.match(self._VALID_URL, url)
3170 self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
3172 if mobj.group('episode') == '':
3173 self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
3175 epTitle = mobj.group('episode')
# Flash URL appears either as a <param name="movie"> or a JS "var url =".
3177 mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
3178 if len(mMovieParams) == 0:
3179 self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
3182 playerUrl_raw = mMovieParams[0][0]
3183 self.report_player_url(epTitle)
# Resolve the player URL through its redirects as well.
3185 urlHandle = urllib2.urlopen(playerUrl_raw)
3186 playerUrl = urlHandle.geturl()
3187 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3188 self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
3191 uri = mMovieParams[0][1]
3192 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
3193 self.report_index_download(epTitle)
3195 indexXml = urllib2.urlopen(indexUrl).read()
3196 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3197 self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
# MRSS index: one <item> per video act of the episode.
3200 idoc = xml.etree.ElementTree.fromstring(indexXml)
3201 itemEls = idoc.findall('.//item')
3202 for itemEl in itemEls:
3203 mediaId = itemEl.findall('./guid')[0].text
3204 shortMediaId = mediaId.split(':')[-1]
3205 showId = mediaId.split(':')[-2].replace('.com', '')
3206 officialTitle = itemEl.findall('./title')[0].text
3207 officialDate = itemEl.findall('./pubDate')[0].text
3209 configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
3210 urllib.urlencode({'uri': mediaId}))
3211 configReq = urllib2.Request(configUrl)
3212 self.report_config_download(epTitle)
3214 configXml = urllib2.urlopen(configReq).read()
3215 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3216 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
3219 cdoc = xml.etree.ElementTree.fromstring(configXml)
# Collect (bitrate, src) pairs; presumably accumulated into `turls`
# (assignment line missing from this listing — TODO confirm).
3221 for rendition in cdoc.findall('.//rendition'):
3222 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
3226 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
3229 # For now, just pick the highest bitrate
3230 format,video_url = turls[-1]
3232 self._downloader.increment_downloads()
3234 effTitle = showId + u'-' + epTitle
3239 'upload_date': officialDate,
3241 'stitle': _simplify_title(effTitle),
3245 'description': officialTitle,
3246 'player_url': playerUrl
3250 self._downloader.process_info(info)
3251 except UnavailableVideoError, err:
3252 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
# Extractor for escapistmagazine.com videos: scrapes OpenGraph meta tags
# for description/thumbnail/player URL, pulls the player's config= query
# parameter, fetches that config (JS object, coerced to JSON) and takes
# the media URL from its playlist.
# NOTE(review): listing is truncated — 'if ... is None' guards, try:
# lines and return statements are missing between the numbered lines.
3256 class EscapistIE(InfoExtractor):
3257 """Information extractor for The Escapist """
3259 _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
3260 IE_NAME = u'escapist'
3262 def report_extraction(self, showName):
3263 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)
3265 def report_config_download(self, showName):
3266 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)
3268 def _real_extract(self, url):
3269 htmlParser = HTMLParser.HTMLParser()
3271 mobj = re.match(self._VALID_URL, url)
3273 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3275 showName = mobj.group('showname')
3276 videoId = mobj.group('episode')
3278 self.report_extraction(showName)
3280 webPage = urllib2.urlopen(url).read()
3281 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3282 self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
# OpenGraph / meta tags carry description, thumbnail and player URL.
# NOTE(review): these .group(1) calls assume the regexes matched; no
# None-checks are visible in this listing.
3285 descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
3286 description = htmlParser.unescape(descMatch.group(1))
3287 imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
3288 imgUrl = htmlParser.unescape(imgMatch.group(1))
3289 playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
3290 playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
3291 configUrlMatch = re.search('config=(.*)$', playerUrl)
3292 configUrl = urllib2.unquote(configUrlMatch.group(1))
3294 self.report_config_download(showName)
3296 configJSON = urllib2.urlopen(configUrl).read()
3297 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3298 self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
3301 # Technically, it's JavaScript, not JSON
3302 configJSON = configJSON.replace("'", '"')
3305 config = json.loads(configJSON)
3306 except (ValueError,), err:
3307 self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
# playlist[1] (not [0]) holds the actual video entry.
3310 playlist = config['playlist']
3311 videoUrl = playlist[1]['url']
3313 self._downloader.increment_downloads()
3317 'uploader': showName,
3318 'upload_date': None,
3320 'stitle': _simplify_title(showName),
3323 'thumbnail': imgUrl,
3324 'description': description,
3325 'player_url': playerUrl,
3329 self._downloader.process_info(info)
3330 except UnavailableVideoError, err:
3331 self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
# Extractor for collegehumor.com: reads the internal video id from the
# watch page, then fetches the moogaloop XML manifest for title, file URL,
# thumbnail and description.
# NOTE(review): listing is truncated — guards, try: lines and returns
# are missing between the numbered lines.
3334 class CollegeHumorIE(InfoExtractor):
3335 """Information extractor for collegehumor.com"""
3337 _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
3338 IE_NAME = u'collegehumor'
3340 def report_webpage(self, video_id):
3341 """Report information extraction."""
3342 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3344 def report_extraction(self, video_id):
3345 """Report information extraction."""
3346 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3348 def _real_extract(self, url):
3349 htmlParser = HTMLParser.HTMLParser()
3351 mobj = re.match(self._VALID_URL, url)
3353 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3355 video_id = mobj.group('videoid')
3357 self.report_webpage(video_id)
3358 request = urllib2.Request(url)
3360 webpage = urllib2.urlopen(request).read()
3361 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3362 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
# The page embeds an internal id (distinct from the URL id) used by
# the moogaloop XML endpoint below.
3365 m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
3367 self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
3369 internal_video_id = m.group('internalvideoid')
3373 'internal_id': internal_video_id,
3376 self.report_extraction(video_id)
3377 xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
3379 metaXml = urllib2.urlopen(xmlUrl).read()
3380 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3381 self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
# Parse the manifest; the findall(...)[0] pattern raises IndexError on
# malformed XML, reported as the error below.
3384 mdoc = xml.etree.ElementTree.fromstring(metaXml)
3386 videoNode = mdoc.findall('./video')[0]
3387 info['description'] = videoNode.findall('./description')[0].text
3388 info['title'] = videoNode.findall('./caption')[0].text
3389 info['stitle'] = _simplify_title(info['title'])
3390 info['url'] = videoNode.findall('./file')[0].text
3391 info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
3392 info['ext'] = info['url'].rpartition('.')[2]
3393 info['format'] = info['ext']
3395 self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3398 self._downloader.increment_downloads()
3401 self._downloader.process_info(info)
3402 except UnavailableVideoError, err:
3403 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for xvideos.com: scrapes the watch page for the flv_url
# query parameter (URL-encoded media URL), the <title> tag and the
# thumbnail URL.
# NOTE(review): listing is truncated — 'if mobj is None' guards, try:
# lines and returns are missing between the numbered lines.
3406 class XVideosIE(InfoExtractor):
3407 """Information extractor for xvideos.com"""
3409 _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
3410 IE_NAME = u'xvideos'
3412 def report_webpage(self, video_id):
3413 """Report information extraction."""
3414 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3416 def report_extraction(self, video_id):
3417 """Report information extraction."""
3418 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3420 def _real_extract(self, url):
3421 htmlParser = HTMLParser.HTMLParser()
3423 mobj = re.match(self._VALID_URL, url)
3425 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3427 video_id = mobj.group(1).decode('utf-8')
3429 self.report_webpage(video_id)
3431 request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
3433 webpage = urllib2.urlopen(request).read()
3434 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3435 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3438 self.report_extraction(video_id)
# Media URL is percent-encoded in the flv_url parameter.
3442 mobj = re.search(r'flv_url=(.+?)&', webpage)
3444 self._downloader.trouble(u'ERROR: unable to extract video url')
3446 video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))
3450 mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
3452 self._downloader.trouble(u'ERROR: unable to extract video title')
3454 video_title = mobj.group(1).decode('utf-8')
3457 # Extract video thumbnail
3458 mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
3460 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
3462 video_thumbnail = mobj.group(1).decode('utf-8')
3466 self._downloader.increment_downloads()
3471 'upload_date': None,
3472 'title': video_title,
3473 'stitle': _simplify_title(video_title),
3476 'thumbnail': video_thumbnail,
3477 'description': None,
3482 self._downloader.process_info(info)
3483 except UnavailableVideoError, err:
3484 self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
# NOTE(review): listing is truncated — guards, try: lines and returns
# are missing between the numbered lines below.
3487 class SoundcloudIE(InfoExtractor):
3488 """Information extractor for soundcloud.com
3489 To access the media, the uid of the song and a stream token
3490 must be extracted from the page source and the script must make
3491 a request to media.soundcloud.com/crossdomain.xml. Then
3492 the media can be grabbed by requesting from an url composed
3493 of the stream token and uid
3496 _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
3497 IE_NAME = u'soundcloud'
3499 def __init__(self, downloader=None):
3500 InfoExtractor.__init__(self, downloader)
3502 def report_webpage(self, video_id):
3503 """Report information extraction."""
3504 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3506 def report_extraction(self, video_id):
3507 """Report information extraction."""
3508 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3510 def _real_extract(self, url):
3511 htmlParser = HTMLParser.HTMLParser()
3513 mobj = re.match(self._VALID_URL, url)
3515 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3518 # extract uploader (which is in the url)
3519 uploader = mobj.group(1).decode('utf-8')
3520 # extract simple title (uploader + slug of song title)
3521 slug_title = mobj.group(2).decode('utf-8')
3522 simple_title = uploader + '-' + slug_title
3524 self.report_webpage('%s/%s' % (uploader, slug_title))
3526 request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
3528 webpage = urllib2.urlopen(request).read()
3529 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3530 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3533 self.report_extraction('%s/%s' % (uploader, slug_title))
3535 # extract uid and stream token that soundcloud hands out for access
3536 mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
3538 video_id = mobj.group(1)
3539 stream_token = mobj.group(2)
3541 # extract unsimplified title
3542 mobj = re.search('"title":"(.*?)",', webpage)
3544 title = mobj.group(1)
3546 # construct media url (with uid/token)
3547 mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
3548 mediaURL = mediaURL % (video_id, stream_token)
3551 description = u'No description available'
3552 mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
3554 description = mobj.group(1)
# Page date like "September 1, 2011 11:22" -> YYYYMMDD; parse failures
# are swallowed by the except below (upload_date presumably stays at a
# default set on a line missing from this listing).
3558 mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
3561 upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
3562 except Exception, e:
3565 # for soundcloud, a request to a cross domain is required for cookies
# NOTE(review): std_headers is passed as urllib2.Request's 2nd positional
# argument, which is `data` (making this a POST) — headers is the 3rd
# argument. Likely a bug; verify intent before changing.
3566 request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)
3569 self._downloader.process_info({
3570 'id': video_id.decode('utf-8'),
3572 'uploader': uploader.decode('utf-8'),
3573 'upload_date': upload_date,
3574 'title': simple_title.decode('utf-8'),
3575 'stitle': simple_title.decode('utf-8'),
3579 'description': description.decode('utf-8')
3581 except UnavailableVideoError:
3582 self._downloader.trouble(u'\nERROR: unable to download video')
# Extractor for infoq.com presentations: the page embeds a base64-encoded
# path (jsclassref) which is decoded and appended to an rtmpe:// base to
# form the media URL; title and description come from page metadata.
# NOTE(review): listing is truncated — guards, try: lines and returns
# are missing between the numbered lines.
3585 class InfoQIE(InfoExtractor):
3586 """Information extractor for infoq.com"""
3588 _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$'
3591 def report_webpage(self, video_id):
3592 """Report information extraction."""
3593 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3595 def report_extraction(self, video_id):
3596 """Report information extraction."""
3597 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3599 def _real_extract(self, url):
3600 htmlParser = HTMLParser.HTMLParser()
3602 mobj = re.match(self._VALID_URL, url)
3604 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3607 self.report_webpage(url)
3609 request = urllib2.Request(url)
3611 webpage = urllib2.urlopen(request).read()
3612 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3613 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3616 self.report_extraction(url)
# jsclassref holds a base64 blob; decoded, it is the media path.
3620 mobj = re.search(r"jsclassref='([^']*)'", webpage)
3622 self._downloader.trouble(u'ERROR: unable to extract video url')
3624 video_url = 'rtmpe://video.infoq.com/cfx/st/' + urllib2.unquote(mobj.group(1).decode('base64'))
3628 mobj = re.search(r'contentTitle = "(.*?)";', webpage)
3630 self._downloader.trouble(u'ERROR: unable to extract video title')
3632 video_title = mobj.group(1).decode('utf-8')
3634 # Extract description
3635 video_description = u'No description available.'
3636 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', webpage)
3637 if mobj is not None:
3638 video_description = mobj.group(1).decode('utf-8')
# id/extension are derived from the final path component of the media URL.
3640 video_filename = video_url.split('/')[-1]
3641 video_id, extension = video_filename.split('.')
3643 self._downloader.increment_downloads()
3648 'upload_date': None,
3649 'title': video_title,
3650 'stitle': _simplify_title(video_title),
3652 'format': extension, # Extension is always(?) mp4, but seems to be flv
3654 'description': video_description,
3659 self._downloader.process_info(info)
3660 except UnavailableVideoError, err:
3661 self._downloader.trouble(u'\nERROR: unable to download ' + video_url)
# Extractor for mixcloud.com: uses the site's JSON API (cloudcast endpoint)
# to list audio formats/bitrates, probes candidate URLs for the first live
# one, then hands the chosen file to the downloader.
# NOTE(review): listing is truncated — guards, try: lines, returns and
# some loop bodies are missing between the numbered lines.
3663 class MixcloudIE(InfoExtractor):
3664 """Information extractor for www.mixcloud.com"""
3665 _VALID_URL = r'^(?:https?://)?(?:www\.)?mixcloud\.com/([\w\d-]+)/([\w\d-]+)'
3666 IE_NAME = u'mixcloud'
3668 def __init__(self, downloader=None):
3669 InfoExtractor.__init__(self, downloader)
# NOTE(review): file_id parameter is unused in the message below.
3671 def report_download_json(self, file_id):
3672 """Report JSON download."""
3673 self._downloader.to_screen(u'[%s] Downloading json' % self.IE_NAME)
3675 def report_extraction(self, file_id):
3676 """Report information extraction."""
3677 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))
3679 def get_urls(self, jsonData, fmt, bitrate='best'):
3680 """Get urls from 'audio_formats' section in json"""
# jsonData[fmt] is either a bitrate->urls mapping or a plain url list;
# the TypeError fallback below handles the list case.
3683 bitrate_list = jsonData[fmt]
3684 if bitrate is None or bitrate == 'best' or bitrate not in bitrate_list:
3685 bitrate = max(bitrate_list) # select highest
3687 url_list = jsonData[fmt][bitrate]
3688 except TypeError: # we have no bitrate info.
3689 url_list = jsonData[fmt]
3693 def check_urls(self, url_list):
3694 """Returns 1st active url from list"""
# Probe each candidate with a GET; network errors mean "try the next".
3695 for url in url_list:
3697 urllib2.urlopen(url)
3699 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3704 def _print_formats(self, formats):
3705 print 'Available formats:'
3706 for fmt in formats.keys():
3707 for b in formats[fmt]:
3709 ext = formats[fmt][b][0]
3710 print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])
3711 except TypeError: # we have no bitrate info
3712 ext = formats[fmt][0]
3713 print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])
3716 def _real_extract(self, url):
3717 mobj = re.match(self._VALID_URL, url)
3719 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3721 # extract uploader & filename from url
3722 uploader = mobj.group(1).decode('utf-8')
3723 file_id = uploader + "-" + mobj.group(2).decode('utf-8')
3725 # construct API request
3726 file_url = 'http://www.mixcloud.com/api/1/cloudcast/' + '/'.join(url.split('/')[-3:-1]) + '.json'
3727 # retrieve .json file with links to files
3728 request = urllib2.Request(file_url)
3730 self.report_download_json(file_url)
3731 jsonData = urllib2.urlopen(request).read()
3732 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3733 self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err))
3737 json_data = json.loads(jsonData)
3738 player_url = json_data['player_swf_url']
3739 formats = dict(json_data['audio_formats'])
3741 req_format = self._downloader.params.get('format', None)
3744 if self._downloader.params.get('listformats', None):
3745 self._print_formats(formats)
# 'best' / unspecified: scan formats until a live URL is found;
# otherwise honor the exact requested format.
3748 if req_format is None or req_format == 'best':
3749 for format_param in formats.keys():
3750 url_list = self.get_urls(formats, format_param)
3752 file_url = self.check_urls(url_list)
3753 if file_url is not None:
3756 if req_format not in formats.keys():
3757 self._downloader.trouble(u'ERROR: format is not available')
3760 url_list = self.get_urls(formats, req_format)
3761 file_url = self.check_urls(url_list)
3762 format_param = req_format
3765 self._downloader.increment_downloads()
3767 # Process file information
3768 self._downloader.process_info({
3769 'id': file_id.decode('utf-8'),
3770 'url': file_url.decode('utf-8'),
3771 'uploader': uploader.decode('utf-8'),
3772 'upload_date': u'NA',
3773 'title': json_data['name'],
3774 'stitle': _simplify_title(json_data['name']),
3775 'ext': file_url.split('.')[-1].decode('utf-8'),
3776 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
3777 'thumbnail': json_data['thumbnail_url'],
3778 'description': json_data['description'],
3779 'player_url': player_url.decode('utf-8'),
3781 except UnavailableVideoError, err:
3782 self._downloader.trouble(u'ERROR: unable to download file')
# Extractor for Stanford Open ClassRoom. Handles three URL shapes:
# a specific video (course+video), a course page (list of VideoPage
# links), and the site root (list of CoursePage links); the latter two
# recurse via self.extract() on each discovered reference.
# NOTE(review): listing is truncated — guards, try: lines, returns and
# some else branches are missing between the numbered lines.
3784 class StanfordOpenClassroomIE(InfoExtractor):
3785 """Information extractor for Stanford's Open ClassRoom"""
3787 _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
3788 IE_NAME = u'stanfordoc'
3790 def report_download_webpage(self, objid):
3791 """Report information extraction."""
3792 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid))
3794 def report_extraction(self, video_id):
3795 """Report information extraction."""
3796 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3798 def _real_extract(self, url):
3799 mobj = re.match(self._VALID_URL, url)
3801 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
# Case 1: a single video identified by course + video query params.
3804 if mobj.group('course') and mobj.group('video'): # A specific video
3805 course = mobj.group('course')
3806 video = mobj.group('video')
3808 'id': _simplify_title(course + '_' + video),
3811 self.report_extraction(info['id'])
3812 baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/'
3813 xmlUrl = baseUrl + video + '.xml'
3815 metaXml = urllib2.urlopen(xmlUrl).read()
3816 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3817 self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err))
3819 mdoc = xml.etree.ElementTree.fromstring(metaXml)
3821 info['title'] = mdoc.findall('./title')[0].text
3822 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text
3824 self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
3826 info['stitle'] = _simplify_title(info['title'])
3827 info['ext'] = info['url'].rpartition('.')[2]
3828 info['format'] = info['ext']
3829 self._downloader.increment_downloads()
3831 self._downloader.process_info(info)
3832 except UnavailableVideoError, err:
3833 self._downloader.trouble(u'\nERROR: unable to download video')
# Case 2: a course page -> emit a reference entry per VideoPage link.
3834 elif mobj.group('course'): # A course page
3835 unescapeHTML = HTMLParser.HTMLParser().unescape
3837 course = mobj.group('course')
3839 'id': _simplify_title(course),
3843 self.report_download_webpage(info['id'])
3845 coursepage = urllib2.urlopen(url).read()
3846 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3847 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
3850 m = re.search('<h1>([^<]+)</h1>', coursepage)
3852 info['title'] = unescapeHTML(m.group(1))
3854 info['title'] = info['id']
3855 info['stitle'] = _simplify_title(info['title'])
3857 m = re.search('<description>([^<]+)</description>', coursepage)
3859 info['description'] = unescapeHTML(m.group(1))
3861 links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage))
3864 'type': 'reference',
3865 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage),
3869 for entry in info['list']:
3870 assert entry['type'] == 'reference'
3871 self.extract(entry['url'])
# Case 3: the site root -> emit a reference entry per CoursePage link.
3873 unescapeHTML = HTMLParser.HTMLParser().unescape
3876 'id': 'Stanford OpenClassroom',
3880 self.report_download_webpage(info['id'])
3881 rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
3883 rootpage = urllib2.urlopen(rootURL).read()
3884 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3885 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err))
3888 info['title'] = info['id']
3889 info['stitle'] = _simplify_title(info['title'])
3891 links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage))
3894 'type': 'reference',
3895 'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage),
3899 for entry in info['list']:
3900 assert entry['type'] == 'reference'
3901 self.extract(entry['url'])
3903 class MTVIE(InfoExtractor):
3904 """Information extractor for MTV.com"""
3906 _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$'
3909 def report_webpage(self, video_id):
3910 """Report information extraction."""
3911 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))
3913 def report_extraction(self, video_id):
3914 """Report information extraction."""
3915 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))
3917 def _real_extract(self, url):
3918 mobj = re.match(self._VALID_URL, url)
3920 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
3922 if not mobj.group('proto'):
3923 url = 'http://' + url
3924 video_id = mobj.group('videoid')
3925 self.report_webpage(video_id)
3927 request = urllib2.Request(url)
3929 webpage = urllib2.urlopen(request).read()
3930 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3931 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
3934 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage)
3936 self._downloader.trouble(u'ERROR: unable to extract song name')
3938 song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3939 mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage)
3941 self._downloader.trouble(u'ERROR: unable to extract performer')
3943 performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1'))
3944 video_title = performer + ' - ' + song_name
3946 mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage)
3948 self._downloader.trouble(u'ERROR: unable to mtvn_uri')
3950 mtvn_uri = mobj.group(1)
3952 mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage)
3954 self._downloader.trouble(u'ERROR: unable to extract content id')
3956 content_id = mobj.group(1)
3958 videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri
3959 self.report_extraction(video_id)
3960 request = urllib2.Request(videogen_url)
3962 metadataXml = urllib2.urlopen(request).read()
3963 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
3964 self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err))
3967 mdoc = xml.etree.ElementTree.fromstring(metadataXml)
3968 renditions = mdoc.findall('.//rendition')
3970 # For now, always pick the highest quality.
3971 rendition = renditions[-1]
3974 _,_,ext = rendition.attrib['type'].partition('/')
3975 format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate']
3976 video_url = rendition.find('./src').text
3978 self._downloader.trouble('Invalid rendition field.')
3981 self._downloader.increment_downloads()
3985 'uploader': performer,
3986 'title': video_title,
3987 'stitle': _simplify_title(video_title),
3993 self._downloader.process_info(info)
3994 except UnavailableVideoError, err:
3995 self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
3998 class PostProcessor(object):
3999 """Post Processor class.
4001 PostProcessor objects can be added to downloaders with their
4002 add_post_processor() method. When the downloader has finished a
4003 successful download, it will take its internal chain of PostProcessors
4004 and start calling the run() method on each one of them, first with
4005 an initial argument and then with the returned value of the previous
4008 The chain will be stopped if one of them ever returns None or the end
4009 of the chain is reached.
4011 PostProcessor objects follow a "mutual registration" process similar
4012 to InfoExtractor objects.
4017 def __init__(self, downloader=None):
4018 self._downloader = downloader
4020 def set_downloader(self, downloader):
4021 """Sets the downloader for this PP."""
4022 self._downloader = downloader
4024 def run(self, information):
4025 """Run the PostProcessor.
4027 The "information" argument is a dictionary like the ones
4028 composed by InfoExtractors. The only difference is that this
4029 one has an extra field called "filepath" that points to the
4032 When this method returns None, the postprocessing chain is
4033 stopped. However, this method may return an information
4034 dictionary that will be passed to the next postprocessing
4035 object in the chain. It can be the one it received after
4036 changing some fields.
4038 In addition, this method may raise a PostProcessingError
4039 exception that will be taken into account by the downloader
# Base implementation: pass the info dict through unchanged so the
# chain continues; subclasses override run() with real work.
4042 return information # by default, do nothing
class AudioConversionError(Exception):
	"""Raised when ffmpeg/ffprobe fails while extracting or converting audio.

	Subclasses Exception rather than BaseException: BaseException is
	reserved for interpreter-exit signals (KeyboardInterrupt, SystemExit),
	and deriving from it lets the error escape generic `except Exception`
	handlers. Callers that catch AudioConversionError by name are
	unaffected by this change.
	"""
	def __init__(self, message):
		Exception.__init__(self, message)
		# Keep the original attribute so existing callers reading
		# err.message continue to work.
		self.message = message
4048 class FFmpegExtractAudioPP(PostProcessor):
# Configure the audio-extraction post-processor.
#   preferredcodec: target codec name, or None/'best' to keep the source codec.
#   preferredquality: passed to ffmpeg as the audio bitrate (-ab) when set.
#   keepvideo: when True, the original video file is not deleted afterwards.
4050 def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
4051 PostProcessor.__init__(self, downloader)
4052 if preferredcodec is None:
4053 preferredcodec = 'best'
4054 self._preferredcodec = preferredcodec
4055 self._preferredquality = preferredquality
4056 self._keepvideo = keepvideo
# Probe `path` with ffprobe and return the name of its audio codec.
# Scans ffprobe's key=value output, remembering the last codec_name seen
# and confirming it when a codec_type=audio line follows.
# NOTE(review): listing is truncated — the return statements and the
# audio_codec initialization are missing from the lines below.
4059 def get_audio_codec(path):
4061 cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
4062 handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
4063 output = handle.communicate()[0]
4064 if handle.wait() != 0:
# ffprobe missing or unreadable file: treated as "codec unknown".
4066 except (IOError, OSError):
4069 for line in output.split('\n'):
4070 if line.startswith('codec_name='):
4071 audio_codec = line.split('=')[1].strip()
4072 elif line.strip() == 'codec_type=audio' and audio_codec is not None:
# Run ffmpeg to transcode `path` into `out_path` using `codec` plus any
# extra options; raises AudioConversionError on failure (including a
# missing ffmpeg binary, detected via OSError errno 2 / ENOENT).
# NOTE(review): listing is truncated — lines handling codec=None and the
# re-raise path are missing from the lines below.
4077 def run_ffmpeg(path, out_path, codec, more_opts):
4081 acodec_opts = ['-acodec', codec]
4082 cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
4084 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
4085 stdout,stderr = p.communicate()
4086 except (IOError, OSError):
4087 e = sys.exc_info()[1]
4088 if isinstance(e, OSError) and e.errno == 2:
4089 raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
# Non-zero exit: surface ffmpeg's last stderr line as the error message.
4092 if p.returncode != 0:
4093 msg = stderr.strip().split('\n')[-1]
4094 raise AudioConversionError(msg)
4096 def run(self, information):
4097 path = information['filepath']
4099 filecodec = self.get_audio_codec(path)
4100 if filecodec is None:
4101 self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
4105 if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
4106 if self._preferredcodec == 'm4a' and filecodec == 'aac':
4107 # Lossless, but in another container
4109 extension = self._preferredcodec
4110 more_opts = ['-absf', 'aac_adtstoasc']
4111 elif filecodec in ['aac', 'mp3', 'vorbis']:
4112 # Lossless if possible
4114 extension = filecodec
4115 if filecodec == 'aac':
4116 more_opts = ['-f', 'adts']
4117 if filecodec == 'vorbis':
4121 acodec = 'libmp3lame'
4124 if self._preferredquality is not None:
4125 more_opts += ['-ab', self._preferredquality]
4127 # We convert the audio (lossy)
4128 acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
4129 extension = self._preferredcodec
4131 if self._preferredquality is not None:
4132 more_opts += ['-ab', self._preferredquality]
4133 if self._preferredcodec == 'aac':
4134 more_opts += ['-f', 'adts']
4135 if self._preferredcodec == 'm4a':
4136 more_opts += ['-absf', 'aac_adtstoasc']
4137 if self._preferredcodec == 'vorbis':
4139 if self._preferredcodec == 'wav':
4141 more_opts += ['-f', 'wav']
4143 prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
4144 new_path = prefix + sep + extension
4145 self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
4147 self.run_ffmpeg(path, new_path, acodec, more_opts)
4149 etype,e,tb = sys.exc_info()
4150 if isinstance(e, AudioConversionError):
4151 self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
4153 self._downloader.to_stderr(u'ERROR: error running ffmpeg')
4156 # Try to update the date time for extracted audio file.
4157 if information.get('filetime') is not None:
4159 os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
4161 self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')
4163 if not self._keepvideo:
4165 os.remove(_encodeFilename(path))
4166 except (IOError, OSError):
4167 self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
4170 information['filepath'] = new_path
# Self-update: download the latest script from UPDATE_URL and overwrite the
# running program file in place.
# NOTE(review): the `try:` lines matching the except clauses below are on
# elided lines in this listing.
4174 def updateSelf(downloader, filename):
4175 ''' Update the program file with the latest version from the repository '''
4176 # Note: downloader only used for options
# Fail early if we cannot write the target file.
4177 if not os.access(filename, os.W_OK):
4178 sys.exit('ERROR: no write permissions on %s' % filename)
4180 downloader.to_screen(u'Updating to latest version...')
4184 urlh = urllib.urlopen(UPDATE_URL)
4185 newcontent = urlh.read()
# Parse the downloaded script's own __version__ to detect "already current".
4187 vmatch = re.search("__version__ = '([^']+)'", newcontent)
4188 if vmatch is not None and vmatch.group(1) == __version__:
4189 downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
4193 except (IOError, OSError), err:
4194 sys.exit('ERROR: unable to download latest version')
# Binary mode so the file is written byte-for-byte as downloaded.
4197 outf = open(filename, 'wb')
4199 outf.write(newcontent)
4202 except (IOError, OSError), err:
4203 sys.exit('ERROR: unable to overwrite current version')
4205 downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
# Read extra command-line options from a config file, one shlex-split line
# at a time ('#' comments allowed); a missing file yields no options.
# NOTE(review): the try/except and the loop header around `res` are on
# elided lines.
4213 def _readOptions(filename_bytes):
4215 optionf = open(filename_bytes)
4217 return [] # silently skip if file is not present
4221 res += shlex.split(l, comments=True)
# Custom optparse option formatter: "-o, --option METAVAR" instead of the
# default "--option=METAVAR, -o" style.
# NOTE(review): the initialization of `opts` (likely `opts = []`) is on an
# elided line.
4226 def _format_option_string(option):
4227 ''' ('-o', '--option') -> -o, --format METAVAR'''
4231 if option._short_opts: opts.append(option._short_opts[0])
4232 if option._long_opts: opts.append(option._long_opts[0])
# Insert the ", " separator only when both a short and a long form exist.
4233 if len(opts) > 1: opts.insert(1, ', ')
4235 if option.takes_value(): opts.append(' %s' % option.metavar)
4237 return "".join(opts)
# Detect the terminal width: honor $COLUMNS when set, otherwise ask
# `stty size` (which prints "rows cols").
# NOTE(review): the COLUMNS-return branch, the `try:` line, and the failure
# return are on elided lines.
4239 def _find_term_columns():
4240 columns = os.environ.get('COLUMNS', None)
4245 sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
4246 out,err = sp.communicate()
# Second field of "rows cols" is the column count.
4247 return int(out.split()[1])
# Body of parseOpts(): build the optparse parser (wide-console aware
# formatting), declare all option groups, merge config-file options from
# /etc and the user's config with sys.argv, and return (parser, opts, args).
# NOTE(review): the enclosing `def parseOpts():` line (~4250) and a few
# other lines are elided in this listing.
4253 max_help_position = 80
4255 # No need to wrap help messages if we're on a wide console
4256 columns = _find_term_columns()
4257 if columns: max_width = columns
4259 fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
4260 fmt.format_option_strings = _format_option_string
# 'resolve' lets a later option silently take over a conflicting flag
# (relied upon below: -v is declared for both --version and --verbose).
4263 'version' : __version__,
4265 'usage' : '%prog [options] url [url...]',
4266 'conflict_handler' : 'resolve',
4269 parser = optparse.OptionParser(**kw)
4272 general = optparse.OptionGroup(parser, 'General Options')
4273 selection = optparse.OptionGroup(parser, 'Video Selection')
4274 authentication = optparse.OptionGroup(parser, 'Authentication Options')
4275 video_format = optparse.OptionGroup(parser, 'Video Format Options')
4276 postproc = optparse.OptionGroup(parser, 'Post-processing Options')
4277 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
4278 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
4280 general.add_option('-h', '--help',
4281 action='help', help='print this help text and exit')
4282 general.add_option('-v', '--version',
4283 action='version', help='print program version and exit')
4284 general.add_option('-U', '--update',
4285 action='store_true', dest='update_self', help='update this program to latest version')
4286 general.add_option('-i', '--ignore-errors',
4287 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
4288 general.add_option('-r', '--rate-limit',
4289 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
4290 general.add_option('-R', '--retries',
4291 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
4292 general.add_option('--dump-user-agent',
4293 action='store_true', dest='dump_user_agent',
4294 help='display the current browser identification', default=False)
4295 general.add_option('--list-extractors',
4296 action='store_true', dest='list_extractors',
4297 help='List all supported extractors and the URLs they would handle', default=False)
4299 selection.add_option('--playlist-start',
4300 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
4301 selection.add_option('--playlist-end',
4302 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
4303 selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
4304 selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
4305 selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)
4307 authentication.add_option('-u', '--username',
4308 dest='username', metavar='USERNAME', help='account username')
4309 authentication.add_option('-p', '--password',
4310 dest='password', metavar='PASSWORD', help='account password')
4311 authentication.add_option('-n', '--netrc',
4312 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
4315 video_format.add_option('-f', '--format',
4316 action='store', dest='format', metavar='FORMAT', help='video format code')
4317 video_format.add_option('--all-formats',
4318 action='store_const', dest='format', help='download all available video formats', const='all')
4319 video_format.add_option('--prefer-free-formats',
4320 action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
4321 video_format.add_option('--max-quality',
4322 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
4323 video_format.add_option('-F', '--list-formats',
4324 action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
4327 verbosity.add_option('-q', '--quiet',
4328 action='store_true', dest='quiet', help='activates quiet mode', default=False)
4329 verbosity.add_option('-s', '--simulate',
4330 action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
4331 verbosity.add_option('--skip-download',
4332 action='store_true', dest='skip_download', help='do not download the video', default=False)
4333 verbosity.add_option('-g', '--get-url',
4334 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
4335 verbosity.add_option('-e', '--get-title',
4336 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
4337 verbosity.add_option('--get-thumbnail',
4338 action='store_true', dest='getthumbnail',
4339 help='simulate, quiet but print thumbnail URL', default=False)
4340 verbosity.add_option('--get-description',
4341 action='store_true', dest='getdescription',
4342 help='simulate, quiet but print video description', default=False)
4343 verbosity.add_option('--get-filename',
4344 action='store_true', dest='getfilename',
4345 help='simulate, quiet but print output filename', default=False)
4346 verbosity.add_option('--get-format',
4347 action='store_true', dest='getformat',
4348 help='simulate, quiet but print output format', default=False)
4349 verbosity.add_option('--no-progress',
4350 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
4351 verbosity.add_option('--console-title',
4352 action='store_true', dest='consoletitle',
4353 help='display progress in console titlebar', default=False)
# '-v' re-uses the short flag declared for --version above; the parser's
# conflict_handler='resolve' makes this later definition win for -v.
4354 verbosity.add_option('-v', '--verbose',
4355 action='store_true', dest='verbose', help='print various debugging information', default=False)
4358 filesystem.add_option('-t', '--title',
4359 action='store_true', dest='usetitle', help='use title in file name', default=False)
4360 filesystem.add_option('-l', '--literal',
4361 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
4362 filesystem.add_option('-A', '--auto-number',
4363 action='store_true', dest='autonumber',
4364 help='number downloaded files starting from 00000', default=False)
4365 filesystem.add_option('-o', '--output',
4366 dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
4367 filesystem.add_option('-a', '--batch-file',
4368 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
4369 filesystem.add_option('-w', '--no-overwrites',
4370 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
4371 filesystem.add_option('-c', '--continue',
4372 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
4373 filesystem.add_option('--no-continue',
4374 action='store_false', dest='continue_dl',
4375 help='do not resume partially downloaded files (restart from beginning)')
4376 filesystem.add_option('--cookies',
4377 dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
4378 filesystem.add_option('--no-part',
4379 action='store_true', dest='nopart', help='do not use .part files', default=False)
4380 filesystem.add_option('--no-mtime',
4381 action='store_false', dest='updatetime',
4382 help='do not use the Last-modified header to set the file modification time', default=True)
4383 filesystem.add_option('--write-description',
4384 action='store_true', dest='writedescription',
4385 help='write video description to a .description file', default=False)
4386 filesystem.add_option('--write-info-json',
4387 action='store_true', dest='writeinfojson',
4388 help='write video metadata to a .info.json file', default=False)
4391 postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
4392 help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
4393 postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
4394 help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
4395 postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
4396 help='ffmpeg audio bitrate specification, 128k by default')
4397 postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
4398 help='keeps the video file on disk after the post-processing; the video is erased by default')
4401 parser.add_option_group(general)
4402 parser.add_option_group(selection)
4403 parser.add_option_group(filesystem)
4404 parser.add_option_group(verbosity)
4405 parser.add_option_group(video_format)
4406 parser.add_option_group(authentication)
4407 parser.add_option_group(postproc)
# Config precedence: /etc config, then the per-user config, then the real
# command line (later options override earlier ones in optparse).
4409 xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
4411 userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
4413 userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
4414 argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
4415 opts, args = parser.parse_args(argv)
4417 return parser, opts, args
# Build the ordered list of InfoExtractor instances; URL dispatch picks the
# first extractor whose suitable() matches, so order matters.
# NOTE(review): most list entries and the return statement are on elided
# lines; the shared youtube/google/yahoo instances are passed to the
# playlist/user/search wrappers visible below.
4419 def gen_extractors():
4420 """ Return a list of an instance of every supported extractor.
4421 The order does matter; the first extractor matched is the one handling the URL.
4423 youtube_ie = YoutubeIE()
4424 google_ie = GoogleIE()
4425 yahoo_ie = YahooIE()
4427 YoutubePlaylistIE(youtube_ie),
4428 YoutubeUserIE(youtube_ie),
4429 YoutubeSearchIE(youtube_ie),
4431 MetacafeIE(youtube_ie),
4434 GoogleSearchIE(google_ie),
4437 YahooSearchIE(yahoo_ie),
4450 StanfordOpenClassroomIE(),
# Body of the main driver (_real_main): parse options, set up cookies and
# the urllib2 opener, validate option combinations, configure the
# FileDownloader, register extractors/post-processors, and run the
# downloads.
# NOTE(review): the enclosing `def` line (~4456) and many `try:`/`else:`
# lines are elided in this listing.
4457 parser, opts, args = parseOpts()
4459 # Open appropriate CookieJar
4460 if opts.cookiefile is None:
4461 jar = cookielib.CookieJar()
4464 jar = cookielib.MozillaCookieJar(opts.cookiefile)
# Only load the file if it already exists and is readable; a fresh jar is
# fine otherwise (it gets dumped at exit when --cookies was given).
4465 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
4467 except (IOError, OSError), err:
4468 sys.exit(u'ERROR: unable to open cookie file')
4471 if opts.dump_user_agent:
4472 print std_headers['User-Agent']
4475 # Batch file verification
4477 if opts.batchfile is not None:
4479 if opts.batchfile == '-':
4482 batchfd = open(opts.batchfile, 'r')
4483 batchurls = batchfd.readlines()
4484 batchurls = [x.strip() for x in batchurls]
# Drop blank lines and lines starting with '#', '/' or ';' (comments).
4485 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
4487 sys.exit(u'ERROR: batch file could not be read')
4488 all_urls = batchurls + args
4490 # General configuration
4491 cookie_processor = urllib2.HTTPCookieProcessor(jar)
4492 proxy_handler = urllib2.ProxyHandler()
4493 opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
4494 urllib2.install_opener(opener)
4495 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
4498 print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))
4500 extractors = gen_extractors()
4502 if opts.list_extractors:
4503 for ie in extractors:
# Show which of the given URLs each extractor would handle; each URL is
# claimed by at most one extractor (first match wins).
4505 matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
4506 all_urls = filter(lambda url: url not in matchedUrls, all_urls)
4507 for mu in matchedUrls:
4511 # Conflicting, missing and erroneous options
4512 if opts.usenetrc and (opts.username is not None or opts.password is not None):
4513 parser.error(u'using .netrc conflicts with giving username/password')
4514 if opts.password is not None and opts.username is None:
4515 parser.error(u'account username missing')
4516 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
4517 parser.error(u'using output template conflicts with using title, literal title or auto number')
4518 if opts.usetitle and opts.useliteral:
4519 parser.error(u'using title conflicts with using literal title')
4520 if opts.username is not None and opts.password is None:
4521 opts.password = getpass.getpass(u'Type account password and press return:')
4522 if opts.ratelimit is not None:
4523 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
4524 if numeric_limit is None:
4525 parser.error(u'invalid rate limit specified')
4526 opts.ratelimit = numeric_limit
4527 if opts.retries is not None:
4529 opts.retries = long(opts.retries)
4530 except (TypeError, ValueError), err:
4531 parser.error(u'invalid retry count specified')
4533 opts.playliststart = int(opts.playliststart)
4534 if opts.playliststart <= 0:
4535 raise ValueError(u'Playlist start must be positive')
4536 except (TypeError, ValueError), err:
4537 parser.error(u'invalid playlist start number specified')
4539 opts.playlistend = int(opts.playlistend)
# -1 is the sentinel for "until the end of the playlist".
4540 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
4541 raise ValueError(u'Playlist end must be greater than playlist start')
4542 except (TypeError, ValueError), err:
4543 parser.error(u'invalid playlist end number specified')
4544 if opts.extractaudio:
4545 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
4546 parser.error(u'invalid audio format specified')
4549 fd = FileDownloader({
4550 'usenetrc': opts.usenetrc,
4551 'username': opts.username,
4552 'password': opts.password,
# Any "print X and stop" option implies quiet mode and skipping downloads.
4553 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4554 'forceurl': opts.geturl,
4555 'forcetitle': opts.gettitle,
4556 'forcethumbnail': opts.getthumbnail,
4557 'forcedescription': opts.getdescription,
4558 'forcefilename': opts.getfilename,
4559 'forceformat': opts.getformat,
4560 'simulate': opts.simulate,
4561 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
4562 'format': opts.format,
4563 'format_limit': opts.format_limit,
4564 'listformats': opts.listformats,
# First truthy template wins: explicit -o, then format/title/autonumber
# combinations, falling back to plain '%(id)s.%(ext)s'.
4565 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
4566 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
4567 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
4568 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
4569 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
4570 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
4571 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
4572 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
4573 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
4574 or u'%(id)s.%(ext)s'),
4575 'ignoreerrors': opts.ignoreerrors,
4576 'ratelimit': opts.ratelimit,
4577 'nooverwrites': opts.nooverwrites,
4578 'retries': opts.retries,
4579 'continuedl': opts.continue_dl,
4580 'noprogress': opts.noprogress,
4581 'playliststart': opts.playliststart,
4582 'playlistend': opts.playlistend,
# Writing the video to stdout means progress output must go to stderr.
4583 'logtostderr': opts.outtmpl == '-',
4584 'consoletitle': opts.consoletitle,
4585 'nopart': opts.nopart,
4586 'updatetime': opts.updatetime,
4587 'writedescription': opts.writedescription,
4588 'writeinfojson': opts.writeinfojson,
4589 'matchtitle': opts.matchtitle,
4590 'rejecttitle': opts.rejecttitle,
4591 'max_downloads': opts.max_downloads,
4592 'prefer_free_formats': opts.prefer_free_formats,
4594 for extractor in extractors:
4595 fd.add_info_extractor(extractor)
4598 if opts.extractaudio:
4599 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))
4602 if opts.update_self:
4603 updateSelf(fd, sys.argv[0])
4606 if len(all_urls) < 1:
# -U alone is a valid invocation; only error when no URLs and no update.
4607 if not opts.update_self:
4608 parser.error(u'you must provide at least one URL')
4613 retcode = fd.download(all_urls)
4614 except MaxDownloadsReached:
4615 fd.to_screen(u'--max-download limit reached, aborting.')
4618 # Dump cookie jar if requested
4619 if opts.cookiefile is not None:
4622 except (IOError, OSError), err:
4623 sys.exit(u'ERROR: unable to save cookie jar')
# Tail of main(): translate known exceptions from the driver into exit
# messages, plus the script entry-point guard.
# NOTE(review): the enclosing `def main():`, its `try:` line, and the call
# under the __main__ guard are on elided lines in this listing.
4630 except DownloadError:
4632 except SameFileError:
4633 sys.exit(u'ERROR: fixed output name but more than one file to download')
4634 except KeyboardInterrupt:
4635 sys.exit(u'\nERROR: Interrupted by user')
4637 if __name__ == '__main__':
4640 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: