Bump version number
[youtube-dl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
7 import htmlentitydefs
8 import httplib
9 import locale
10 import math
11 import netrc
12 import os
13 import os.path
14 import re
15 import socket
16 import string
17 import subprocess
18 import sys
19 import time
20 import urllib
21 import urllib2
22
# parse_qs was moved from the cgi module to the urlparse module recently
# (in newer Python 2.x releases); fall back to the old cgi location when
# running on an older interpreter.
try:
        from urlparse import parse_qs
except ImportError:
        from cgi import parse_qs
28
# Default HTTP headers sent with every request. They mimic a regular
# Firefox browser so servers don't serve degraded content to a script.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
}
35
# Characters allowed in "simplified" titles (ASCII letters and digits only);
# .decode('ascii') makes the result a unicode string under Python 2.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
37
38 def preferredencoding():
39         """Get preferred encoding.
40
41         Returns the best encoding scheme for the system, based on
42         locale.getpreferredencoding() and some further tweaks.
43         """
44         def yield_preferredencoding():
45                 try:
46                         pref = locale.getpreferredencoding()
47                         u'TEST'.encode(pref)
48                 except:
49                         pref = 'UTF-8'
50                 while True:
51                         yield pref
52         return yield_preferredencoding().next()
53
class DownloadError(Exception):
        """Raised when a download fails and errors are not being ignored.

        FileDownloader objects throw this exception when they are not
        configured to continue on errors; it carries the appropriate
        error message.
        """
        pass
62
class SameFileError(Exception):
        """Raised when several downloads would collide on one output file.

        FileDownloader objects throw this when they detect that multiple
        files would have to be written to the same path on disk.
        """
        pass
70
class PostProcessingError(Exception):
        """Raised by a PostProcessor's .run() method on failure.

        Signals that an error occurred while performing the
        postprocessing task.
        """
        pass
78
class UnavailableFormatError(Exception):
        """Raised when a video is requested in a format it does not offer.

        Thrown whenever the format code asked for is not available for
        the video in question.
        """
        pass
86
class ContentTooShortError(Exception):
        """Content Too Short exception.

        This exception may be raised by FileDownloader objects when a file they
        download is too small for what the server announced first, indicating
        the connection was probably interrupted.
        """
        # Both in bytes
        downloaded = None
        expected = None

        def __init__(self, downloaded, expected):
                # Fix: initialize the Exception base class so that e.args is
                # populated and str(e) is informative instead of empty. The
                # original __init__ never called the parent constructor.
                Exception.__init__(self, downloaded, expected)
                self.downloaded = downloaded
                self.expected = expected
101
class FileDownloader(object):
        """File Downloader class.

        File downloader objects are the ones responsible of downloading the
        actual video file and writing it to disk if the user has requested
        it, among some other tasks. In most cases there should be one per
        program. As, given a video URL, the downloader doesn't know how to
        extract all the needed information, task that InfoExtractors do, it
        has to pass the URL to one of them.

        For this, file downloader objects have a method that allows
        InfoExtractors to be registered in a given order. When it is passed
        a URL, the file downloader handles it to the first InfoExtractor it
        finds that reports being able to handle it. The InfoExtractor extracts
        all the information about the video or videos the URL refers to, and
        asks the FileDownloader to process the video information, possibly
        downloading the video.

        File downloaders accept a lot of parameters. In order not to saturate
        the object constructor with arguments, it receives a dictionary of
        options instead. These options are available through the params
        attribute for the InfoExtractors to use. The FileDownloader also
        registers itself as the downloader in charge for the InfoExtractors
        that are added to it, so this is a "mutual registration".

        Available options:

        username:       Username for authentication purposes.
        password:       Password for authentication purposes.
        usenetrc:       Use netrc for authentication instead.
        quiet:          Do not print messages to stdout.
        forceurl:       Force printing final URL.
        forcetitle:     Force printing title.
        simulate:       Do not download the video files.
        format:         Video format code.
        outtmpl:        Template for output names.
        ignoreerrors:   Do not stop on download errors.
        ratelimit:      Download speed limit, in bytes/sec.
        nooverwrites:   Prevent overwriting files.
        continuedl:     Try to continue downloads if possible.
        """

        params = None   # Option dictionary (see "Available options" above)
        _ies = []       # Registered InfoExtractors, tried in registration order
        _pps = []       # Registered PostProcessors, run as a chain
        _download_retcode = None        # Exit code to report: 0 ok, 1 if any error occurred

        def __init__(self, params):
                """Create a FileDownloader object with the given options."""
                self._ies = []
                self._pps = []
                self._download_retcode = 0
                self.params = params
        
        @staticmethod
        def pmkdir(filename):
                """Create directory components in filename. Similar to Unix "mkdir -p"."""
                # Build every ancestor path of filename's directory and create
                # the missing ones in order (root-most first).
                # NOTE(review): "dir" shadows the builtin of the same name.
                components = filename.split(os.sep)
                aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
                aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
                for dir in aggregate:
                        if not os.path.exists(dir):
                                os.mkdir(dir)
        
        @staticmethod
        def format_bytes(bytes):
                """Render a byte count as a short human-readable string, e.g. "1.50M"."""
                if bytes is None:
                        return 'N/A'
                if type(bytes) is str:
                        bytes = float(bytes)
                if bytes == 0.0:
                        exponent = 0
                else:
                        # Magnitude in powers of 1024 selects the suffix below.
                        exponent = long(math.log(bytes, 1024.0))
                suffix = 'bkMGTPEZY'[exponent]
                converted = float(bytes) / float(1024**exponent)
                return '%.2f%s' % (converted, suffix)

        @staticmethod
        def calc_percent(byte_counter, data_len):
                """Return download progress as a fixed-width percent string."""
                if data_len is None:
                        return '---.-%'
                return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

        @staticmethod
        def calc_eta(start, now, total, current):
                """Estimate remaining time as "MM:SS"; "--:--" when unknown or > 99 min."""
                if total is None:
                        return '--:--'
                dif = now - start
                if current == 0 or dif < 0.001: # One millisecond
                        return '--:--'
                rate = float(current) / dif
                eta = long((float(total) - float(current)) / rate)
                (eta_mins, eta_secs) = divmod(eta, 60)
                if eta_mins > 99:
                        return '--:--'
                return '%02d:%02d' % (eta_mins, eta_secs)

        @staticmethod
        def calc_speed(start, now, bytes):
                """Return the average download speed so far as a padded string."""
                dif = now - start
                if bytes == 0 or dif < 0.001: # One millisecond
                        return '%10s' % '---b/s'
                return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

        @staticmethod
        def best_block_size(elapsed_time, bytes):
                """Pick the next read size, tracking the observed rate within [bytes/2, bytes*2]."""
                new_min = max(bytes / 2.0, 1.0)
                new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
                if elapsed_time < 0.001:
                        return long(new_max)
                rate = bytes / elapsed_time
                if rate > new_max:
                        return long(new_max)
                if rate < new_min:
                        return long(new_min)
                return long(rate)

        @staticmethod
        def parse_bytes(bytestr):
                """Parse a string indicating a byte quantity into a long integer."""
                matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
                if matchobj is None:
                        return None
                number = float(matchobj.group(1))
                # An absent suffix yields '', whose index in the table is 0,
                # giving a multiplier of 1024**0 == 1.
                multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
                return long(round(number * multiplier))

        @staticmethod
        def verify_url(url):
                """Verify a URL is valid and data could be downloaded. Return real data URL."""
                request = urllib2.Request(url, None, std_headers)
                data = urllib2.urlopen(request)
                # Reading a single byte is enough to prove the resource serves data.
                data.read(1)
                url = data.geturl()
                data.close()
                return url

        def add_info_extractor(self, ie):
                """Add an InfoExtractor object to the end of the list."""
                self._ies.append(ie)
                ie.set_downloader(self)
        
        def add_post_processor(self, pp):
                """Add a PostProcessor object to the end of the chain."""
                self._pps.append(pp)
                pp.set_downloader(self)
        
        def to_stdout(self, message, skip_eol=False):
                """Print message to stdout if not in quiet mode."""
                # The trailing comma suppresses print's own newline; skip_eol
                # selects between an explicit '\n' and nothing.
                if not self.params.get('quiet', False):
                        print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
                        sys.stdout.flush()
        
        def to_stderr(self, message):
                """Print message to stderr."""
                print >>sys.stderr, message.encode(preferredencoding())
        
        def fixed_template(self):
                """Checks if the output template is fixed (contains no %(...)s fields)."""
                return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

        def trouble(self, message=None):
                """Determine action to take when a download problem appears.

                Depending on if the downloader has been configured to ignore
                download errors or not, this method may throw an exception or
                not when errors are found, after printing the message.
                """
                if message is not None:
                        self.to_stderr(message)
                if not self.params.get('ignoreerrors', False):
                        raise DownloadError(message)
                # When ignoring errors, remember that something went wrong so
                # download() can report a non-zero return code.
                self._download_retcode = 1

        def slow_down(self, start_time, byte_counter):
                """Sleep if the download speed is over the rate limit."""
                rate_limit = self.params.get('ratelimit', None)
                if rate_limit is None or byte_counter == 0:
                        return
                now = time.time()
                elapsed = now - start_time
                if elapsed <= 0.0:
                        return
                speed = float(byte_counter) / elapsed
                if speed > rate_limit:
                        # Sleep just long enough that the average speed since
                        # start_time drops back to the configured limit.
                        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

        def report_destination(self, filename):
                """Report destination filename."""
                self.to_stdout(u'[download] Destination: %s' % filename)
        
        def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
                """Report download progress (overwrites the current line via '\\r')."""
                self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

        def report_resuming_byte(self, resume_len):
                """Report attempt to resume at given byte."""
                self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
        
        def report_file_already_downloaded(self, file_name):
                """Report file has already been fully downloaded."""
                self.to_stdout(u'[download] %s has already been downloaded' % file_name)
        
        def report_unable_to_resume(self):
                """Report it was impossible to resume download."""
                self.to_stdout(u'[download] Unable to resume')
        
        def report_finish(self):
                """Report download finished (terminates the progress line)."""
                self.to_stdout(u'')

        def process_info(self, info_dict):
                """Process a single dictionary returned by an InfoExtractor.

                In simulate mode only verifies the URL and honors the
                forcetitle/forceurl options; otherwise expands the output
                template, creates directories, downloads the data and runs
                the postprocessing chain.
                """
                # Do nothing else if in simulate mode
                if self.params.get('simulate', False):
                        # Verify URL if it's an HTTP one
                        if info_dict['url'].startswith('http'):
                                try:
                                        info_dict['url'] = self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
                                except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
                                        # Any network failure is treated as "this format is unavailable".
                                        raise UnavailableFormatError

                        # Forced printings
                        if self.params.get('forcetitle', False):
                                print info_dict['title'].encode(preferredencoding())
                        if self.params.get('forceurl', False):
                                print info_dict['url'].encode(preferredencoding())

                        return
                        
                try:
                        template_dict = dict(info_dict)
                        # %(epoch)s expands to the current Unix timestamp.
                        template_dict['epoch'] = unicode(long(time.time()))
                        filename = self.params['outtmpl'] % template_dict
                except (ValueError, KeyError), err:
                        # NOTE(review): if trouble() returns (ignoreerrors set),
                        # "filename" is unbound below and raises NameError.
                        self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                if self.params.get('nooverwrites', False) and os.path.exists(filename):
                        self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
                        return

                try:
                        self.pmkdir(filename)
                except (OSError, IOError), err:
                        self.trouble('ERROR: unable to create directories: %s' % str(err))
                        return

                try:
                        success = self._do_download(filename, info_dict['url'].encode('utf-8'))
                except (OSError, IOError), err:
                        # Local I/O failure while writing signals the format as unavailable.
                        raise UnavailableFormatError
                except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        self.trouble('ERROR: unable to download video data: %s' % str(err))
                        return
                except (ContentTooShortError, ), err:
                        self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                        return

                if success:
                        try:
                                self.post_process(filename, info_dict)
                        except (PostProcessingError), err:
                                self.trouble('ERROR: postprocessing: %s' % str(err))
                                return

        def download(self, url_list):
                """Download a given list of URLs.

                Returns the process return code (0 on success, 1 if any
                error occurred while ignoreerrors was set).
                """
                # A fixed template with several URLs would overwrite one file.
                if len(url_list) > 1 and self.fixed_template():
                        raise SameFileError(self.params['outtmpl'])

                for url in url_list:
                        suitable_found = False
                        for ie in self._ies:
                                # Go to next InfoExtractor if not suitable
                                if not ie.suitable(url):
                                        continue

                                # Suitable InfoExtractor found
                                suitable_found = True

                                # Extract information from URL and process it
                                ie.extract(url)

                                # Suitable InfoExtractor had been found; go to next URL
                                break

                        if not suitable_found:
                                self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

                return self._download_retcode

        def post_process(self, filename, ie_info):
                """Run the postprocessing chain on the given file.

                Each PostProcessor receives the info dict (augmented with
                'filepath') and may return None to stop the chain.
                """
                info = dict(ie_info)
                info['filepath'] = filename
                for pp in self._pps:
                        info = pp.run(info)
                        if info is None:
                                break
        
        def _download_with_rtmpdump(self, filename, url):
                """Download an RTMP stream by shelling out to rtmpdump. Returns success flag."""
                self.report_destination(filename)

                # Check for rtmpdump first
                try:
                        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
                except (OSError, IOError):
                        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
                        return False

                # Download using rtmpdump. rtmpdump returns exit code 2 when
                # the connection was interrupted and resuming appears to be
                # possible. This is part of rtmpdump's normal usage, AFAIK.
                basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
                # [[], [...]][bool] selects the resume flags only when continuedl is set.
                retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
                while retval == 2 or retval == 1:
                        self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
                        time.sleep(2.0) # This seems to be needed
                        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
                if retval == 0:
                        self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
                        return True
                else:
                        self.trouble('ERROR: rtmpdump exited with code %d' % retval)
                        return False

        def _do_download(self, filename, url):
                """Download url to filename over HTTP (or delegate RTMP). Returns success flag."""
                # Attempt to download using rtmpdump
                if url.startswith('rtmp'):
                        return self._download_with_rtmpdump(filename, url)

                stream = None
                open_mode = 'wb'
                basic_request = urllib2.Request(url, None, std_headers)
                request = urllib2.Request(url, None, std_headers)

                # Establish possible resume length
                if os.path.isfile(filename):
                        resume_len = os.path.getsize(filename)
                else:
                        resume_len = 0

                # Request parameters in case of being able to resume
                if self.params.get('continuedl', False) and resume_len != 0:
                        self.report_resuming_byte(resume_len)
                        request.add_header('Range','bytes=%d-' % resume_len)
                        open_mode = 'ab'

                # Establish connection
                try:
                        data = urllib2.urlopen(request)
                except (urllib2.HTTPError, ), err:
                        if err.code != 416: #  416 is 'Requested range not satisfiable'
                                raise
                        # Unable to resume; retry without the Range header to
                        # distinguish "already complete" from "server refused".
                        data = urllib2.urlopen(basic_request)
                        content_length = data.info()['Content-Length']

                        if content_length is not None and long(content_length) == resume_len:
                                # Because the file had already been fully downloaded
                                self.report_file_already_downloaded(filename)
                                return True
                        else:
                                # Because the server didn't let us
                                self.report_unable_to_resume()
                                open_mode = 'wb'

                # data_len is the raw header value (a string) or None.
                data_len = data.info().get('Content-length', None)
                data_len_str = self.format_bytes(data_len)
                byte_counter = 0
                block_size = 1024
                start = time.time()
                while True:
                        # Download and write
                        before = time.time()
                        data_block = data.read(block_size)
                        after = time.time()
                        data_block_len = len(data_block)
                        if data_block_len == 0:
                                break
                        byte_counter += data_block_len

                        # Open file just in time
                        if stream is None:
                                try:
                                        stream = open(filename, open_mode)
                                        self.report_destination(filename)
                                except (OSError, IOError), err:
                                        self.trouble('ERROR: unable to open for writing: %s' % str(err))
                                        return False
                        stream.write(data_block)
                        # Adapt the block size to the observed throughput.
                        block_size = self.best_block_size(after - before, data_block_len)

                        # Progress message
                        percent_str = self.calc_percent(byte_counter, data_len)
                        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
                        speed_str = self.calc_speed(start, time.time(), byte_counter)
                        self.report_progress(percent_str, data_len_str, speed_str, eta_str)

                        # Apply rate limit
                        self.slow_down(start, byte_counter)

                self.report_finish()
                # NOTE(review): this compares str(byte_counter) against the raw
                # Content-Length header string; equal values with different
                # textual forms (e.g. leading zeros) would spuriously mismatch.
                if data_len is not None and str(byte_counter) != data_len:
                        raise ContentTooShortError(byte_counter, long(data_len))
                return True
509
class InfoExtractor(object):
        """Information Extractor class.

        An information extractor takes a URL and pulls out everything the
        downloader needs to know about the video (or videos) it refers to:
        the real video URL, the title, a simplified title, the uploader and
        so on. That data is collected into a dictionary and handed to the
        FileDownloader, which may then download the video to disk among
        other outcomes. Each dictionary must provide these fields:

        id:             Video identifier.
        url:            Final video URL.
        uploader:       Nickname of the video uploader.
        title:          Literal title.
        stitle:         Simplified title.
        ext:            Video filename extension.

        Concrete extractors override _real_initialize() and _real_extract(),
        plus the suitable() static method. They are normally instantiated
        and registered with the main downloader.
        """

        _ready = False          # True once _real_initialize() has run
        _downloader = None      # FileDownloader in charge of this extractor

        def __init__(self, downloader=None):
                """Constructor. Receives an optional downloader."""
                self._ready = False
                self.set_downloader(downloader)

        @staticmethod
        def suitable(url):
                """Receives a URL and returns True if suitable for this IE."""
                return False

        def initialize(self):
                """Initializes an instance (authentication, etc). Runs at most once."""
                if self._ready:
                        return
                self._real_initialize()
                self._ready = True

        def extract(self, url):
                """Extracts URL information and returns it in list of dicts."""
                self.initialize()
                return self._real_extract(url)

        def set_downloader(self, downloader):
                """Sets the downloader for this IE."""
                self._downloader = downloader

        def _real_initialize(self):
                """Real initialization process. Redefine in subclasses."""
                pass

        def _real_extract(self, url):
                """Real extraction process. Redefine in subclasses."""
                pass
570
571 class YoutubeIE(InfoExtractor):
572         """Information extractor for youtube.com."""
573
574         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
575         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
576         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
577         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
578         _NETRC_MACHINE = 'youtube'
579         _available_formats = ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
580         _video_extensions = {
581                 '13': '3gp',
582                 '17': 'mp4',
583                 '18': 'mp4',
584                 '22': 'mp4',
585                 '37': 'mp4',
586         }
587
588         @staticmethod
589         def suitable(url):
590                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
591
592         @staticmethod
593         def htmlentity_transform(matchobj):
594                 """Transforms an HTML entity to a Unicode character."""
595                 entity = matchobj.group(1)
596
597                 # Known non-numeric HTML entity
598                 if entity in htmlentitydefs.name2codepoint:
599                         return unichr(htmlentitydefs.name2codepoint[entity])
600
601                 # Unicode character
602                 mobj = re.match(ur'(?u)#(x?\d+)', entity)
603                 if mobj is not None:
604                         numstr = mobj.group(1)
605                         if numstr.startswith(u'x'):
606                                 base = 16
607                                 numstr = u'0%s' % numstr
608                         else:
609                                 base = 10
610                         return unichr(long(numstr, base))
611
612                 # Unknown entity in name, return its literal representation
613                 return (u'&%s;' % entity)
614
615         def report_lang(self):
616                 """Report attempt to set language."""
617                 self._downloader.to_stdout(u'[youtube] Setting language')
618
619         def report_login(self):
620                 """Report attempt to log in."""
621                 self._downloader.to_stdout(u'[youtube] Logging in')
622         
623         def report_age_confirmation(self):
624                 """Report attempt to confirm age."""
625                 self._downloader.to_stdout(u'[youtube] Confirming age')
626         
627         def report_video_info_webpage_download(self, video_id):
628                 """Report attempt to download video info webpage."""
629                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
630         
631         def report_information_extraction(self, video_id):
632                 """Report attempt to extract video information."""
633                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
634         
635         def report_unavailable_format(self, video_id, format):
636                 """Report extracted video URL."""
637                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
638         
639         def report_rtmp_download(self):
640                 """Indicate the download will use the RTMP protocol."""
641                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
642         
	def _real_initialize(self):
		"""Prepare the YouTube session before any extraction.

		Steps: pick credentials (explicit params or ~/.netrc), set the
		site language, log in (if credentials were found) and confirm
		age. Every failure is reported through the downloader and the
		method returns early instead of raising.
		"""
		# Nothing to report to without a downloader; also no params available.
		if self._downloader is None:
			return

		username = None
		password = None
		downloader_params = self._downloader.params

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
			try:
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
				if info is not None:
					username = info[0]
					password = info[2]
				else:
					# Raised on purpose so the shared warning path below handles
					# "machine not in .netrc" the same way as a parse error.
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
				return

		# Set language (response body is discarded; only the cookie side effect matters
		# -- presumably set by the global urllib2 opener; TODO confirm)
		request = urllib2.Request(self._LANG_URL, None, std_headers)
		try:
			self.report_lang()
			urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
			return

		# No authentication to be performed
		if username is None:
			return

		# Log in
		login_form = {
				'current_form': 'loginForm',
				'next':         '/',
				'action_login': 'Log In',
				'username':     username,
				'password':     password,
				}
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
		try:
			self.report_login()
			login_results = urllib2.urlopen(request).read()
			# If the login form is still present in the response, the login failed.
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
				return
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
			return

		# Confirm age
		age_form = {
				'next_url':             '/',
				'action_confirm':       'Confirm',
				}
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
		try:
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
			return
711
712         def _real_extract(self, url):
713                 # Extract video id from URL
714                 mobj = re.match(self._VALID_URL, url)
715                 if mobj is None:
716                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
717                         return
718                 video_id = mobj.group(2)
719
720                 # Downloader parameters
721                 best_quality = False
722                 format_param = None
723                 quality_index = 0
724                 if self._downloader is not None:
725                         params = self._downloader.params
726                         format_param = params.get('format', None)
727                         if format_param == '0':
728                                 format_param = self._available_formats[quality_index]
729                                 best_quality = True
730
731                 while True:
732                         # Extension
733                         video_extension = self._video_extensions.get(format_param, 'flv')
734
735                         # Get video info
736                         video_info_url = 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
737                         request = urllib2.Request(video_info_url, None, std_headers)
738                         try:
739                                 self.report_video_info_webpage_download(video_id)
740                                 video_info_webpage = urllib2.urlopen(request).read()
741                                 video_info = parse_qs(video_info_webpage)
742                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
743                                 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
744                                 return
745                         self.report_information_extraction(video_id)
746
747                         # "t" param
748                         if 'token' not in video_info:
749                                 # Attempt to see if YouTube has issued an error message
750                                 if 'reason' not in video_info:
751                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
752                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
753                                         stream.write(video_info_webpage)
754                                         stream.close()
755                                 else:
756                                         reason = urllib.unquote_plus(video_info['reason'][0])
757                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
758                                 return
759                         token = urllib.unquote_plus(video_info['token'][0])
760                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
761                         if format_param is not None:
762                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
763
764                         # Check possible RTMP download
765                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
766                                 self.report_rtmp_download()
767                                 video_real_url = video_info['conn'][0]
768
769                         # uploader
770                         if 'author' not in video_info:
771                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
772                                 return
773                         video_uploader = urllib.unquote_plus(video_info['author'][0])
774
775                         # title
776                         if 'title' not in video_info:
777                                 self._downloader.trouble(u'ERROR: unable to extract video title')
778                                 return
779                         video_title = urllib.unquote_plus(video_info['title'][0])
780                         video_title = video_title.decode('utf-8')
781                         video_title = re.sub(ur'(?u)&(.+?);', self.htmlentity_transform, video_title)
782                         video_title = video_title.replace(os.sep, u'%')
783
784                         # simplified title
785                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
786                         simple_title = simple_title.strip(ur'_')
787
788                         try:
789                                 # Process video information
790                                 self._downloader.process_info({
791                                         'id':           video_id.decode('utf-8'),
792                                         'url':          video_real_url.decode('utf-8'),
793                                         'uploader':     video_uploader.decode('utf-8'),
794                                         'title':        video_title,
795                                         'stitle':       simple_title,
796                                         'ext':          video_extension.decode('utf-8'),
797                                 })
798
799                                 return
800
801                         except UnavailableFormatError, err:
802                                 if best_quality:
803                                         if quality_index == len(self._available_formats) - 1:
804                                                 # I don't ever expect this to happen
805                                                 self._downloader.trouble(u'ERROR: no known formats available for video')
806                                                 return
807                                         else:
808                                                 self.report_unavailable_format(video_id, format_param)
809                                                 quality_index += 1
810                                                 format_param = self._available_formats[quality_index]
811                                                 continue
812                                 else: 
813                                         self._downloader.trouble('ERROR: format not available for video')
814                                         return
815
816
817 class MetacafeIE(InfoExtractor):
818         """Information Extractor for metacafe.com."""
819
820         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
821         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
822         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
823         _youtube_ie = None
824
825         def __init__(self, youtube_ie, downloader=None):
826                 InfoExtractor.__init__(self, downloader)
827                 self._youtube_ie = youtube_ie
828
829         @staticmethod
830         def suitable(url):
831                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
832
833         def report_disclaimer(self):
834                 """Report disclaimer retrieval."""
835                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
836
837         def report_age_confirmation(self):
838                 """Report attempt to confirm age."""
839                 self._downloader.to_stdout(u'[metacafe] Confirming age')
840         
841         def report_download_webpage(self, video_id):
842                 """Report webpage download."""
843                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
844         
845         def report_extraction(self, video_id):
846                 """Report information extraction."""
847                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
848
849         def _real_initialize(self):
850                 # Retrieve disclaimer
851                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
852                 try:
853                         self.report_disclaimer()
854                         disclaimer = urllib2.urlopen(request).read()
855                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
856                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
857                         return
858
859                 # Confirm age
860                 disclaimer_form = {
861                         'filters': '0',
862                         'submit': "Continue - I'm over 18",
863                         }
864                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
865                 try:
866                         self.report_age_confirmation()
867                         disclaimer = urllib2.urlopen(request).read()
868                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
869                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
870                         return
871         
872         def _real_extract(self, url):
873                 # Extract id and simplified title from URL
874                 mobj = re.match(self._VALID_URL, url)
875                 if mobj is None:
876                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
877                         return
878
879                 video_id = mobj.group(1)
880
881                 # Check if video comes from YouTube
882                 mobj2 = re.match(r'^yt-(.*)$', video_id)
883                 if mobj2 is not None:
884                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
885                         return
886
887                 simple_title = mobj.group(2).decode('utf-8')
888                 video_extension = 'flv'
889
890                 # Retrieve video webpage to extract further information
891                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
892                 try:
893                         self.report_download_webpage(video_id)
894                         webpage = urllib2.urlopen(request).read()
895                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
896                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
897                         return
898
899                 # Extract URL, uploader and title from webpage
900                 self.report_extraction(video_id)
901                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
902                 if mobj is None:
903                         self._downloader.trouble(u'ERROR: unable to extract media URL')
904                         return
905                 mediaURL = urllib.unquote(mobj.group(1))
906
907                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
908                 #if mobj is None:
909                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
910                 #       return
911                 #gdaKey = mobj.group(1)
912                 #
913                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
914
915                 video_url = mediaURL
916
917                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
918                 if mobj is None:
919                         self._downloader.trouble(u'ERROR: unable to extract title')
920                         return
921                 video_title = mobj.group(1).decode('utf-8')
922
923                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
924                 if mobj is None:
925                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
926                         return
927                 video_uploader = mobj.group(1)
928
929                 try:
930                         # Process video information
931                         self._downloader.process_info({
932                                 'id':           video_id.decode('utf-8'),
933                                 'url':          video_url.decode('utf-8'),
934                                 'uploader':     video_uploader.decode('utf-8'),
935                                 'title':        video_title,
936                                 'stitle':       simple_title,
937                                 'ext':          video_extension.decode('utf-8'),
938                         })
939                 except UnavailableFormatError:
940                         self._downloader.trouble(u'ERROR: format not available for video')
941
942
943 class GoogleIE(InfoExtractor):
944         """Information extractor for video.google.com."""
945
946         _VALID_URL = r'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*'
947
948         def __init__(self, downloader=None):
949                 InfoExtractor.__init__(self, downloader)
950
951         @staticmethod
952         def suitable(url):
953                 return (re.match(GoogleIE._VALID_URL, url) is not None)
954
955         def report_download_webpage(self, video_id):
956                 """Report webpage download."""
957                 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
958
959         def report_extraction(self, video_id):
960                 """Report information extraction."""
961                 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
962
963         def _real_initialize(self):
964                 return
965
966         def _real_extract(self, url):
967                 # Extract id from URL
968                 mobj = re.match(self._VALID_URL, url)
969                 if mobj is None:
970                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
971                         return
972
973                 video_id = mobj.group(1)
974
975                 video_extension = 'mp4'
976
977                 # Retrieve video webpage to extract further information
978                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s' % video_id)
979                 try:
980                         self.report_download_webpage(video_id)
981                         webpage = urllib2.urlopen(request).read()
982                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
983                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
984                         return
985
986                 # Extract URL, uploader, and title from webpage
987                 self.report_extraction(video_id)
988                 mobj = re.search(r"download_url:'(.*)'", webpage)
989                 if mobj is None:
990                         self._downloader.trouble(u'ERROR: unable to extract media URL')
991                         return
992                 mediaURL = urllib.unquote(mobj.group(1))
993                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
994                 mediaURL = mediaURL.replace('\\x26', '\x26')
995
996                 video_url = mediaURL
997
998                 mobj = re.search(r'<title>(.*)</title>', webpage)
999                 if mobj is None:
1000                         self._downloader.trouble(u'ERROR: unable to extract title')
1001                         return
1002                 video_title = mobj.group(1).decode('utf-8')
1003
1004                 # Google Video doesn't show uploader nicknames?
1005                 video_uploader = 'uploader'
1006
1007                 try:
1008                         # Process video information
1009                         self._downloader.process_info({
1010                                 'id':           video_id.decode('utf-8'),
1011                                 'url':          video_url.decode('utf-8'),
1012                                 'uploader':     video_uploader.decode('utf-8'),
1013                                 'title':        video_title.decode('utf-8'),
1014                                 'stitle':       video_title.decode('utf-8'),
1015                                 'ext':          video_extension.decode('utf-8'),
1016                         })
1017                 except UnavailableFormatError:
1018                         self._downloader.trouble(u'ERROR: format not available for video')
1019
1020
1021 class PhotobucketIE(InfoExtractor):
1022         """Information extractor for photobucket.com."""
1023
1024         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1025
1026         def __init__(self, downloader=None):
1027                 InfoExtractor.__init__(self, downloader)
1028
1029         @staticmethod
1030         def suitable(url):
1031                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1032
1033         def report_download_webpage(self, video_id):
1034                 """Report webpage download."""
1035                 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1036
1037         def report_extraction(self, video_id):
1038                 """Report information extraction."""
1039                 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1040
1041         def _real_initialize(self):
1042                 return
1043
1044         def _real_extract(self, url):
1045                 # Extract id from URL
1046                 mobj = re.match(self._VALID_URL, url)
1047                 if mobj is None:
1048                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1049                         return
1050
1051                 video_id = mobj.group(1)
1052
1053                 video_extension = 'flv'
1054
1055                 # Retrieve video webpage to extract further information
1056                 request = urllib2.Request(url)
1057                 try:
1058                         self.report_download_webpage(video_id)
1059                         webpage = urllib2.urlopen(request).read()
1060                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1061                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1062                         return
1063
1064                 # Extract URL, uploader, and title from webpage
1065                 self.report_extraction(video_id)
1066                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1067                 if mobj is None:
1068                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1069                         return
1070                 mediaURL = urllib.unquote(mobj.group(1))
1071
1072                 video_url = mediaURL
1073
1074                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1075                 if mobj is None:
1076                         self._downloader.trouble(u'ERROR: unable to extract title')
1077                         return
1078                 video_title = mobj.group(1).decode('utf-8')
1079
1080                 video_uploader = mobj.group(2).decode('utf-8')
1081
1082                 try:
1083                         # Process video information
1084                         self._downloader.process_info({
1085                                 'id':           video_id.decode('utf-8'),
1086                                 'url':          video_url.decode('utf-8'),
1087                                 'uploader':     video_uploader.decode('utf-8'),
1088                                 'title':        video_title.decode('utf-8'),
1089                                 'stitle':       video_title.decode('utf-8'),
1090                                 'ext':          video_extension.decode('utf-8'),
1091                         })
1092                 except UnavailableFormatError:
1093                         self._downloader.trouble(u'ERROR: format not available for video')
1094
1095
class YoutubeSearchIE(InfoExtractor):
	"""Information Extractor for YouTube search queries.

	Accepts queries of the form 'ytsearch:TERMS', 'ytsearchN:TERMS'
	or 'ytsearchall:TERMS' and delegates each found video to the
	YoutubeIE instance passed to the constructor.
	"""
	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	_youtube_ie = None
	# Hard cap on results; also the meaning of the 'all' prefix.
	_max_youtube_results = 1000

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	@staticmethod
	def suitable(url):
		return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, query):
		"""Parse the ytsearch prefix and download the requested number of results."""
		mobj = re.match(self._VALID_QUERY, query)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			return

		# Split off the 'ytsearch...' prefix; prefix[8:] is '', 'all' or digits.
		prefix, query = query.split(':')
		prefix = prefix[8:]
		if prefix == '':
			self._download_n_results(query, 1)
			return
		elif prefix == 'all':
			self._download_n_results(query, self._max_youtube_results)
			return
		else:
			try:
				n = long(prefix)
				if n <= 0:
					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
					return
				elif n > self._max_youtube_results:
					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
					n = self._max_youtube_results
				self._download_n_results(query, n)
				return
			except ValueError: # parsing prefix as integer fails
				self._download_n_results(query, 1)
				return

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""

		video_ids = []
		already_seen = set()
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url, None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				# Each match looks like href="/watch?v=ID": take the text after
				# the second '=' and drop the trailing quote.
				video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
					if len(video_ids) == n:
						# Specified n videos reached
						for id in video_ids:
							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
						return

			# No "Next" link means this was the last results page.
			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				for id in video_ids:
					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
				return

			pagenum = pagenum + 1
1184
1185 class YoutubePlaylistIE(InfoExtractor):
1186         """Information Extractor for YouTube playlists."""
1187
1188         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1189         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1190         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1191         _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'
1192         _youtube_ie = None
1193
1194         def __init__(self, youtube_ie, downloader=None):
1195                 InfoExtractor.__init__(self, downloader)
1196                 self._youtube_ie = youtube_ie
1197         
1198         @staticmethod
1199         def suitable(url):
1200                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1201
1202         def report_download_page(self, playlist_id, pagenum):
1203                 """Report attempt to download playlist page with given number."""
1204                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1205
1206         def _real_initialize(self):
1207                 self._youtube_ie.initialize()
1208         
1209         def _real_extract(self, url):
1210                 # Extract playlist id
1211                 mobj = re.match(self._VALID_URL, url)
1212                 if mobj is None:
1213                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1214                         return
1215
1216                 # Download playlist pages
1217                 playlist_id = mobj.group(1)
1218                 video_ids = []
1219                 pagenum = 1
1220
1221                 while True:
1222                         self.report_download_page(playlist_id, pagenum)
1223                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1224                         try:
1225                                 page = urllib2.urlopen(request).read()
1226                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1227                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1228                                 return
1229
1230                         # Extract video identifiers
1231                         ids_in_page = []
1232                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1233                                 if mobj.group(1) not in ids_in_page:
1234                                         ids_in_page.append(mobj.group(1))
1235                         video_ids.extend(ids_in_page)
1236
1237                         if (self._MORE_PAGES_INDICATOR % (playlist_id.upper(), pagenum + 1)) not in page:
1238                                 break
1239                         pagenum = pagenum + 1
1240
1241                 for id in video_ids:
1242                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1243                 return
1244
1245 class YoutubeUserIE(InfoExtractor):
1246         """Information Extractor for YouTube users."""
1247
1248         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1249         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1250         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1251         _youtube_ie = None
1252
1253         def __init__(self, youtube_ie, downloader=None):
1254                 InfoExtractor.__init__(self, downloader)
1255                 self._youtube_ie = youtube_ie
1256         
1257         @staticmethod
1258         def suitable(url):
1259                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1260
1261         def report_download_page(self, username):
1262                 """Report attempt to download user page."""
1263                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1264
1265         def _real_initialize(self):
1266                 self._youtube_ie.initialize()
1267         
1268         def _real_extract(self, url):
1269                 # Extract username
1270                 mobj = re.match(self._VALID_URL, url)
1271                 if mobj is None:
1272                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1273                         return
1274
1275                 # Download user page
1276                 username = mobj.group(1)
1277                 video_ids = []
1278                 pagenum = 1
1279
1280                 self.report_download_page(username)
1281                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1282                 try:
1283                         page = urllib2.urlopen(request).read()
1284                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1285                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1286                         return
1287
1288                 # Extract video identifiers
1289                 ids_in_page = []
1290
1291                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1292                         if mobj.group(1) not in ids_in_page:
1293                                 ids_in_page.append(mobj.group(1))
1294                 video_ids.extend(ids_in_page)
1295
1296                 for id in video_ids:
1297                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1298                 return
1299
class PostProcessor(object):
	"""Base class for post-processing steps.

	Instances are attached to a FileDownloader through its
	add_post_processor() method. After each successful download the
	downloader walks its chain of PostProcessors, calling run() first with
	an initial information dictionary and then with whatever the previous
	processor returned.

	The chain stops as soon as a processor returns None, or when the last
	processor has run.

	Like InfoExtractor objects, PostProcessors take part in a "mutual
	registration" scheme with their downloader.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self.set_downloader(downloader)

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Process a finished download.

		"information" is a dictionary shaped like the ones built by
		InfoExtractors, extended with a "filepath" entry naming the file
		that was just downloaded.

		Returning None halts the postprocessing chain; returning a
		dictionary (possibly this very one, with some fields adjusted)
		hands it on to the next processor. Implementations may also raise
		a PostProcessingError, which the calling downloader will take
		into account.
		"""
		return information # the base class is a pass-through
1345         
### MAIN PROGRAM ###
if __name__ == '__main__':
	try:
		# Modules needed only when running the main program
		import getpass
		import optparse

		# Function to update the program file with the latest version from bitbucket.org
		def update_self(downloader, filename):
			"""Replace this script on disk with the latest stable release."""
			# Note: downloader only used for options
			if not os.access (filename, os.W_OK):
				sys.exit('ERROR: no write permissions on %s' % filename)

			downloader.to_stdout('Updating to latest stable version...')
			latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
			latest_version = urllib.urlopen(latest_url).read().strip()
			prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
			newcontent = urllib.urlopen(prog_url).read()
			# NOTE(review): the script is overwritten in place without any
			# integrity check on the downloaded content; a truncated
			# response would corrupt the installed program.
			stream = open(filename, 'w')
			stream.write(newcontent)
			stream.close()
			downloader.to_stdout('Updated to version %s' % latest_version)

		# General configuration
		# Build ONE opener carrying both handlers. The previous code called
		# install_opener() twice in a row; the second call replaced the
		# first opener entirely, throwing away the explicitly-installed
		# ProxyHandler.
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Parse command line
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.01.19',
			conflict_handler='resolve',
		)

		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='UN', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PW', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FMT', help='video format code')
		video_format.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='download the best quality video possible', const='0')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('-d', '--high-def',
				action='store_const', dest='format', help='alias for -f 22', const='22')
		parser.add_option_group(video_format)

		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		parser.add_option_group(verbosity)

		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TPL', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='F', help='file containing URLs to download')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Batch file verification
		batchurls = []
		if opts.batchfile is not None:
			try:
				# Read the batch file, keeping only non-empty lines. The
				# descriptor is closed explicitly instead of being leaked.
				batchfd = open(opts.batchfile, 'r')
				try:
					batchurls = batchfd.readlines()
				finally:
					batchfd.close()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
			except IOError:
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		photobucket_ie = PhotobucketIE()

		# File downloader
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
			'format': opts.format,
			# Pick the first applicable template: explicit -o wins, then
			# -t (sanitized title), then -l (literal title), then id only.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'continuedl': opts.continue_dl,
			})
		# Registration order matters: each URL goes to the first extractor
		# whose suitable() accepts it — presumably most specific first.
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(photobucket_ie)

		# Update version
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# Maybe do nothing
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')
			else:
				sys.exit()
		retcode = fd.download(all_urls)
		sys.exit(retcode)

	except DownloadError:
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')