2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
30 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
33 'Accept-Language': 'en-us,en;q=0.5',
# Alphabet for "simplified" titles: ASCII letters plus digits, decoded to
# unicode (Python 2 str.decode) so the value can be interpolated into the
# (?u) regexes that build simple_title from the real video title.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
38 def preferredencoding():
39 """Get preferred encoding.
41 Returns the best encoding scheme for the system, based on
42 locale.getpreferredencoding() and some further tweaks.
44 def yield_preferredencoding():
46 pref = locale.getpreferredencoding()
52 return yield_preferredencoding().next()
54 def htmlentity_transform(matchobj):
55 """Transforms an HTML entity to a Unicode character.
57 This function receives a match object and is intended to be used with
58 the re.sub() function.
60 entity = matchobj.group(1)
62 # Known non-numeric HTML entity
63 if entity in htmlentitydefs.name2codepoint:
64 return unichr(htmlentitydefs.name2codepoint[entity])
67 mobj = re.match(ur'(?u)#(x?\d+)', entity)
69 numstr = mobj.group(1)
70 if numstr.startswith(u'x'):
72 numstr = u'0%s' % numstr
75 return unichr(long(numstr, base))
77 # Unknown entity in name, return its literal representation
78 return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
85 def sanitize_open(filename, open_mode):
86 """Try to open the given filename, and slightly tweak it if this fails.
88 Attempts to open the given filename. If this fails, it tries to change
89 the filename slightly, step by step, until it's either able to open it
90 or it fails and raises a final exception, like the standard open()
93 It returns the tuple (stream, definitive_file_name).
97 return (sys.stdout, filename)
98 stream = open(filename, open_mode)
99 return (stream, filename)
100 except (IOError, OSError), err:
101 # In case of error, try to remove win32 forbidden chars
102 filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)
104 # An exception here should be caught in the caller
105 stream = open(filename, open_mode)
106 return (stream, filename)
109 class DownloadError(Exception):
110 """Download Error exception.
112 This exception may be thrown by FileDownloader objects if they are not
113 configured to continue on errors. They will contain the appropriate
118 class SameFileError(Exception):
119 """Same File exception.
121 This exception will be thrown by FileDownloader objects if they detect
122 multiple files would have to be downloaded to the same file on disk.
126 class PostProcessingError(Exception):
127 """Post Processing exception.
129 This exception may be raised by PostProcessor's .run() method to
130 indicate an error in the postprocessing task.
134 class UnavailableVideoError(Exception):
135 """Unavailable Format exception.
137 This exception will be thrown when a video is requested
138 in a format that is not available for that video.
142 class ContentTooShortError(Exception):
143 """Content Too Short exception.
145 This exception may be raised by FileDownloader objects when a file they
146 download is too small for what the server announced first, indicating
147 the connection was probably interrupted.
153 def __init__(self, downloaded, expected):
154 self.downloaded = downloaded
155 self.expected = expected
157 class FileDownloader(object):
158 """File Downloader class.
160 File downloader objects are the ones responsible of downloading the
161 actual video file and writing it to disk if the user has requested
162 it, among some other tasks. In most cases there should be one per
163 program. As, given a video URL, the downloader doesn't know how to
164 extract all the needed information, task that InfoExtractors do, it
165 has to pass the URL to one of them.
167 For this, file downloader objects have a method that allows
168 InfoExtractors to be registered in a given order. When it is passed
169 a URL, the file downloader handles it to the first InfoExtractor it
170 finds that reports being able to handle it. The InfoExtractor extracts
171 all the information about the video or videos the URL refers to, and
172 asks the FileDownloader to process the video information, possibly
173 downloading the video.
175 File downloaders accept a lot of parameters. In order not to saturate
176 the object constructor with arguments, it receives a dictionary of
177 options instead. These options are available through the params
178 attribute for the InfoExtractors to use. The FileDownloader also
179 registers itself as the downloader in charge for the InfoExtractors
180 that are added to it, so this is a "mutual registration".
184 username: Username for authentication purposes.
185 password: Password for authentication purposes.
186 usenetrc: Use netrc for authentication instead.
187 quiet: Do not print messages to stdout.
188 forceurl: Force printing final URL.
189 forcetitle: Force printing title.
190 simulate: Do not download the video files.
191 format: Video format code.
192 format_limit: Highest quality format to try.
193 outtmpl: Template for output names.
194 ignoreerrors: Do not stop on download errors.
195 ratelimit: Download speed limit, in bytes/sec.
196 nooverwrites: Prevent overwriting files.
197 retries: Number of times to retry for HTTP error 503
198 continuedl: Try to continue downloads if possible.
199 noprogress: Do not print the progress bar.
	# Exit status returned by download(): stays 0 unless trouble() records an
	# error (it is then set to 1).  Re-initialized per instance in __init__.
	_download_retcode = None
	# Running count of downloaded files; feeds the %(ord)s output-template
	# field via increment_downloads() / process_info().
	_num_downloads = None
208 def __init__(self, params):
209 """Create a FileDownloader object with the given options."""
212 self._download_retcode = 0
213 self._num_downloads = 0
217 def pmkdir(filename):
218 """Create directory components in filename. Similar to Unix "mkdir -p"."""
219 components = filename.split(os.sep)
220 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
221 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
222 for dir in aggregate:
223 if not os.path.exists(dir):
227 def format_bytes(bytes):
230 if type(bytes) is str:
235 exponent = long(math.log(bytes, 1024.0))
236 suffix = 'bkMGTPEZY'[exponent]
237 converted = float(bytes) / float(1024**exponent)
238 return '%.2f%s' % (converted, suffix)
241 def calc_percent(byte_counter, data_len):
244 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
247 def calc_eta(start, now, total, current):
251 if current == 0 or dif < 0.001: # One millisecond
253 rate = float(current) / dif
254 eta = long((float(total) - float(current)) / rate)
255 (eta_mins, eta_secs) = divmod(eta, 60)
258 return '%02d:%02d' % (eta_mins, eta_secs)
261 def calc_speed(start, now, bytes):
263 if bytes == 0 or dif < 0.001: # One millisecond
264 return '%10s' % '---b/s'
265 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
268 def best_block_size(elapsed_time, bytes):
269 new_min = max(bytes / 2.0, 1.0)
270 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
271 if elapsed_time < 0.001:
273 rate = bytes / elapsed_time
281 def parse_bytes(bytestr):
282 """Parse a string indicating a byte quantity into a long integer."""
283 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
286 number = float(matchobj.group(1))
287 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
288 return long(round(number * multiplier))
290 def add_info_extractor(self, ie):
291 """Add an InfoExtractor object to the end of the list."""
293 ie.set_downloader(self)
295 def add_post_processor(self, pp):
296 """Add a PostProcessor object to the end of the chain."""
298 pp.set_downloader(self)
300 def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
301 """Print message to stdout if not in quiet mode."""
303 if not self.params.get('quiet', False):
304 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
306 except (UnicodeEncodeError), err:
307 if not ignore_encoding_errors:
310 def to_stderr(self, message):
311 """Print message to stderr."""
312 print >>sys.stderr, message.encode(preferredencoding())
314 def fixed_template(self):
315 """Checks if the output template is fixed."""
316 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
318 def trouble(self, message=None):
319 """Determine action to take when a download problem appears.
321 Depending on if the downloader has been configured to ignore
322 download errors or not, this method may throw an exception or
323 not when errors are found, after printing the message.
325 if message is not None:
326 self.to_stderr(message)
327 if not self.params.get('ignoreerrors', False):
328 raise DownloadError(message)
329 self._download_retcode = 1
331 def slow_down(self, start_time, byte_counter):
332 """Sleep if the download speed is over the rate limit."""
333 rate_limit = self.params.get('ratelimit', None)
334 if rate_limit is None or byte_counter == 0:
337 elapsed = now - start_time
340 speed = float(byte_counter) / elapsed
341 if speed > rate_limit:
342 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
344 def report_destination(self, filename):
345 """Report destination filename."""
346 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
348 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
349 """Report download progress."""
350 if self.params.get('noprogress', False):
352 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
353 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
355 def report_resuming_byte(self, resume_len):
356 """Report attempt to resume at given byte."""
357 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
359 def report_retry(self, count, retries):
360 """Report retry in case of HTTP error 503"""
361 self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries))
363 def report_file_already_downloaded(self, file_name):
364 """Report file has already been fully downloaded."""
366 self.to_stdout(u'[download] %s has already been downloaded' % file_name)
367 except (UnicodeEncodeError), err:
368 self.to_stdout(u'[download] The file has already been downloaded')
370 def report_unable_to_resume(self):
371 """Report it was impossible to resume download."""
372 self.to_stdout(u'[download] Unable to resume')
374 def report_finish(self):
375 """Report download finished."""
376 if self.params.get('noprogress', False):
377 self.to_stdout(u'[download] Download completed')
381 def increment_downloads(self):
382 """Increment the ordinal that assigns a number to each file."""
383 self._num_downloads += 1
385 def process_info(self, info_dict):
386 """Process a single dictionary returned by an InfoExtractor."""
387 # Do nothing else if in simulate mode
388 if self.params.get('simulate', False):
390 if self.params.get('forcetitle', False):
391 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
392 if self.params.get('forceurl', False):
393 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
394 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
395 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
396 if self.params.get('forcedescription', False) and 'description' in info_dict:
397 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
402 template_dict = dict(info_dict)
403 template_dict['epoch'] = unicode(long(time.time()))
404 template_dict['ord'] = unicode('%05d' % self._num_downloads)
405 filename = self.params['outtmpl'] % template_dict
406 except (ValueError, KeyError), err:
407 self.trouble(u'ERROR: invalid system charset or erroneous output template')
409 if self.params.get('nooverwrites', False) and os.path.exists(filename):
410 self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
414 self.pmkdir(filename)
415 except (OSError, IOError), err:
416 self.trouble('ERROR: unable to create directories: %s' % str(err))
420 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
421 except (OSError, IOError), err:
422 raise UnavailableVideoError
423 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
424 self.trouble('ERROR: unable to download video data: %s' % str(err))
426 except (ContentTooShortError, ), err:
427 self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
432 self.post_process(filename, info_dict)
433 except (PostProcessingError), err:
434 self.trouble('ERROR: postprocessing: %s' % str(err))
437 def download(self, url_list):
438 """Download a given list of URLs."""
439 if len(url_list) > 1 and self.fixed_template():
440 raise SameFileError(self.params['outtmpl'])
443 suitable_found = False
445 # Go to next InfoExtractor if not suitable
446 if not ie.suitable(url):
449 # Suitable InfoExtractor found
450 suitable_found = True
452 # Extract information from URL and process it
455 # Suitable InfoExtractor had been found; go to next URL
458 if not suitable_found:
459 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
461 return self._download_retcode
463 def post_process(self, filename, ie_info):
464 """Run the postprocessing chain on the given file."""
466 info['filepath'] = filename
472 def _download_with_rtmpdump(self, filename, url, player_url):
473 self.report_destination(filename)
475 # Check for rtmpdump first
477 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
478 except (OSError, IOError):
479 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
482 # Download using rtmpdump. rtmpdump returns exit code 2 when
483 # the connection was interrumpted and resuming appears to be
484 # possible. This is part of rtmpdump's normal usage, AFAIK.
485 basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
486 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
487 while retval == 2 or retval == 1:
488 prevsize = os.path.getsize(filename)
489 self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
490 time.sleep(5.0) # This seems to be needed
491 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
492 cursize = os.path.getsize(filename)
493 if prevsize == cursize and retval == 1:
496 self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
499 self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
502 def _do_download(self, filename, url, player_url):
503 # Attempt to download using rtmpdump
504 if url.startswith('rtmp'):
505 return self._download_with_rtmpdump(filename, url, player_url)
509 basic_request = urllib2.Request(url, None, std_headers)
510 request = urllib2.Request(url, None, std_headers)
512 # Establish possible resume length
513 if os.path.isfile(filename):
514 resume_len = os.path.getsize(filename)
518 # Request parameters in case of being able to resume
519 if self.params.get('continuedl', False) and resume_len != 0:
520 self.report_resuming_byte(resume_len)
521 request.add_header('Range','bytes=%d-' % resume_len)
525 retries = self.params.get('retries', 0)
526 while count <= retries:
527 # Establish connection
529 data = urllib2.urlopen(request)
531 except (urllib2.HTTPError, ), err:
532 if err.code != 503 and err.code != 416:
533 # Unexpected HTTP error
535 elif err.code == 416:
536 # Unable to resume (requested range not satisfiable)
538 # Open the connection again without the range header
539 data = urllib2.urlopen(basic_request)
540 content_length = data.info()['Content-Length']
541 except (urllib2.HTTPError, ), err:
545 # Examine the reported length
546 if (content_length is not None and
547 (resume_len - 100 < long(content_length) < resume_len + 100)):
548 # The file had already been fully downloaded.
549 # Explanation to the above condition: in issue #175 it was revealed that
550 # YouTube sometimes adds or removes a few bytes from the end of the file,
551 # changing the file size slightly and causing problems for some users. So
552 # I decided to implement a suggested change and consider the file
553 # completely downloaded if the file size differs less than 100 bytes from
554 # the one in the hard drive.
555 self.report_file_already_downloaded(filename)
558 # The length does not match, we start the download over
559 self.report_unable_to_resume()
565 self.report_retry(count, retries)
568 self.trouble(u'ERROR: giving up after %s retries' % retries)
571 data_len = data.info().get('Content-length', None)
572 data_len_str = self.format_bytes(data_len)
579 data_block = data.read(block_size)
581 data_block_len = len(data_block)
582 if data_block_len == 0:
584 byte_counter += data_block_len
586 # Open file just in time
589 (stream, filename) = sanitize_open(filename, open_mode)
590 self.report_destination(filename)
591 except (OSError, IOError), err:
592 self.trouble('ERROR: unable to open for writing: %s' % str(err))
595 stream.write(data_block)
596 except (IOError, OSError), err:
597 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
599 block_size = self.best_block_size(after - before, data_block_len)
602 percent_str = self.calc_percent(byte_counter, data_len)
603 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
604 speed_str = self.calc_speed(start, time.time(), byte_counter)
605 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
608 self.slow_down(start, byte_counter)
611 if data_len is not None and str(byte_counter) != data_len:
612 raise ContentTooShortError(byte_counter, long(data_len))
615 class InfoExtractor(object):
616 """Information Extractor class.
618 Information extractors are the classes that, given a URL, extract
619 information from the video (or videos) the URL refers to. This
620 information includes the real video URL, the video title and simplified
621 title, author and others. The information is stored in a dictionary
622 which is then passed to the FileDownloader. The FileDownloader
623 processes this information possibly downloading the video to the file
624 system, among other possible outcomes. The dictionaries must include
625 the following fields:
627 id: Video identifier.
628 url: Final video URL.
629 uploader: Nickname of the video uploader.
630 title: Literal title.
631 stitle: Simplified title.
632 ext: Video filename extension.
633 format: Video format.
634 player_url: SWF Player URL (may be None).
636 The following fields are optional. Their primary purpose is to allow
637 youtube-dl to serve as the backend for a video search function, such
638 as the one in youtube2mp3. They are only used when their respective
639 forced printing functions are called:
641 thumbnail: Full URL to a video thumbnail image.
642 description: One-line video description.
644 Subclasses of this one should re-define the _real_initialize() and
645 _real_extract() methods, as well as the suitable() static method.
646 Probably, they should also be instantiated and added to the main
653 def __init__(self, downloader=None):
654 """Constructor. Receives an optional downloader."""
656 self.set_downloader(downloader)
660 """Receives a URL and returns True if suitable for this IE."""
663 def initialize(self):
664 """Initializes an instance (authentication, etc)."""
666 self._real_initialize()
669 def extract(self, url):
670 """Extracts URL information and returns it in list of dicts."""
672 return self._real_extract(url)
674 def set_downloader(self, downloader):
675 """Sets the downloader for this IE."""
676 self._downloader = downloader
678 def _real_initialize(self):
679 """Real initialization process. Redefine in subclasses."""
682 def _real_extract(self, url):
683 """Real extraction process. Redefine in subclasses."""
686 class YoutubeIE(InfoExtractor):
687 """Information extractor for youtube.com."""
689 _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
690 _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
691 _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
692 _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
693 _NETRC_MACHINE = 'youtube'
694 # Listed in order of quality
695 _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
696 _video_extensions = {
702 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
709 return (re.match(YoutubeIE._VALID_URL, url) is not None)
711 def report_lang(self):
712 """Report attempt to set language."""
713 self._downloader.to_stdout(u'[youtube] Setting language')
715 def report_login(self):
716 """Report attempt to log in."""
717 self._downloader.to_stdout(u'[youtube] Logging in')
719 def report_age_confirmation(self):
720 """Report attempt to confirm age."""
721 self._downloader.to_stdout(u'[youtube] Confirming age')
723 def report_video_webpage_download(self, video_id):
724 """Report attempt to download video webpage."""
725 self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
727 def report_video_info_webpage_download(self, video_id):
728 """Report attempt to download video info webpage."""
729 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
731 def report_information_extraction(self, video_id):
732 """Report attempt to extract video information."""
733 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
735 def report_unavailable_format(self, video_id, format):
736 """Report extracted video URL."""
737 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
739 def report_rtmp_download(self):
740 """Indicate the download will use the RTMP protocol."""
741 self._downloader.to_stdout(u'[youtube] RTMP download detected')
743 def _real_initialize(self):
744 if self._downloader is None:
749 downloader_params = self._downloader.params
751 # Attempt to use provided username and password or .netrc data
752 if downloader_params.get('username', None) is not None:
753 username = downloader_params['username']
754 password = downloader_params['password']
755 elif downloader_params.get('usenetrc', False):
757 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
762 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
763 except (IOError, netrc.NetrcParseError), err:
764 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
768 request = urllib2.Request(self._LANG_URL, None, std_headers)
771 urllib2.urlopen(request).read()
772 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
773 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
776 # No authentication to be performed
782 'current_form': 'loginForm',
784 'action_login': 'Log In',
785 'username': username,
786 'password': password,
788 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
791 login_results = urllib2.urlopen(request).read()
792 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
793 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
795 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
796 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
802 'action_confirm': 'Confirm',
804 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
806 self.report_age_confirmation()
807 age_results = urllib2.urlopen(request).read()
808 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
809 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
812 def _real_extract(self, url):
813 # Extract video id from URL
814 mobj = re.match(self._VALID_URL, url)
816 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
818 video_id = mobj.group(2)
821 self.report_video_webpage_download(video_id)
822 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
824 video_webpage = urllib2.urlopen(request).read()
825 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
826 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
829 # Attempt to extract SWF player URL
830 mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
832 player_url = mobj.group(1)
837 self.report_video_info_webpage_download(video_id)
838 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
839 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
840 % (video_id, el_type))
841 request = urllib2.Request(video_info_url, None, std_headers)
843 video_info_webpage = urllib2.urlopen(request).read()
844 video_info = parse_qs(video_info_webpage)
845 if 'token' in video_info:
847 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
848 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
850 if 'token' not in video_info:
851 if 'reason' in video_info:
852 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0])
854 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
857 # Start extracting information
858 self.report_information_extraction(video_id)
861 if 'author' not in video_info:
862 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
864 video_uploader = urllib.unquote_plus(video_info['author'][0])
867 if 'title' not in video_info:
868 self._downloader.trouble(u'ERROR: unable to extract video title')
870 video_title = urllib.unquote_plus(video_info['title'][0])
871 video_title = video_title.decode('utf-8')
872 video_title = sanitize_title(video_title)
875 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
876 simple_title = simple_title.strip(ur'_')
879 if 'thumbnail_url' not in video_info:
880 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
882 else: # don't panic if we can't find it
883 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
886 video_description = 'No description available.'
887 if self._downloader.params.get('forcedescription', False):
888 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
890 video_description = mobj.group(1)
893 video_token = urllib.unquote_plus(video_info['token'][0])
895 # Decide which formats to download
896 requested_format = self._downloader.params.get('format', None)
897 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
899 if 'fmt_url_map' in video_info:
900 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
901 format_limit = self._downloader.params.get('format_limit', None)
902 if format_limit is not None and format_limit in self._available_formats:
903 format_list = self._available_formats[self._available_formats.index(format_limit):]
905 format_list = self._available_formats
906 existing_formats = [x for x in format_list if x in url_map]
907 if len(existing_formats) == 0:
908 self._downloader.trouble(u'ERROR: no known formats available for video')
910 if requested_format is None:
911 video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
912 elif requested_format == '-1':
913 video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
915 video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
917 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
918 self.report_rtmp_download()
919 video_url_list = [(None, video_info['conn'][0])]
922 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
925 for format_param, video_real_url in video_url_list:
926 # At this point we have a new video
927 self._downloader.increment_downloads()
930 video_extension = self._video_extensions.get(format_param, 'flv')
932 # Find the video URL in fmt_url_map or conn paramters
934 # Process video information
935 self._downloader.process_info({
936 'id': video_id.decode('utf-8'),
937 'url': video_real_url.decode('utf-8'),
938 'uploader': video_uploader.decode('utf-8'),
939 'title': video_title,
940 'stitle': simple_title,
941 'ext': video_extension.decode('utf-8'),
942 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
943 'thumbnail': video_thumbnail.decode('utf-8'),
944 'description': video_description.decode('utf-8'),
945 'player_url': player_url,
947 except UnavailableVideoError, err:
948 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
951 class MetacafeIE(InfoExtractor):
952 """Information Extractor for metacafe.com."""
954 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
955 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
956 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
959 def __init__(self, youtube_ie, downloader=None):
960 InfoExtractor.__init__(self, downloader)
961 self._youtube_ie = youtube_ie
965 return (re.match(MetacafeIE._VALID_URL, url) is not None)
967 def report_disclaimer(self):
968 """Report disclaimer retrieval."""
969 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
971 def report_age_confirmation(self):
972 """Report attempt to confirm age."""
973 self._downloader.to_stdout(u'[metacafe] Confirming age')
975 def report_download_webpage(self, video_id):
976 """Report webpage download."""
977 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
979 def report_extraction(self, video_id):
980 """Report information extraction."""
981 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
983 def _real_initialize(self):
984 # Retrieve disclaimer
985 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
987 self.report_disclaimer()
988 disclaimer = urllib2.urlopen(request).read()
989 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
990 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
996 'submit': "Continue - I'm over 18",
998 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1000 self.report_age_confirmation()
1001 disclaimer = urllib2.urlopen(request).read()
1002 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1003 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1006 def _real_extract(self, url):
# Extract the Metacafe video: parse the id/title from the URL, delegate
# "yt-..." ids to the YouTube extractor, otherwise scrape the watch page
# for the media URL, gdaKey, title and uploader, then hand the collected
# fields to the downloader.
1007 # Extract id and simplified title from URL
1008 mobj = re.match(self._VALID_URL, url)
1010 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1013 video_id = mobj.group(1)
1015 # Check if video comes from YouTube
1016 mobj2 = re.match(r'^yt-(.*)$', video_id)
1017 if mobj2 is not None:
# Metacafe mirrors YouTube videos under a "yt-<id>" prefix; delegate.
1018 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1021 # At this point we have a new video
1022 self._downloader.increment_downloads()
1024 simple_title = mobj.group(2).decode('utf-8')
1025 video_extension = 'flv'
1027 # Retrieve video webpage to extract further information
1028 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1030 self.report_download_webpage(video_id)
1031 webpage = urllib2.urlopen(request).read()
1032 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message reads "unable retrieve" — missing "to".
1033 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1036 # Extract URL, uploader and title from webpage
1037 self.report_extraction(video_id)
1038 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1040 self._downloader.trouble(u'ERROR: unable to extract media URL')
1042 mediaURL = urllib.unquote(mobj.group(1))
1044 # Extract gdaKey if available
1045 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
# No gdaKey: the bare media URL is used as-is (treated as non-fatal).
1047 video_url = mediaURL
1048 #self._downloader.trouble(u'ERROR: unable to extract gdaKey')
1051 gdaKey = mobj.group(1)
# gdaKey is appended as a __gda__ query parameter to authorize playback.
1052 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1054 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1056 self._downloader.trouble(u'ERROR: unable to extract title')
1058 video_title = mobj.group(1).decode('utf-8')
1059 video_title = sanitize_title(video_title)
1061 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1063 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1065 video_uploader = mobj.group(1)
1068 # Process video information
1069 self._downloader.process_info({
1070 'id': video_id.decode('utf-8'),
1071 'url': video_url.decode('utf-8'),
1072 'uploader': video_uploader.decode('utf-8'),
1073 'title': video_title,
1074 'stitle': simple_title,
1075 'ext': video_extension.decode('utf-8'),
1079 except UnavailableVideoError:
1080 self._downloader.trouble(u'ERROR: unable to download video')
1083 class DailymotionIE(InfoExtractor):
1084 """Information Extractor for Dailymotion"""
# URL pattern: group 1 = video id, group 2 = simplified title slug.
1086 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1088 def __init__(self, downloader=None):
1089 InfoExtractor.__init__(self, downloader)
1093 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1095 def report_download_webpage(self, video_id):
1096 """Report webpage download."""
1097 self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)
1099 def report_extraction(self, video_id):
1100 """Report information extraction."""
1101 self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)
# No session/state setup is needed for Dailymotion.
1103 def _real_initialize(self):
1106 def _real_extract(self, url):
# Scrape the Dailymotion page for the flashvars media URL, title and
# uploader, then pass the collected fields to the downloader.
1107 # Extract id and simplified title from URL
1108 mobj = re.match(self._VALID_URL, url)
1110 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1113 # At this point we have a new video
1114 self._downloader.increment_downloads()
1115 video_id = mobj.group(1)
1117 simple_title = mobj.group(2).decode('utf-8')
1118 video_extension = 'flv'
1120 # Retrieve video webpage to extract further information
1121 request = urllib2.Request(url)
1123 self.report_download_webpage(video_id)
1124 webpage = urllib2.urlopen(request).read()
1125 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
# NOTE(review): message reads "unable retrieve" — missing "to".
1126 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1129 # Extract URL, uploader and title from webpage
1130 self.report_extraction(video_id)
# Media URL lives in an addVariable("video", "...") flash call.
1131 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1133 self._downloader.trouble(u'ERROR: unable to extract media URL')
1135 mediaURL = urllib.unquote(mobj.group(1))
1137 # if needed add http://www.dailymotion.com/ if relative URL
1139 video_url = mediaURL
# Alternative title source kept for reference:
1141 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1142 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1144 self._downloader.trouble(u'ERROR: unable to extract title')
1146 video_title = mobj.group(1).decode('utf-8')
1147 video_title = sanitize_title(video_title)
1149 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage)
1151 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1153 video_uploader = mobj.group(1)
1156 # Process video information
1157 self._downloader.process_info({
1158 'id': video_id.decode('utf-8'),
1159 'url': video_url.decode('utf-8'),
1160 'uploader': video_uploader.decode('utf-8'),
1161 'title': video_title,
1162 'stitle': simple_title,
1163 'ext': video_extension.decode('utf-8'),
1167 except UnavailableVideoError:
1168 self._downloader.trouble(u'ERROR: unable to download video')
1170 class GoogleIE(InfoExtractor):
1171 """Information extractor for video.google.com."""
# Matches Google Video play URLs across national TLDs; group 1 = docid.
1173 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1175 def __init__(self, downloader=None):
1176 InfoExtractor.__init__(self, downloader)
1180 return (re.match(GoogleIE._VALID_URL, url) is not None)
1182 def report_download_webpage(self, video_id):
1183 """Report webpage download."""
1184 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1186 def report_extraction(self, video_id):
1187 """Report information extraction."""
1188 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
# No session/state setup is needed for Google Video.
1190 def _real_initialize(self):
1193 def _real_extract(self, url):
# Scrape the Google Video play page: prefer the mp4 download_url,
# falling back to the escaped flv videoUrl; also pull title,
# description and (optionally) a thumbnail from a search page.
1194 # Extract id from URL
1195 mobj = re.match(self._VALID_URL, url)
1197 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1200 # At this point we have a new video
1201 self._downloader.increment_downloads()
1202 video_id = mobj.group(1)
1204 video_extension = 'mp4'
1206 # Retrieve video webpage to extract further information
1207 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1209 self.report_download_webpage(video_id)
1210 webpage = urllib2.urlopen(request).read()
1211 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1212 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1215 # Extract URL, uploader, and title from webpage
1216 self.report_extraction(video_id)
1217 mobj = re.search(r"download_url:'([^']+)'", webpage)
# download_url absent: fall back to the flv stream embedded as
# JavaScript-escaped "videoUrl\x3d...\x26".
1219 video_extension = 'flv'
1220 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1222 self._downloader.trouble(u'ERROR: unable to extract media URL')
1224 mediaURL = urllib.unquote(mobj.group(1))
# Undo the JavaScript hex escapes: \x3d -> '=' and \x26 -> '&'.
1225 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1226 mediaURL = mediaURL.replace('\\x26', '\x26')
1228 video_url = mediaURL
1230 mobj = re.search(r'<title>(.*)</title>', webpage)
1232 self._downloader.trouble(u'ERROR: unable to extract title')
1234 video_title = mobj.group(1).decode('utf-8')
1235 video_title = sanitize_title(video_title)
# Collapse runs of non-filename-safe characters into '_'.
1236 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1238 # Extract video description
1239 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1241 self._downloader.trouble(u'ERROR: unable to extract video description')
1243 video_description = mobj.group(1).decode('utf-8')
1244 if not video_description:
1245 video_description = 'No description available.'
1247 # Extract video thumbnail
1248 if self._downloader.params.get('forcethumbnail', False):
# The thumbnail is only available via a site-search result page.
1249 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1251 webpage = urllib2.urlopen(request).read()
1252 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1253 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1255 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1257 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1259 video_thumbnail = mobj.group(1)
1260 else: # we need something to pass to process_info
1261 video_thumbnail = ''
1265 # Process video information
1266 self._downloader.process_info({
1267 'id': video_id.decode('utf-8'),
1268 'url': video_url.decode('utf-8'),
1270 'title': video_title,
1271 'stitle': simple_title,
1272 'ext': video_extension.decode('utf-8'),
1276 except UnavailableVideoError:
1277 self._downloader.trouble(u'ERROR: unable to download video')
1280 class PhotobucketIE(InfoExtractor):
1281 """Information extractor for photobucket.com."""
# Only .flv media linked through a "current=" query parameter is handled.
1283 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1285 def __init__(self, downloader=None):
1286 InfoExtractor.__init__(self, downloader)
1290 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1292 def report_download_webpage(self, video_id):
1293 """Report webpage download."""
1294 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1296 def report_extraction(self, video_id):
1297 """Report information extraction."""
1298 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
# No session/state setup is needed for Photobucket.
1300 def _real_initialize(self):
1303 def _real_extract(self, url):
# Scrape the Photobucket page for the video_src link, title and
# uploader, then hand the collected fields to the downloader.
1304 # Extract id from URL
1305 mobj = re.match(self._VALID_URL, url)
1307 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1310 # At this point we have a new video
1311 self._downloader.increment_downloads()
1312 video_id = mobj.group(1)
1314 video_extension = 'flv'
1316 # Retrieve video webpage to extract further information
1317 request = urllib2.Request(url)
1319 self.report_download_webpage(video_id)
1320 webpage = urllib2.urlopen(request).read()
1321 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1322 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1325 # Extract URL, uploader, and title from webpage
1326 self.report_extraction(video_id)
1327 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1329 self._downloader.trouble(u'ERROR: unable to extract media URL')
1331 mediaURL = urllib.unquote(mobj.group(1))
1333 video_url = mediaURL
# Title page pattern: "<title>TITLE video by UPLOADER - Photobucket</title>";
# group 1 = title, group 2 = uploader.
1335 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1337 self._downloader.trouble(u'ERROR: unable to extract title')
1339 video_title = mobj.group(1).decode('utf-8')
1340 video_title = sanitize_title(video_title)
1341 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1343 video_uploader = mobj.group(2).decode('utf-8')
1346 # Process video information
1347 self._downloader.process_info({
1348 'id': video_id.decode('utf-8'),
1349 'url': video_url.decode('utf-8'),
1350 'uploader': video_uploader,
1351 'title': video_title,
1352 'stitle': simple_title,
1353 'ext': video_extension.decode('utf-8'),
1357 except UnavailableVideoError:
1358 self._downloader.trouble(u'ERROR: unable to download video')
1361 class YahooIE(InfoExtractor):
1362 """Information extractor for video.yahoo.com."""
1364 # _VALID_URL matches all Yahoo! Video URLs
1365 # _VPAGE_URL matches only the extractable '/watch/' URLs
1366 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1367 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1369 def __init__(self, downloader=None):
1370 InfoExtractor.__init__(self, downloader)
1374 return (re.match(YahooIE._VALID_URL, url) is not None)
1376 def report_download_webpage(self, video_id):
1377 """Report webpage download."""
1378 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1380 def report_extraction(self, video_id):
1381 """Report information extraction."""
1382 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
# No session/state setup is needed for Yahoo! Video.
1384 def _real_initialize(self):
1387 def _real_extract(self, url, new_video=True):
# Two-phase extraction: non-/watch/ URLs are first rewritten to the
# canonical /watch/<vid>/<id> form (recursing once with
# new_video=False), then the watch page and a playlist XML endpoint
# are scraped for the final media URL and metadata.
1388 # Extract ID from URL
1389 mobj = re.match(self._VALID_URL, url)
1391 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1394 # At this point we have a new video
1395 self._downloader.increment_downloads()
1396 video_id = mobj.group(2)
1397 video_extension = 'flv'
1399 # Rewrite valid but non-extractable URLs as
1400 # extractable English language /watch/ URLs
1401 if re.match(self._VPAGE_URL, url) is None:
1402 request = urllib2.Request(url)
1404 webpage = urllib2.urlopen(request).read()
1405 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1406 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1409 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1411 self._downloader.trouble(u'ERROR: Unable to extract id field')
1413 yahoo_id = mobj.group(1)
1415 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1417 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1419 yahoo_vid = mobj.group(1)
1421 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
# Recurse exactly once on the canonical URL.
1422 return self._real_extract(url, new_video=False)
1424 # Retrieve video webpage to extract further information
1425 request = urllib2.Request(url)
1427 self.report_download_webpage(video_id)
1428 webpage = urllib2.urlopen(request).read()
1429 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1430 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1433 # Extract uploader and title from webpage
1434 self.report_extraction(video_id)
1435 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1437 self._downloader.trouble(u'ERROR: unable to extract video title')
1439 video_title = mobj.group(1).decode('utf-8')
1440 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1442 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1444 self._downloader.trouble(u'ERROR: unable to extract video uploader')
# NOTE(review): the regex above captures (people|profile) as group 1 and
# the uploader name as group 2; group(1) here yields "people"/"profile",
# not the name — this likely should be mobj.group(2).
1446 video_uploader = mobj.group(1).decode('utf-8')
1448 # Extract video thumbnail
1449 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1451 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1453 video_thumbnail = mobj.group(1).decode('utf-8')
1455 # Extract video description
1456 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1458 self._downloader.trouble(u'ERROR: unable to extract video description')
1460 video_description = mobj.group(1).decode('utf-8')
1461 if not video_description: video_description = 'No description available.'
1463 # Extract video height and width
1464 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1466 self._downloader.trouble(u'ERROR: unable to extract video height')
1468 yv_video_height = mobj.group(1)
1470 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1472 self._downloader.trouble(u'ERROR: unable to extract video width')
1474 yv_video_width = mobj.group(1)
1476 # Retrieve video playlist to extract media URL
1477 # I'm not completely sure what all these options are, but we
1478 # seem to need most of them, otherwise the server sends a 401.
1479 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1480 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1481 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1482 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1483 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1485 self.report_download_webpage(video_id)
1486 webpage = urllib2.urlopen(request).read()
1487 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1488 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1491 # Extract media URL from playlist XML
1492 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1494 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1496 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
# Decode HTML entities (e.g. &amp;) embedded in the stream URL.
1497 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1500 # Process video information
1501 self._downloader.process_info({
1502 'id': video_id.decode('utf-8'),
1504 'uploader': video_uploader,
1505 'title': video_title,
1506 'stitle': simple_title,
1507 'ext': video_extension.decode('utf-8'),
# NOTE(review): 'thumbnail' and 'description' appear twice in this dict
# literal; in Python the later entries win, so the decoded 'thumbnail'
# value two lines above is dead.
1508 'thumbnail': video_thumbnail.decode('utf-8'),
1509 'description': video_description,
1510 'thumbnail': video_thumbnail,
1511 'description': video_description,
1514 except UnavailableVideoError:
1515 self._downloader.trouble(u'ERROR: unable to download video')
1518 class GenericIE(InfoExtractor):
1519 """Generic last-resort information extractor."""
1521 def __init__(self, downloader=None):
1522 InfoExtractor.__init__(self, downloader)
1528 def report_download_webpage(self, video_id):
1529 """Report webpage download."""
# The generic extractor is heuristic, so a warning is always printed.
1530 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1531 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1533 def report_extraction(self, video_id):
1534 """Report information extraction."""
1535 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
# No session/state setup is needed for the generic extractor.
1537 def _real_initialize(self):
1540 def _real_extract(self, url):
# Heuristic extraction for arbitrary pages: look for a JW-Player-style
# "file=http..." flashvars value (then a broader file/source= scan),
# derive id/extension from the media URL's basename, the title from
# <title>, and use the page's domain name as the uploader.
1541 # At this point we have a new video
1542 self._downloader.increment_downloads()
1544 video_id = url.split('/')[-1]
1545 request = urllib2.Request(url)
1547 self.report_download_webpage(video_id)
1548 webpage = urllib2.urlopen(request).read()
1549 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1550 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1552 except ValueError, err:
1553 # since this is the last-resort InfoExtractor, if
1554 # this error is thrown, it'll be thrown here
1555 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1558 # Start with something easy: JW Player in SWFObject
1559 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1561 # Broaden the search a little bit
1562 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1564 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1567 # It's possible that one of the regexes
1568 # matched, but returned an empty group:
1569 if mobj.group(1) is None:
1570 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1573 video_url = urllib.unquote(mobj.group(1))
1574 video_id = os.path.basename(video_url)
1576 # here's a fun little line of code for you:
# Split "name.ext" into extension (without the dot) and bare name.
1577 video_extension = os.path.splitext(video_id)[1][1:]
1578 video_id = os.path.splitext(video_id)[0]
1580 # it's tempting to parse this further, but you would
1581 # have to take into account all the variations like
1582 # Video Title - Site Name
1583 # Site Name | Video Title
1584 # Video Title - Tagline | Site Name
1585 # and so on and so forth; it's just not practical
1586 mobj = re.search(r'<title>(.*)</title>', webpage)
1588 self._downloader.trouble(u'ERROR: unable to extract title')
1590 video_title = mobj.group(1).decode('utf-8')
1591 video_title = sanitize_title(video_title)
1592 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1594 # video uploader is domain name
1595 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
# NOTE(review): message says "title" but this branch failed to extract
# the uploader (domain name) from the URL.
1597 self._downloader.trouble(u'ERROR: unable to extract title')
1599 video_uploader = mobj.group(1).decode('utf-8')
1602 # Process video information
1603 self._downloader.process_info({
1604 'id': video_id.decode('utf-8'),
1605 'url': video_url.decode('utf-8'),
1606 'uploader': video_uploader,
1607 'title': video_title,
1608 'stitle': simple_title,
1609 'ext': video_extension.decode('utf-8'),
1613 except UnavailableVideoError, err:
1614 self._downloader.trouble(u'ERROR: unable to download video')
1617 class YoutubeSearchIE(InfoExtractor):
1618 """Information Extractor for YouTube search queries."""
# Query syntax: "ytsearch:...", "ytsearchN:..." or "ytsearchall:...".
1619 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1620 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1621 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1622 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
# Hard cap on results honored even for "ytsearchall".
1624 _max_youtube_results = 1000
1626 def __init__(self, youtube_ie, downloader=None):
1627 InfoExtractor.__init__(self, downloader)
# Actual downloads are delegated to the wrapped YouTube extractor.
1628 self._youtube_ie = youtube_ie
1632 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1634 def report_download_page(self, query, pagenum):
1635 """Report attempt to download playlist page with given number."""
1636 query = query.decode(preferredencoding())
1637 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1639 def _real_initialize(self):
1640 self._youtube_ie.initialize()
1642 def _real_extract(self, query):
# Parse the prefix (none / "all" / a number) and download that many
# search results through _download_n_results.
1643 mobj = re.match(self._VALID_QUERY, query)
1645 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1648 prefix, query = query.split(':')
1650 query = query.encode('utf-8')
1652 self._download_n_results(query, 1)
1654 elif prefix == 'all':
1655 self._download_n_results(query, self._max_youtube_results)
1661 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1663 elif n > self._max_youtube_results:
1664 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1665 n = self._max_youtube_results
1666 self._download_n_results(query, n)
1668 except ValueError: # parsing prefix as integer fails
1669 self._download_n_results(query, 1)
1672 def _download_n_results(self, query, n):
1673 """Downloads a specified number of results for a query"""
# already_seen de-duplicates ids repeated within/across result pages.
1676 already_seen = set()
1680 self.report_download_page(query, pagenum)
1681 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1682 request = urllib2.Request(result_url, None, std_headers)
1684 page = urllib2.urlopen(request).read()
1685 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1686 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1689 # Extract video identifiers
1690 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# The indicator matches the full href="/watch?v=..." attribute; split
# on '=' and strip the trailing quote to recover the bare video id.
1691 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1692 if video_id not in already_seen:
1693 video_ids.append(video_id)
1694 already_seen.add(video_id)
1695 if len(video_ids) == n:
1696 # Specified n videos reached
1697 for id in video_ids:
1698 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
# No "Next" link: last page reached, flush whatever was collected.
1701 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1702 for id in video_ids:
1703 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1706 pagenum = pagenum + 1
1708 class GoogleSearchIE(InfoExtractor):
1709 """Information Extractor for Google Video search queries."""
# Query syntax: "gvsearch:...", "gvsearchN:..." or "gvsearchall:...".
# Mirrors YoutubeSearchIE but delegates to the GoogleIE extractor.
1710 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1711 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1712 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1713 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1715 _max_google_results = 1000
1717 def __init__(self, google_ie, downloader=None):
1718 InfoExtractor.__init__(self, downloader)
1719 self._google_ie = google_ie
1723 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1725 def report_download_page(self, query, pagenum):
1726 """Report attempt to download playlist page with given number."""
1727 query = query.decode(preferredencoding())
1728 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1730 def _real_initialize(self):
1731 self._google_ie.initialize()
1733 def _real_extract(self, query):
# Parse the prefix (none / "all" / a number) and download that many
# search results through _download_n_results.
1734 mobj = re.match(self._VALID_QUERY, query)
1736 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1739 prefix, query = query.split(':')
1741 query = query.encode('utf-8')
1743 self._download_n_results(query, 1)
1745 elif prefix == 'all':
1746 self._download_n_results(query, self._max_google_results)
1752 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1754 elif n > self._max_google_results:
1755 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1756 n = self._max_google_results
1757 self._download_n_results(query, n)
1759 except ValueError: # parsing prefix as integer fails
1760 self._download_n_results(query, 1)
1763 def _download_n_results(self, query, n):
1764 """Downloads a specified number of results for a query"""
# already_seen de-duplicates docids repeated within/across result pages.
1767 already_seen = set()
1771 self.report_download_page(query, pagenum)
1772 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1773 request = urllib2.Request(result_url, None, std_headers)
1775 page = urllib2.urlopen(request).read()
1776 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1777 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1780 # Extract video identifiers
1781 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1782 video_id = mobj.group(1)
1783 if video_id not in already_seen:
1784 video_ids.append(video_id)
1785 already_seen.add(video_id)
1786 if len(video_ids) == n:
1787 # Specified n videos reached
1788 for id in video_ids:
1789 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
# No "Next" link: last page reached, flush whatever was collected.
1792 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1793 for id in video_ids:
1794 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1797 pagenum = pagenum + 1
1799 class YahooSearchIE(InfoExtractor):
1800 """Information Extractor for Yahoo! Video search queries."""
# Query syntax: "yvsearch:...", "yvsearchN:..." or "yvsearchall:...".
# Mirrors YoutubeSearchIE but delegates to the YahooIE extractor.
1801 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1802 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1803 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1804 _MORE_PAGES_INDICATOR = r'\s*Next'
1806 _max_yahoo_results = 1000
1808 def __init__(self, yahoo_ie, downloader=None):
1809 InfoExtractor.__init__(self, downloader)
1810 self._yahoo_ie = yahoo_ie
1814 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1816 def report_download_page(self, query, pagenum):
1817 """Report attempt to download playlist page with given number."""
1818 query = query.decode(preferredencoding())
1819 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1821 def _real_initialize(self):
1822 self._yahoo_ie.initialize()
1824 def _real_extract(self, query):
# Parse the prefix (none / "all" / a number) and download that many
# search results through _download_n_results.
1825 mobj = re.match(self._VALID_QUERY, query)
1827 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1830 prefix, query = query.split(':')
1832 query = query.encode('utf-8')
1834 self._download_n_results(query, 1)
1836 elif prefix == 'all':
1837 self._download_n_results(query, self._max_yahoo_results)
1843 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1845 elif n > self._max_yahoo_results:
1846 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1847 n = self._max_yahoo_results
1848 self._download_n_results(query, n)
1850 except ValueError: # parsing prefix as integer fails
1851 self._download_n_results(query, 1)
1854 def _download_n_results(self, query, n):
1855 """Downloads a specified number of results for a query"""
# already_seen de-duplicates "<id>/<vid>" pairs across result pages.
1858 already_seen = set()
1862 self.report_download_page(query, pagenum)
1863 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1864 request = urllib2.Request(result_url, None, std_headers)
1866 page = urllib2.urlopen(request).read()
1867 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1868 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1871 # Extract video identifiers
1872 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1873 video_id = mobj.group(1)
1874 if video_id not in already_seen:
1875 video_ids.append(video_id)
1876 already_seen.add(video_id)
1877 if len(video_ids) == n:
1878 # Specified n videos reached
1879 for id in video_ids:
1880 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
# No "Next" link: last page reached, flush whatever was collected.
1883 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1884 for id in video_ids:
1885 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1888 pagenum = pagenum + 1
1890 class YoutubePlaylistIE(InfoExtractor):
1891 """Information Extractor for YouTube playlists."""
# Matches view_play_list/my_playlists URLs and user/.../user/ pages;
# group 1 = playlist id.
1893 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1894 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1895 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1896 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1899 def __init__(self, youtube_ie, downloader=None):
1900 InfoExtractor.__init__(self, downloader)
# Actual downloads are delegated to the wrapped YouTube extractor.
1901 self._youtube_ie = youtube_ie
1905 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1907 def report_download_page(self, playlist_id, pagenum):
1908 """Report attempt to download playlist page with given number."""
1909 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1911 def _real_initialize(self):
1912 self._youtube_ie.initialize()
1914 def _real_extract(self, url):
# Walk the playlist pages collecting video ids, apply the optional
# 'playliststart' offset, then delegate each video to the YouTube
# extractor.
1915 # Extract playlist id
1916 mobj = re.match(self._VALID_URL, url)
1918 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1921 # Download playlist pages
1922 playlist_id = mobj.group(1)
1927 self.report_download_page(playlist_id, pagenum)
1928 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1930 page = urllib2.urlopen(request).read()
1931 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1932 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1935 # Extract video identifiers
1937 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# ids_in_page keeps per-page order while de-duplicating repeats.
1938 if mobj.group(1) not in ids_in_page:
1939 ids_in_page.append(mobj.group(1))
1940 video_ids.extend(ids_in_page)
1942 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1944 pagenum = pagenum + 1
1946 playliststart = self._downloader.params.get('playliststart', 1)
1947 playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
1948 if playliststart > 0:
1949 video_ids = video_ids[playliststart:]
1951 for id in video_ids:
1952 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1955 class YoutubeUserIE(InfoExtractor):
1956 """Information Extractor for YouTube users."""
# Matches a user page URL; group 1 = username.
1958 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1959 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1960 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1963 def __init__(self, youtube_ie, downloader=None):
1964 InfoExtractor.__init__(self, downloader)
# Actual downloads are delegated to the wrapped YouTube extractor.
1965 self._youtube_ie = youtube_ie
1969 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1971 def report_download_page(self, username):
1972 """Report attempt to download user page."""
1973 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1975 def _real_initialize(self):
1976 self._youtube_ie.initialize()
1978 def _real_extract(self, url):
# Fetch the user's GData feed, collect video ids, apply the optional
# 'playliststart' offset, then delegate each video to the YouTube
# extractor.
1980 mobj = re.match(self._VALID_URL, url)
1982 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1985 # Download user page
1986 username = mobj.group(1)
1990 self.report_download_page(username)
1991 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1993 page = urllib2.urlopen(request).read()
1994 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1995 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1998 # Extract video identifiers
2001 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
# ids_in_page keeps feed order while de-duplicating repeats.
2002 if mobj.group(1) not in ids_in_page:
2003 ids_in_page.append(mobj.group(1))
2004 video_ids.extend(ids_in_page)
2006 playliststart = self._downloader.params.get('playliststart', 1)
2007 playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
2008 if playliststart > 0:
2009 video_ids = video_ids[playliststart:]
2011 for id in video_ids:
2012 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2015 class PostProcessor(object):
2016 """Post Processor class.
2018 PostProcessor objects can be added to downloaders with their
2019 add_post_processor() method. When the downloader has finished a
2020 successful download, it will take its internal chain of PostProcessors
2021 and start calling the run() method on each one of them, first with
2022 an initial argument and then with the returned value of the previous
2025 The chain will be stopped if one of them ever returns None or the end
2026 of the chain is reached.
2028 PostProcessor objects follow a "mutual registration" process similar
2029 to InfoExtractor objects.
2034 def __init__(self, downloader=None):
# The owning downloader; may be attached later via set_downloader().
2035 self._downloader = downloader
2037 def set_downloader(self, downloader):
2038 """Sets the downloader for this PP."""
2039 self._downloader = downloader
2041 def run(self, information):
2042 """Run the PostProcessor.
2044 The "information" argument is a dictionary like the ones
2045 composed by InfoExtractors. The only difference is that this
2046 one has an extra field called "filepath" that points to the
2049 When this method returns None, the postprocessing chain is
2050 stopped. However, this method may return an information
2051 dictionary that will be passed to the next postprocessing
2052 object in the chain. It can be the one it received after
2053 changing some fields.
2055 In addition, this method may raise a PostProcessingError
2056 exception that will be taken into account by the downloader
# Base implementation is a no-op pass-through; subclasses override run().
2059 return information # by default, do nothing
2061 ### MAIN PROGRAM ###
2062 if __name__ == '__main__':
2064 # Modules needed only when running the main program
2068 # Function to update the program file with the latest version from bitbucket.org
2069 def update_self(downloader, filename):
2070 # Note: downloader only used for options
2071 if not os.access (filename, os.W_OK):
2072 sys.exit('ERROR: no write permissions on %s' % filename)
2074 downloader.to_stdout('Updating to latest stable version...')
2075 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
2076 latest_version = urllib.urlopen(latest_url).read().strip()
2077 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2078 newcontent = urllib.urlopen(prog_url).read()
2079 stream = open(filename, 'w')
2080 stream.write(newcontent)
2082 downloader.to_stdout('Updated to version %s' % latest_version)
2084 # General configuration
2085 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2086 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
2087 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2089 # Parse command line
2090 parser = optparse.OptionParser(
2091 usage='Usage: %prog [options] url...',
2092 version='2010.08.04',
2093 conflict_handler='resolve',
2096 parser.add_option('-h', '--help',
2097 action='help', help='print this help text and exit')
2098 parser.add_option('-v', '--version',
2099 action='version', help='print program version and exit')
2100 parser.add_option('-U', '--update',
2101 action='store_true', dest='update_self', help='update this program to latest stable version')
2102 parser.add_option('-i', '--ignore-errors',
2103 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2104 parser.add_option('-r', '--rate-limit',
2105 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2106 parser.add_option('-R', '--retries',
2107 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2108 parser.add_option('--playlist-start',
2109 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2111 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2112 authentication.add_option('-u', '--username',
2113 dest='username', metavar='USERNAME', help='account username')
2114 authentication.add_option('-p', '--password',
2115 dest='password', metavar='PASSWORD', help='account password')
2116 authentication.add_option('-n', '--netrc',
2117 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2118 parser.add_option_group(authentication)
2120 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2121 video_format.add_option('-f', '--format',
2122 action='store', dest='format', metavar='FORMAT', help='video format code')
2123 video_format.add_option('-m', '--mobile-version',
2124 action='store_const', dest='format', help='alias for -f 17', const='17')
2125 video_format.add_option('--all-formats',
2126 action='store_const', dest='format', help='download all available video formats', const='-1')
2127 video_format.add_option('--max-quality',
2128 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2129 video_format.add_option('-b', '--best-quality',
2130 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2131 parser.add_option_group(video_format)
2133 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2134 verbosity.add_option('-q', '--quiet',
2135 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2136 verbosity.add_option('-s', '--simulate',
2137 action='store_true', dest='simulate', help='do not download video', default=False)
2138 verbosity.add_option('-g', '--get-url',
2139 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2140 verbosity.add_option('-e', '--get-title',
2141 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2142 verbosity.add_option('--get-thumbnail',
2143 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2144 verbosity.add_option('--get-description',
2145 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2146 verbosity.add_option('--no-progress',
2147 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2148 parser.add_option_group(verbosity)
2150 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2151 filesystem.add_option('-t', '--title',
2152 action='store_true', dest='usetitle', help='use title in file name', default=False)
2153 filesystem.add_option('-l', '--literal',
2154 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2155 filesystem.add_option('-o', '--output',
2156 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2157 filesystem.add_option('-a', '--batch-file',
2158 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2159 filesystem.add_option('-w', '--no-overwrites',
2160 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2161 filesystem.add_option('-c', '--continue',
2162 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2163 parser.add_option_group(filesystem)
2165 (opts, args) = parser.parse_args()
2167 # Batch file verification
2169 if opts.batchfile is not None:
2171 if opts.batchfile == '-':
2174 batchfd = open(opts.batchfile, 'r')
2175 batchurls = batchfd.readlines()
2176 batchurls = [x.strip() for x in batchurls]
2177 batchurls = [x for x in batchurls if len(x) > 0]
2179 sys.exit(u'ERROR: batch file could not be read')
2180 all_urls = batchurls + args
2182 # Conflicting, missing and erroneous options
2183 if opts.bestquality:
2184 print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2185 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2186 parser.error(u'using .netrc conflicts with giving username/password')
2187 if opts.password is not None and opts.username is None:
2188 parser.error(u'account username missing')
2189 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
2190 parser.error(u'using output template conflicts with using title or literal title')
2191 if opts.usetitle and opts.useliteral:
2192 parser.error(u'using title conflicts with using literal title')
2193 if opts.username is not None and opts.password is None:
2194 opts.password = getpass.getpass(u'Type account password and press return:')
2195 if opts.ratelimit is not None:
2196 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2197 if numeric_limit is None:
2198 parser.error(u'invalid rate limit specified')
2199 opts.ratelimit = numeric_limit
2200 if opts.retries is not None:
2202 opts.retries = long(opts.retries)
2203 except (TypeError, ValueError), err:
2204 parser.error(u'invalid retry count specified')
2205 if opts.playliststart is not None:
2207 opts.playliststart = long(opts.playliststart)
2208 except (TypeError, ValueError), err:
2209 parser.error(u'invalid playlist page specified')
2211 # Information extractors
2212 youtube_ie = YoutubeIE()
2213 metacafe_ie = MetacafeIE(youtube_ie)
2214 dailymotion_ie = DailymotionIE()
2215 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2216 youtube_user_ie = YoutubeUserIE(youtube_ie)
2217 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2218 google_ie = GoogleIE()
2219 google_search_ie = GoogleSearchIE(google_ie)
2220 photobucket_ie = PhotobucketIE()
2221 yahoo_ie = YahooIE()
2222 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2223 generic_ie = GenericIE()
2226 fd = FileDownloader({
2227 'usenetrc': opts.usenetrc,
2228 'username': opts.username,
2229 'password': opts.password,
2230 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2231 'forceurl': opts.geturl,
2232 'forcetitle': opts.gettitle,
2233 'forcethumbnail': opts.getthumbnail,
2234 'forcedescription': opts.getdescription,
2235 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2236 'format': opts.format,
2237 'format_limit': opts.format_limit,
2238 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2239 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2240 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2241 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2242 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2243 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2244 or u'%(id)s.%(ext)s'),
2245 'ignoreerrors': opts.ignoreerrors,
2246 'ratelimit': opts.ratelimit,
2247 'nooverwrites': opts.nooverwrites,
2248 'retries': opts.retries,
2249 'continuedl': opts.continue_dl,
2250 'noprogress': opts.noprogress,
2251 'playliststart': opts.playliststart,
2253 fd.add_info_extractor(youtube_search_ie)
2254 fd.add_info_extractor(youtube_pl_ie)
2255 fd.add_info_extractor(youtube_user_ie)
2256 fd.add_info_extractor(metacafe_ie)
2257 fd.add_info_extractor(dailymotion_ie)
2258 fd.add_info_extractor(youtube_ie)
2259 fd.add_info_extractor(google_ie)
2260 fd.add_info_extractor(google_search_ie)
2261 fd.add_info_extractor(photobucket_ie)
2262 fd.add_info_extractor(yahoo_ie)
2263 fd.add_info_extractor(yahoo_search_ie)
2265 # This must come last since it's the
2266 # fallback if none of the others work
2267 fd.add_info_extractor(generic_ie)
2270 if opts.update_self:
2271 update_self(fd, sys.argv[0])
2274 if len(all_urls) < 1:
2275 if not opts.update_self:
2276 parser.error(u'you must provide at least one URL')
2279 retcode = fd.download(all_urls)
2282 except DownloadError:
2284 except SameFileError:
2285 sys.exit(u'ERROR: fixed output name but more than one file to download')
2286 except KeyboardInterrupt:
2287 sys.exit(u'\nERROR: Interrupted by user')