2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse import parse_qs
27 from cgi import parse_qs
30 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
33 'Accept-Language': 'en-us,en;q=0.5',
# Characters allowed in "simplified" titles: ASCII letters and digits only.
# (Python 2: str.decode('ascii') turns the byte strings into unicode.)
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    def yield_preferredencoding():
        # [listing gap] — original wraps this call in try/except (falling
        # back presumably to 'ascii') and yields the result; confirm against
        # the full source.
        pref = locale.getpreferredencoding()
    # Python 2 generator protocol: .next() returns the first yielded value.
    return yield_preferredencoding().next()
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a Unicode character.

    This function receives a match object and is intended to be used with
    the re.sub() function.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in htmlentitydefs.name2codepoint:
        return unichr(htmlentitydefs.name2codepoint[entity])

    # Numeric character reference (&#NNN; or &#xNNN;)
    mobj = re.match(ur'(?u)#(x?\d+)', entity)
    # [listing gap] — likely "if mobj is not None:" guard here.
    numstr = mobj.group(1)
    if numstr.startswith(u'x'):
        # Prefix with '0' so long(..., 16) accepts the '0x...' form.
        numstr = u'0%s' % numstr
    # [listing gap] — likely base selection (16 for hex, 10 otherwise).
    return unichr(long(numstr, base))

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
80 def sanitize_title(utitle):
81 """Sanitizes a video title so it could be used as part of a filename."""
82 utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
83 return utitle.replace(unicode(os.sep), u'%')
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    # [listing gap] — the "try:" and the special case routing '-' to stdout
    # are absent from this listing.
    return (sys.stdout, filename)
    stream = open(filename, open_mode)
    return (stream, filename)
    except (IOError, OSError), err:
        # In case of error, try to remove win32 forbidden chars
        filename = re.sub(ur'[/<>:"\|\?\*]', u'#', filename)

        # An exception here should be caught in the caller
        stream = open(filename, open_mode)
        return (stream, filename)
class DownloadError(Exception):
    """Download Error exception.

    Raised by FileDownloader objects when they are not configured to
    continue on errors; carries the appropriate error message.
    """
    pass
class SameFileError(Exception):
    """Same File exception.

    Raised by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    Raised by a PostProcessor's .run() method to indicate an error in the
    postprocessing task.
    """
    pass
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available for
    that video.
    """
    pass
class ContentTooShortError(Exception):
    """Content Too Short exception.

    Raised by FileDownloader objects when a file they download turns out
    smaller than what the server announced first, indicating the connection
    was probably interrupted.
    """

    # Both counts are measured in bytes.
    def __init__(self, downloaded, expected):
        """Store the received and announced byte counts."""
        self.downloaded = downloaded
        self.expected = expected
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible of downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader handles it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username: Username for authentication purposes.
    password: Password for authentication purposes.
    usenetrc: Use netrc for authentication instead.
    quiet: Do not print messages to stdout.
    forceurl: Force printing final URL.
    forcetitle: Force printing title.
    simulate: Do not download the video files.
    format: Video format code.
    format_limit: Highest quality format to try.
    outtmpl: Template for output names.
    ignoreerrors: Do not stop on download errors.
    ratelimit: Download speed limit, in bytes/sec.
    nooverwrites: Prevent overwriting files.
    retries: Number of times to retry for HTTP error 503
    continuedl: Try to continue downloads if possible.
    noprogress: Do not print the progress bar.
    """

    # NOTE(review): this listing is incomplete; missing original lines are
    # marked "[listing gap]" below — confirm each against the full source.

    # Class-level defaults, reset per-instance in __init__.
    _download_retcode = None
    _num_downloads = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        # [listing gap] — presumably the InfoExtractor/PostProcessor lists
        # and the self.params assignment are initialized here.
        self._download_retcode = 0
        self._num_downloads = 0

    # [listing gap] — likely a @staticmethod decorator (no self parameter).
    def pmkdir(filename):
        """Create directory components in filename. Similar to Unix "mkdir -p"."""
        components = filename.split(os.sep)
        aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
        aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
        for dir in aggregate:
            if not os.path.exists(dir):
                # [listing gap] — likely the directory creation call.

    # [listing gap] — likely a @staticmethod decorator.
    def format_bytes(bytes):
        # Render a byte count with a 1024-based unit suffix, e.g. "1.50M".
        # [listing gap] — None/zero handling lines absent from this listing.
        if type(bytes) is str:
            # [listing gap] — likely conversion to float.
        exponent = long(math.log(bytes, 1024.0))
        suffix = 'bkMGTPEZY'[exponent]
        converted = float(bytes) / float(1024**exponent)
        return '%.2f%s' % (converted, suffix)

    # [listing gap] — likely a @staticmethod decorator.
    def calc_percent(byte_counter, data_len):
        # [listing gap] — likely a guard returning a placeholder when
        # data_len is None.
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    # [listing gap] — likely a @staticmethod decorator.
    def calc_eta(start, now, total, current):
        # [listing gap] — guards and the "dif" computation are absent here.
        if current == 0 or dif < 0.001: # One millisecond
            # [listing gap] — likely a placeholder return.
        rate = float(current) / dif
        eta = long((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        # [listing gap] — likely an hour-overflow guard.
        return '%02d:%02d' % (eta_mins, eta_secs)

    # [listing gap] — likely a @staticmethod decorator.
    def calc_speed(start, now, bytes):
        # [listing gap] — the "dif = now - start" line is absent here.
        if bytes == 0 or dif < 0.001: # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    # [listing gap] — likely a @staticmethod decorator.
    def best_block_size(elapsed_time, bytes):
        # Adaptive read size: aim to double/halve based on observed rate.
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            # [listing gap] — likely "return new_max" on too-short samples.
        rate = bytes / elapsed_time
        # [listing gap] — likely the rate-vs-bounds comparisons and return.

    # [listing gap] — likely a @staticmethod decorator.
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        # [listing gap] — likely a None check on matchobj.
        number = float(matchobj.group(1))
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        # [listing gap] — likely the append to the internal IE list.
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        # [listing gap] — likely the append to the internal PP chain.
        pp.set_downloader(self)

    def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
        """Print message to stdout if not in quiet mode."""
        # [listing gap] — the "try:" opening this handler is absent.
        if not self.params.get('quiet', False):
            print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
        except (UnicodeEncodeError), err:
            if not ignore_encoding_errors:
                # [listing gap] — likely a re-raise.

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message.encode(preferredencoding())

    def fixed_template(self):
        """Checks if the output template is fixed."""
        # True when outtmpl contains no %(...)s substitution fields.
        return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            # [listing gap] — likely "return"; the "now" assignment is also
            # absent from this listing.
        elapsed = now - start_time
        # [listing gap]
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            # [listing gap] — likely "return".
        self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 503"""
        self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        # [listing gap] — the "try:" opening this handler is absent.
        self.to_stdout(u'[download] %s has already been downloaded' % file_name)
        except (UnicodeEncodeError), err:
            self.to_stdout(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_stdout(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_stdout(u'[download] Download completed')
        # [listing gap] — likely the else branch printing a newline after
        # the progress bar.

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""
        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            # [listing gap] — forced-printing block runs, then likely a
            # "return" ending simulate mode.
        if self.params.get('forcetitle', False):
            print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forceurl', False):
            print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
        # [listing gap] — likely "return" plus the "try:" around filename
        # templating.
        template_dict = dict(info_dict)
        template_dict['epoch'] = unicode(long(time.time()))
        template_dict['ord'] = unicode('%05d' % self._num_downloads)
        filename = self.params['outtmpl'] % template_dict
        except (ValueError, KeyError), err:
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
        # [listing gap]
        if self.params.get('nooverwrites', False) and os.path.exists(filename):
            self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
            # [listing gap] — likely "return".
        # [listing gap] — likely "try:".
        self.pmkdir(filename)
        except (OSError, IOError), err:
            self.trouble('ERROR: unable to create directories: %s' % str(err))
            # [listing gap] — likely "return", then "try:".
        success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
        except (OSError, IOError), err:
            raise UnavailableVideoError
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.trouble('ERROR: unable to download video data: %s' % str(err))
            # [listing gap] — likely "return".
        except (ContentTooShortError, ), err:
            self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
            # [listing gap] — likely "return", then "if success: try:".
        self.post_process(filename, info_dict)
        except (PostProcessingError), err:
            self.trouble('ERROR: postprocessing: %s' % str(err))
            # [listing gap] — likely "return".

    def download(self, url_list):
        """Download a given list of URLs."""
        # A fixed (non-templated) output name cannot hold more than one file.
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])
        # [listing gap] — the loops over url_list and the registered
        # InfoExtractors are absent from this listing.
        suitable_found = False
        # Go to next InfoExtractor if not suitable
        if not ie.suitable(url):
            # [listing gap] — likely "continue".
        # Suitable InfoExtractor found
        suitable_found = True
        # Extract information from URL and process it
        # [listing gap] — likely "ie.extract(url)".
        # Suitable InfoExtractor had been found; go to next URL
        # [listing gap] — likely "break".
        if not suitable_found:
            self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        # [listing gap] — likely "info = dict(ie_info)".
        info['filepath'] = filename
        # [listing gap] — likely the loop feeding info through each
        # registered PostProcessor.

    def _download_with_rtmpdump(self, filename, url, player_url):
        self.report_destination(filename)

        # Check for rtmpdump first
        # [listing gap] — likely "try:".
        subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            # [listing gap] — likely "return False".

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', filename]
        retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(filename)
            self.to_stdout(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0) # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(filename)
            if prevsize == cursize and retval == 1:
                # [listing gap] — likely "break" when no progress was made.
        # [listing gap] — likely "if retval == 0:".
        self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
        # [listing gap] — likely "return True" / else branch.
        self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
        # [listing gap] — likely "return False".

    def _do_download(self, filename, url, player_url):
        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url, player_url)

        # [listing gap] — stream/open_mode initialization absent here.
        basic_request = urllib2.Request(url, None, std_headers)
        request = urllib2.Request(url, None, std_headers)

        # Establish possible resume length
        if os.path.isfile(filename):
            resume_len = os.path.getsize(filename)
        # [listing gap] — likely "else: resume_len = 0".

        # Request parameters in case of being able to resume
        if self.params.get('continuedl', False) and resume_len != 0:
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
        # [listing gap] — likely open_mode switch and "count = 0".
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            # [listing gap] — likely "try:".
            data = urllib2.urlopen(request)
            # [listing gap] — likely "break" on success.
            except (urllib2.HTTPError, ), err:
                if err.code != 503 and err.code != 416:
                    # Unexpected HTTP error
                    # [listing gap] — likely "raise".
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    # [listing gap] — likely "try:".
                    # Open the connection again without the range header
                    data = urllib2.urlopen(basic_request)
                    content_length = data.info()['Content-Length']
                    except (urllib2.HTTPError, ), err:
                        # [listing gap] — likely 503-only filter + raise.
                    # Examine the reported length
                    if (content_length is not None and
                        (resume_len - 100 < long(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        # [listing gap] — likely "return True".
                    # The length does not match, we start the download over
                    self.report_unable_to_resume()
                    # [listing gap] — likely open_mode reset and count += 1.
            self.report_retry(count, retries)
        # [listing gap] — likely "if count > retries:".
        self.trouble(u'ERROR: giving up after %s retries' % retries)

        data_len = data.info().get('Content-length', None)
        data_len_str = self.format_bytes(data_len)
        # [listing gap] — byte_counter/block_size/start initialization and
        # the download loop header are absent from this listing.
        data_block = data.read(block_size)
        data_block_len = len(data_block)
        if data_block_len == 0:
            # [listing gap] — likely "break" at end of stream.
        byte_counter += data_block_len

        # Open file just in time
        # [listing gap] — likely "if stream is None: try:".
        (stream, filename) = sanitize_open(filename, open_mode)
        self.report_destination(filename)
        except (OSError, IOError), err:
            self.trouble('ERROR: unable to open for writing: %s' % str(err))
            # [listing gap] — likely "return False", then "try:" for write.
        stream.write(data_block)
        except (IOError, OSError), err:
            self.trouble('\nERROR: unable to write data: %s' % str(err))
            # [listing gap] — likely "return False".
        block_size = self.best_block_size(after - before, data_block_len)

        # Progress message derived from byte counts and elapsed time.
        percent_str = self.calc_percent(byte_counter, data_len)
        eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
        speed_str = self.calc_speed(start, time.time(), byte_counter)
        self.report_progress(percent_str, data_len_str, speed_str, eta_str)

        # Apply rate limit between reads.
        self.slow_down(start, byte_counter)
        # [listing gap] — likely report_finish() after the loop. Note the
        # comparison below is string-vs-string: byte_counter is stringified
        # because Content-length arrives as a string.
        if data_len is not None and str(byte_counter) != data_len:
            raise ContentTooShortError(byte_counter, long(data_len))
        # [listing gap] — likely "return True".
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information from the video (or videos) the URL refers to. This
    information includes the real video URL, the video title and simplified
    title, author and others. The information is stored in a dictionary
    which is then passed to the FileDownloader. The FileDownloader
    processes this information possibly downloading the video to the file
    system, among other possible outcomes. The dictionaries must include
    the following fields:

    id: Video identifier.
    url: Final video URL.
    uploader: Nickname of the video uploader.
    title: Literal title.
    stitle: Simplified title.
    ext: Video filename extension.
    format: Video format.
    player_url: SWF Player URL (may be None).

    The following fields are optional. Their primary purpose is to allow
    youtube-dl to serve as the backend for a video search function, such
    as the one in youtube2mp3. They are only used when their respective
    forced printing functions are called:

    thumbnail: Full URL to a video thumbnail image.
    description: One-line video description.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods, as well as the suitable() static method.
    Probably, they should also be instantiated and added to the main
    downloader.
    """

    # NOTE(review): listing is incomplete; gaps are marked below.

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        # [listing gap] — likely a "_ready" flag initialization.
        self.set_downloader(downloader)

    # [listing gap] — the "@staticmethod def suitable(url):" header for the
    # docstring below is absent from this listing.
        """Receives a URL and returns True if suitable for this IE."""
        # [listing gap]

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        # [listing gap] — likely a not-yet-ready guard.
        self._real_initialize()
        # [listing gap] — likely marking the instance ready.

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        # [listing gap] — likely "self.initialize()".
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        # [listing gap] — likely "pass".

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        # [listing gap] — likely "pass".
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    # NOTE(review): listing is incomplete; gaps are marked below.

    _VALID_URL = r'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
    _video_extensions = {
        # [listing gap] — most format->extension entries absent here.
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
    # [listing gap] — dict close and the "@staticmethod def suitable(url):"
    # header for the line below are absent from this listing.
        return (re.match(YoutubeIE._VALID_URL, url) is not None)

    def report_lang(self):
        """Report attempt to set language."""
        self._downloader.to_stdout(u'[youtube] Setting language')

    def report_login(self):
        """Report attempt to log in."""
        self._downloader.to_stdout(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_stdout(u'[youtube] Confirming age')

    def report_video_webpage_download(self, video_id):
        """Report attempt to download video webpage."""
        self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_video_info_webpage_download(self, video_id):
        """Report attempt to download video info webpage."""
        self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)

    def report_unavailable_format(self, video_id, format):
        """Report extracted video URL."""
        self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))

    def report_rtmp_download(self):
        """Indicate the download will use the RTMP protocol."""
        self._downloader.to_stdout(u'[youtube] RTMP download detected')

    def _real_initialize(self):
        # Optional login + mandatory language/age setup before extraction.
        if self._downloader is None:
            # [listing gap] — likely "return".
        # [listing gap] — likely username/password defaults (None).
        downloader_params = self._downloader.params

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            # [listing gap] — likely "try:".
            info = netrc.netrc().authenticators(self._NETRC_MACHINE)
            # [listing gap] — likely unpacking of info when present, else:
            raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                # [listing gap] — likely "return".

        # Set language (best-effort; only a warning on failure)
        request = urllib2.Request(self._LANG_URL, None, std_headers)
        # [listing gap] — likely "try: self.report_lang()".
        urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
            # [listing gap] — likely "return".

        # No authentication to be performed
        # [listing gap] — likely "if username is None: return".

        # Log in (POST of the signup form)
        # [listing gap] — likely "login_form = {" opener.
        'current_form': 'loginForm',
        # [listing gap] — likely a 'next' field.
        'action_login': 'Log In',
        'username': username,
        'password': password,
        # [listing gap] — likely the dict close.
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
        # [listing gap] — likely "try: self.report_login()".
        login_results = urllib2.urlopen(request).read()
        # The login form re-appearing in the response means login failed.
        if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
            self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
            # [listing gap] — likely "return".
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            # [listing gap] — likely "return".

        # Confirm age (required for age-restricted videos)
        # [listing gap] — likely "age_form = {" opener and a next_url field.
        'action_confirm': 'Confirm',
        # [listing gap] — likely the dict close.
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
        # [listing gap] — likely "try:".
        self.report_age_confirmation()
        age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            # [listing gap] — likely "return".

    def _real_extract(self, url):
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        # [listing gap] — likely "if mobj is None:".
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
        # [listing gap] — likely "return".
        video_id = mobj.group(2)

        # Get video webpage
        self.report_video_webpage_download(video_id)
        request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id, None, std_headers)
        # [listing gap] — likely "try:".
        video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
            # [listing gap] — likely "return".

        # Attempt to extract SWF player URL
        mobj = re.search(r'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage)
        # [listing gap] — likely "if mobj is not None:".
        player_url = mobj.group(1)
        # [listing gap] — likely "else: player_url = None".

        # Get video info, trying several el= variants until one has a token.
        self.report_video_info_webpage_download(video_id)
        for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
            video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
                    % (video_id, el_type))
            request = urllib2.Request(video_info_url, None, std_headers)
            # [listing gap] — likely "try:".
            video_info_webpage = urllib2.urlopen(request).read()
            video_info = parse_qs(video_info_webpage)
            if 'token' in video_info:
                # [listing gap] — likely "break".
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
                # [listing gap] — likely "return".
        if 'token' not in video_info:
            if 'reason' in video_info:
                self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0])
            # [listing gap] — likely "else:".
            self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
            # [listing gap] — likely "return".

        # Start extracting information
        self.report_information_extraction(video_id)

        # uploader
        if 'author' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
            # [listing gap] — likely "return".
        video_uploader = urllib.unquote_plus(video_info['author'][0])

        # title
        if 'title' not in video_info:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            # [listing gap] — likely "return".
        video_title = urllib.unquote_plus(video_info['title'][0])
        video_title = video_title.decode('utf-8')
        video_title = sanitize_title(video_title)

        # simplified title: collapse disallowed character runs into '_'
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # thumbnail image
        if 'thumbnail_url' not in video_info:
            self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
            # [listing gap] — likely a fallback empty thumbnail value.
        else: # don't panic if we can't find it
            video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])

        # description (only extracted when forcedescription is requested)
        video_description = 'No description available.'
        if self._downloader.params.get('forcedescription', False):
            mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
            # [listing gap] — likely "if mobj is not None:".
            video_description = mobj.group(1)

        # token
        video_token = urllib.unquote_plus(video_info['token'][0])

        # Decide which formats to download
        requested_format = self._downloader.params.get('format', None)
        get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)

        if 'fmt_url_map' in video_info:
            url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
            format_limit = self._downloader.params.get('format_limit', None)
            if format_limit is not None and format_limit in self._available_formats:
                format_list = self._available_formats[self._available_formats.index(format_limit):]
            # [listing gap] — likely "else:".
            format_list = self._available_formats
            existing_formats = [x for x in format_list if x in url_map]
            if len(existing_formats) == 0:
                self._downloader.trouble(u'ERROR: no known formats available for video')
                # [listing gap] — likely "return".
            if requested_format is None:
                video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality
            elif requested_format == '-1':
                video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats
            # [listing gap] — likely "else:".
            video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format
        elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
            self.report_rtmp_download()
            video_url_list = [(None, video_info['conn'][0])]
        # [listing gap] — likely "else:".
        self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
        # [listing gap] — likely "return".

        for format_param, video_real_url in video_url_list:
            # At this point we have a new video
            self._downloader.increment_downloads()

            # Extension; defaults to flv when the format is unknown.
            video_extension = self._video_extensions.get(format_param, 'flv')

            # Find the video URL in fmt_url_map or conn paramters
            # [listing gap] — likely "try:".
            # Process video information
            self._downloader.process_info({
                'id': video_id.decode('utf-8'),
                'url': video_real_url.decode('utf-8'),
                'uploader': video_uploader.decode('utf-8'),
                'title': video_title,
                'stitle': simple_title,
                'ext': video_extension.decode('utf-8'),
                'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
                'thumbnail': video_thumbnail.decode('utf-8'),
                'description': video_description.decode('utf-8'),
                'player_url': player_url,
            # [listing gap] — likely the "})" close.
            except UnavailableVideoError, err:
                self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    # NOTE(review): listing is incomplete; gaps are marked below.

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
    _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'

    def __init__(self, youtube_ie, downloader=None):
        # Keeps a YoutubeIE around to delegate "yt-..." video ids to.
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    # [listing gap] — the "@staticmethod def suitable(url):" header for the
    # line below is absent from this listing.
        return (re.match(MetacafeIE._VALID_URL, url) is not None)

    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self._downloader.to_stdout(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)

    def _real_initialize(self):
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER, None, std_headers)
        # [listing gap] — likely "try:".
        self.report_disclaimer()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            # [listing gap] — likely "return".

        # Confirm age (POST the family-filter form)
        # [listing gap] — likely "disclaimer_form = {" opener and a filter
        # field.
        'submit': "Continue - I'm over 18",
        # [listing gap] — likely the dict close.
        request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
        # [listing gap] — likely "try:".
        self.report_age_confirmation()
        disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
            # [listing gap] — likely "return".

    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        # [listing gap] — likely "if mobj is None:".
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
        # [listing gap] — likely "return".
        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
            # [listing gap] — likely "return" after delegating.

        # At this point we have a new video
        self._downloader.increment_downloads()

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        # [listing gap] — likely "try:".
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
            # [listing gap] — likely "return".

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
        # [listing gap] — likely "if mobj is None:".
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        # [listing gap] — likely "return".
        mediaURL = urllib.unquote(mobj.group(1))

        # Extract gdaKey if available
        mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
        # [listing gap] — likely "if mobj is None:" (key is optional).
        video_url = mediaURL
        #self._downloader.trouble(u'ERROR: unable to extract gdaKey')
        # [listing gap] — likely "else:".
        gdaKey = mobj.group(1)
        video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        # [listing gap] — likely "if mobj is None:".
        self._downloader.trouble(u'ERROR: unable to extract title')
        # [listing gap] — likely "return".
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
        # [listing gap] — likely "if mobj is None:".
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        # [listing gap] — likely "return".
        video_uploader = mobj.group(1)

        # [listing gap] — likely "try:".
        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        # [listing gap] — likely 'format'/'player_url' entries and "})".
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class DailymotionIE(InfoExtractor):
    """Information Extractor for Dailymotion"""

    # Group 1: numeric/alpha video id, group 2: URL-slug title.
    _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(DailymotionIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[dailymotion] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[dailymotion] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self, url):
        """Extract id, media URL, title and uploader, then feed process_info()."""
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        # NOTE(review): message below reads "unable retrieve" — missing "to".
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        # Media URL lives in the Flash embed: addVariable("video", "<url>").
        mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))

        # if needed add http://www.dailymotion.com/ if relative URL

        video_url = mediaURL

        # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
        mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)

        mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
        video_uploader = mobj.group(1)

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class GoogleIE(InfoExtractor):
    """Information extractor for video.google.com."""

    # Group 1 captures the (possibly negative) docid.
    _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(GoogleIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self, url):
        """Extract the media URL, title, description and (optionally) the
        thumbnail of a Google Video page, then feed process_info()."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'mp4'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        # Prefer the direct mp4 download link; fall back to the Flash
        # stream (flv) when it is absent.
        mobj = re.search(r"download_url:'([^']+)'", webpage)
        video_extension = 'flv'
        mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))
        # The page embeds '=' and '&' as literal "\x3d"/"\x26" escapes;
        # turn them back into the real characters.
        mediaURL = mediaURL.replace('\\x3d', '\x3d')
        mediaURL = mediaURL.replace('\\x26', '\x26')

        video_url = mediaURL

        mobj = re.search(r'<title>(.*)</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        # Collapse every run of non-filename-safe characters to '_'.
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # Extract video description
        mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description:
            video_description = 'No description available.'

        # Extract video thumbnail
        if self._downloader.params.get('forcethumbnail', False):
            # The thumbnail only appears on the search-results page, so a
            # second request is needed; abs() because docids can be negative.
            request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
            mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
            self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
            video_thumbnail = mobj.group(1)
        else: # we need something to pass to process_info
            video_thumbnail = ''

        # Process video information
        # NOTE(review): no 'uploader' entry is visible in this dict,
        # unlike the sibling extractors — confirm against the full file.
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class PhotobucketIE(InfoExtractor):
    """Information extractor for photobucket.com."""

    # Group 1: the .flv filename named by the "current" query parameter.
    _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(PhotobucketIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self, url):
        """Extract media URL, title and uploader from a Photobucket page."""
        # Extract id from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(1)

        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract media URL')
        mediaURL = urllib.unquote(mobj.group(1))

        video_url = mediaURL

        # One regex yields both title (group 1) and uploader (group 2).
        mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        video_uploader = mobj.group(2).decode('utf-8')

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader,
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class YahooIE(InfoExtractor):
    """Information extractor for video.yahoo.com."""

    # _VALID_URL matches all Yahoo! Video URLs
    # _VPAGE_URL matches only the extractable '/watch/' URLs
    _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
    _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(YahooIE._VALID_URL, url) is not None)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self, url, new_video=True):
        """Extract a Yahoo! Video; non-/watch/ URLs are first rewritten to
        the canonical /watch/ form and re-dispatched (new_video=False
        marks the recursive call)."""
        # Extract ID from URL
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # At this point we have a new video
        self._downloader.increment_downloads()
        video_id = mobj.group(2)
        video_extension = 'flv'

        # Rewrite valid but non-extractable URLs as
        # extractable English language /watch/ URLs
        if re.match(self._VPAGE_URL, url) is None:
            request = urllib2.Request(url)
            webpage = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

            # The page embeds the canonical ("id", "...") pair in script.
            mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
            self._downloader.trouble(u'ERROR: Unable to extract id field')
            yahoo_id = mobj.group(1)

            mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
            self._downloader.trouble(u'ERROR: Unable to extract vid field')
            yahoo_vid = mobj.group(1)

            url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
            return self._real_extract(url, new_video=False)

        # Retrieve video webpage to extract further information
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video title')
        video_title = mobj.group(1).decode('utf-8')
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video uploader')
        # NOTE(review): group(1) of this regex is the literal
        # "people"/"profile" path segment; the uploader name is group(2).
        # Looks like a bug — confirm intended group.
        video_uploader = mobj.group(1).decode('utf-8')

        # Extract video thumbnail
        mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
        video_thumbnail = mobj.group(1).decode('utf-8')

        # Extract video description
        mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video description')
        video_description = mobj.group(1).decode('utf-8')
        if not video_description: video_description = 'No description available.'

        # Extract video height and width
        mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video height')
        yv_video_height = mobj.group(1)

        mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
        self._downloader.trouble(u'ERROR: unable to extract video width')
        yv_video_width = mobj.group(1)

        # Retrieve video playlist to extract media URL
        # I'm not completely sure what all these options are, but we
        # seem to need most of them, otherwise the server sends a 401.
        yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
        yv_bitrate = '700' # according to Wikipedia this is hard-coded
        request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
                '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
                '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))

        # Extract media URL from playlist XML
        mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
        self._downloader.trouble(u'ERROR: Unable to extract media URL')
        video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
        # Playlist XML escapes &amp; etc.; decode HTML entities in the URL.
        video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)

        # Process video information
        # NOTE(review): 'thumbnail' and 'description' appear twice below —
        # the later (un-decoded) values silently win. Also video_thumbnail
        # is already unicode when re-.decode('utf-8') is applied.
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'uploader': video_uploader,
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
            'thumbnail': video_thumbnail.decode('utf-8'),
            'description': video_description,
            'thumbnail': video_thumbnail,
            'description': video_description,
        except UnavailableVideoError:
            self._downloader.trouble(u'ERROR: unable to download video')
class GenericIE(InfoExtractor):
    """Generic last-resort information extractor."""

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
        self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)

    def _real_initialize(self):

    def _real_extract(self, url):
        """Best-effort extraction from an arbitrary page: look for a direct
        media URL in common embed patterns (JW Player flashvars, then any
        file=/source= parameter)."""
        # At this point we have a new video
        self._downloader.increment_downloads()

        video_id = url.split('/')[-1]
        request = urllib2.Request(url)
        self.report_download_webpage(video_id)
        webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
        except ValueError, err:
            # since this is the last-resort InfoExtractor, if
            # this error is thrown, it'll be thrown here
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # Start with something easy: JW Player in SWFObject
        mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
        # Broaden the search a little bit
        mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
        self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        # It's possible that one of the regexes
        # matched, but returned an empty group:
        if mobj.group(1) is None:
            self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)

        video_url = urllib.unquote(mobj.group(1))
        video_id = os.path.basename(video_url)

        # here's a fun little line of code for you:
        # extension = everything after the last dot, dot dropped;
        # id = the basename without that extension.
        video_extension = os.path.splitext(video_id)[1][1:]
        video_id = os.path.splitext(video_id)[0]

        # it's tempting to parse this further, but you would
        # have to take into account all the variations like
        #   Video Title - Site Name
        #   Site Name | Video Title
        #   Video Title - Tagline | Site Name
        # and so on and so forth; it's just not practical
        mobj = re.search(r'<title>(.*)</title>', webpage)
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_title = mobj.group(1).decode('utf-8')
        video_title = sanitize_title(video_title)
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)

        # video uploader is domain name
        mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
        # NOTE(review): error message below says "title" but this step
        # extracts the uploader (domain) — copy/paste slip.
        self._downloader.trouble(u'ERROR: unable to extract title')
        video_uploader = mobj.group(1).decode('utf-8')

        # Process video information
        self._downloader.process_info({
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader,
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        except UnavailableVideoError, err:
            self._downloader.trouble(u'ERROR: unable to download video')
class YoutubeSearchIE(InfoExtractor):
    """Information Extractor for YouTube search queries."""
    # Queries look like "ytsearch:foo", "ytsearch5:foo" or "ytsearchall:foo".
    _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
    _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
    _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
    # Hard cap on results for "ytsearchall".
    _max_youtube_results = 1000

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Actual downloading is delegated to the YouTube extractor.
        self._youtube_ie = youtube_ie

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, query):
        """Parse the ytsearch prefix (count/all/blank) and dispatch to
        _download_n_results with the corresponding n."""
        mobj = re.match(self._VALID_QUERY, query)
        self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        prefix, query = query.split(':')
        query = query.encode('utf-8')
        self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_youtube_results)
            self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_youtube_results:
                # Requests beyond the cap are clamped with a warning.
                self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
                n = self._max_youtube_results
            self._download_n_results(query, n)
        except ValueError: # parsing prefix as integer fails
            self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""

        already_seen = set()

        self.report_download_page(query, pagenum)
        result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
        request = urllib2.Request(result_url, None, std_headers)
        page = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

        # Extract video identifiers
        for mobj in re.finditer(self._VIDEO_INDICATOR, page):
            # Match text is 'href="/watch?v=<id>"'; split on '=' and drop
            # the trailing quote to recover the id. Fragile but works for
            # this indicator pattern.
            video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
            if video_id not in already_seen:
                video_ids.append(video_id)
                already_seen.add(video_id)
            if len(video_ids) == n:
                # Specified n videos reached
                for id in video_ids:
                    self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

        # No "Next" link means the last results page was reached.
        if re.search(self._MORE_PAGES_INDICATOR, page) is None:
            for id in video_ids:
                self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)

        pagenum = pagenum + 1
class GoogleSearchIE(InfoExtractor):
    """Information Extractor for Google Video search queries."""
    # Queries look like "gvsearch:foo", "gvsearch5:foo" or "gvsearchall:foo".
    _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
    _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
    _MORE_PAGES_INDICATOR = r'<span>Next</span>'
    # Hard cap on results for "gvsearchall".
    _max_google_results = 1000

    def __init__(self, google_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Actual downloading is delegated to the Google Video extractor.
        self._google_ie = google_ie

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._google_ie.initialize()

    def _real_extract(self, query):
        """Parse the gvsearch prefix (count/all/blank) and dispatch to
        _download_n_results with the corresponding n."""
        mobj = re.match(self._VALID_QUERY, query)
        self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        prefix, query = query.split(':')
        query = query.encode('utf-8')
        self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_google_results)
            self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_google_results:
                # Requests beyond the cap are clamped with a warning.
                self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
                n = self._max_google_results
            self._download_n_results(query, n)
        except ValueError: # parsing prefix as integer fails
            self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""

        already_seen = set()

        self.report_download_page(query, pagenum)
        result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
        request = urllib2.Request(result_url, None, std_headers)
        page = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

        # Extract video identifiers
        for mobj in re.finditer(self._VIDEO_INDICATOR, page):
            video_id = mobj.group(1)
            if video_id not in already_seen:
                video_ids.append(video_id)
                already_seen.add(video_id)
            if len(video_ids) == n:
                # Specified n videos reached
                for id in video_ids:
                    self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)

        # No "Next" link means the last results page was reached.
        if re.search(self._MORE_PAGES_INDICATOR, page) is None:
            for id in video_ids:
                self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)

        pagenum = pagenum + 1
class YahooSearchIE(InfoExtractor):
    """Information Extractor for Yahoo! Video search queries."""
    # Queries look like "yvsearch:foo", "yvsearch5:foo" or "yvsearchall:foo".
    _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
    _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
    _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
    _MORE_PAGES_INDICATOR = r'\s*Next'
    # Hard cap on results for "yvsearchall".
    _max_yahoo_results = 1000

    def __init__(self, yahoo_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Actual downloading is delegated to the Yahoo! Video extractor.
        self._yahoo_ie = yahoo_ie

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)

    def report_download_page(self, query, pagenum):
        """Report attempt to download playlist page with given number."""
        query = query.decode(preferredencoding())
        self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))

    def _real_initialize(self):
        self._yahoo_ie.initialize()

    def _real_extract(self, query):
        """Parse the yvsearch prefix (count/all/blank) and dispatch to
        _download_n_results with the corresponding n."""
        mobj = re.match(self._VALID_QUERY, query)
        self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)

        prefix, query = query.split(':')
        query = query.encode('utf-8')
        self._download_n_results(query, 1)
        elif prefix == 'all':
            self._download_n_results(query, self._max_yahoo_results)
            self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
            elif n > self._max_yahoo_results:
                # Requests beyond the cap are clamped with a warning.
                self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
                n = self._max_yahoo_results
            self._download_n_results(query, n)
        except ValueError: # parsing prefix as integer fails
            self._download_n_results(query, 1)

    def _download_n_results(self, query, n):
        """Downloads a specified number of results for a query"""

        already_seen = set()

        self.report_download_page(query, pagenum)
        result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
        request = urllib2.Request(result_url, None, std_headers)
        page = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

        # Extract video identifiers
        for mobj in re.finditer(self._VIDEO_INDICATOR, page):
            # Group 1 is the "node/vid" pair used in /watch/ URLs.
            video_id = mobj.group(1)
            if video_id not in already_seen:
                video_ids.append(video_id)
                already_seen.add(video_id)
            if len(video_ids) == n:
                # Specified n videos reached
                for id in video_ids:
                    self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)

        # No "Next" link means the last results page was reached.
        if re.search(self._MORE_PAGES_INDICATOR, page) is None:
            for id in video_ids:
                self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)

        pagenum = pagenum + 1
class YoutubePlaylistIE(InfoExtractor):
    """Information Extractor for YouTube playlists."""

    # NOTE(review): the dot in "youtube.com" is unescaped here (it is
    # escaped in other extractors' patterns) — matches any character.
    _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
    _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
    _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Actual downloading is delegated to the YouTube extractor.
        self._youtube_ie = youtube_ie

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

    def report_download_page(self, playlist_id, pagenum):
        """Report attempt to download playlist page with given number."""
        self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, url):
        """Collect all video ids from the playlist's pages and hand each
        one to the YouTube extractor, honoring the 'playliststart' option."""
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid url: %s' % url)

        # Download playlist pages
        playlist_id = mobj.group(1)
        self.report_download_page(playlist_id, pagenum)
        request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
        page = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

        # Extract video identifiers
        # De-duplicate per page while preserving playlist order.
        for mobj in re.finditer(self._VIDEO_INDICATOR, page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        video_ids.extend(ids_in_page)

        # No "Next" link means the last playlist page was reached.
        if re.search(self._MORE_PAGES_INDICATOR, page) is None:
        pagenum = pagenum + 1

        playliststart = self._downloader.params.get('playliststart', 1)
        playliststart -= 1 #our arrays are zero-based but the playlist is 1-based
        if playliststart > 0:
            video_ids = video_ids[playliststart:]

        for id in video_ids:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class YoutubeUserIE(InfoExtractor):
    """Information Extractor for YouTube users."""

    # NOTE(review): unescaped dot in "youtube.com", as in the playlist IE.
    _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
    _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
    _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        # Actual downloading is delegated to the YouTube extractor.
        self._youtube_ie = youtube_ie

        # Body of suitable(url): True when this extractor handles *url*.
        return (re.match(YoutubeUserIE._VALID_URL, url) is not None)

    def report_download_page(self, username):
        """Report attempt to download user page."""
        self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))

    def _real_initialize(self):
        self._youtube_ie.initialize()

    def _real_extract(self, url):
        """Collect video ids from the user's gdata feed (single request,
        unlike the paged playlist extractor) and hand each to the
        YouTube extractor, honoring the 'playliststart' option."""
        mobj = re.match(self._VALID_URL, url)
        self._downloader.trouble(u'ERROR: invalid url: %s' % url)

        # Download user page
        username = mobj.group(1)
        self.report_download_page(username)
        request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
        page = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))

        # Extract video identifiers
        # De-duplicate while preserving feed order.
        for mobj in re.finditer(self._VIDEO_INDICATOR, page):
            if mobj.group(1) not in ids_in_page:
                ids_in_page.append(mobj.group(1))
        video_ids.extend(ids_in_page)

        playliststart = self._downloader.params.get('playliststart', 1)
        playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based
        if playliststart > 0:
            video_ids = video_ids[playliststart:]

        for id in video_ids:
            self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
    """Base class for post-download processing steps.

    PostProcessor objects are attached to a downloader with its
    add_post_processor() method. After each successful download the
    downloader calls run() on every registered processor in order,
    feeding each one the dictionary returned by the previous processor
    (starting from an initial info dictionary). The chain stops as soon
    as a processor returns None or the end of the chain is reached.

    PostProcessor objects follow a "mutual registration" process similar
    to InfoExtractor objects.
    """

    def __init__(self, downloader=None):
        # Downloader this processor reports to; may also be attached
        # later via set_downloader().
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Sets the downloader for this PP."""
        self._downloader = downloader

    def run(self, information):
        """Run the PostProcessor.

        ``information`` is an InfoExtractor-style dictionary carrying an
        extra "filepath" entry for the downloaded file. Returning None
        halts the post-processing chain; returning a (possibly modified)
        dictionary passes it on to the next processor. Implementations
        may raise a PostProcessingError, which the downloader takes into
        account.
        """
        # Base implementation: pass the information through unchanged.
        return information
2060 ### MAIN PROGRAM ###
2061 if __name__ == '__main__':
2063 # Modules needed only when running the main program
2067 # Function to update the program file with the latest version from bitbucket.org
2068 def update_self(downloader, filename):
2069 # Note: downloader only used for options
2070 if not os.access (filename, os.W_OK):
2071 sys.exit('ERROR: no write permissions on %s' % filename)
2073 downloader.to_stdout('Updating to latest stable version...')
2074 latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
2075 latest_version = urllib.urlopen(latest_url).read().strip()
2076 prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
2077 newcontent = urllib.urlopen(prog_url).read()
2078 stream = open(filename, 'w')
2079 stream.write(newcontent)
2081 downloader.to_stdout('Updated to version %s' % latest_version)
2083 # General configuration
2084 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
2085 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
2086 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
2088 # Parse command line
2089 parser = optparse.OptionParser(
2090 usage='Usage: %prog [options] url...',
2091 version='2010.08.04',
2092 conflict_handler='resolve',
2095 parser.add_option('-h', '--help',
2096 action='help', help='print this help text and exit')
2097 parser.add_option('-v', '--version',
2098 action='version', help='print program version and exit')
2099 parser.add_option('-U', '--update',
2100 action='store_true', dest='update_self', help='update this program to latest stable version')
2101 parser.add_option('-i', '--ignore-errors',
2102 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
2103 parser.add_option('-r', '--rate-limit',
2104 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
2105 parser.add_option('-R', '--retries',
2106 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
2107 parser.add_option('--playlist-start',
2108 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
2110 authentication = optparse.OptionGroup(parser, 'Authentication Options')
2111 authentication.add_option('-u', '--username',
2112 dest='username', metavar='USERNAME', help='account username')
2113 authentication.add_option('-p', '--password',
2114 dest='password', metavar='PASSWORD', help='account password')
2115 authentication.add_option('-n', '--netrc',
2116 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
2117 parser.add_option_group(authentication)
2119 video_format = optparse.OptionGroup(parser, 'Video Format Options')
2120 video_format.add_option('-f', '--format',
2121 action='store', dest='format', metavar='FORMAT', help='video format code')
2122 video_format.add_option('-m', '--mobile-version',
2123 action='store_const', dest='format', help='alias for -f 17', const='17')
2124 video_format.add_option('--all-formats',
2125 action='store_const', dest='format', help='download all available video formats', const='-1')
2126 video_format.add_option('--max-quality',
2127 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
2128 video_format.add_option('-b', '--best-quality',
2129 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
2130 parser.add_option_group(video_format)
2132 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
2133 verbosity.add_option('-q', '--quiet',
2134 action='store_true', dest='quiet', help='activates quiet mode', default=False)
2135 verbosity.add_option('-s', '--simulate',
2136 action='store_true', dest='simulate', help='do not download video', default=False)
2137 verbosity.add_option('-g', '--get-url',
2138 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
2139 verbosity.add_option('-e', '--get-title',
2140 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
2141 verbosity.add_option('--get-thumbnail',
2142 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
2143 verbosity.add_option('--get-description',
2144 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
2145 verbosity.add_option('--no-progress',
2146 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
2147 parser.add_option_group(verbosity)
2149 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
2150 filesystem.add_option('-t', '--title',
2151 action='store_true', dest='usetitle', help='use title in file name', default=False)
2152 filesystem.add_option('-l', '--literal',
2153 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
2154 filesystem.add_option('-o', '--output',
2155 dest='outtmpl', metavar='TEMPLATE', help='output filename template')
2156 filesystem.add_option('-a', '--batch-file',
2157 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
2158 filesystem.add_option('-w', '--no-overwrites',
2159 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
2160 filesystem.add_option('-c', '--continue',
2161 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
2162 parser.add_option_group(filesystem)
2164 (opts, args) = parser.parse_args()
2166 # Batch file verification
2168 if opts.batchfile is not None:
2170 if opts.batchfile == '-':
2173 batchfd = open(opts.batchfile, 'r')
2174 batchurls = batchfd.readlines()
2175 batchurls = [x.strip() for x in batchurls]
2176 batchurls = [x for x in batchurls if len(x) > 0]
2178 sys.exit(u'ERROR: batch file could not be read')
2179 all_urls = batchurls + args
2181 # Conflicting, missing and erroneous options
2182 if opts.bestquality:
2183 print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
2184 if opts.usenetrc and (opts.username is not None or opts.password is not None):
2185 parser.error(u'using .netrc conflicts with giving username/password')
2186 if opts.password is not None and opts.username is None:
2187 parser.error(u'account username missing')
2188 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
2189 parser.error(u'using output template conflicts with using title or literal title')
2190 if opts.usetitle and opts.useliteral:
2191 parser.error(u'using title conflicts with using literal title')
2192 if opts.username is not None and opts.password is None:
2193 opts.password = getpass.getpass(u'Type account password and press return:')
2194 if opts.ratelimit is not None:
2195 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
2196 if numeric_limit is None:
2197 parser.error(u'invalid rate limit specified')
2198 opts.ratelimit = numeric_limit
2199 if opts.retries is not None:
2201 opts.retries = long(opts.retries)
2202 except (TypeError, ValueError), err:
2203 parser.error(u'invalid retry count specified')
2204 if opts.playliststart is not None:
2206 opts.playliststart = long(opts.playliststart)
2207 except (TypeError, ValueError), err:
2208 parser.error(u'invalid playlist page specified')
2210 # Information extractors
2211 youtube_ie = YoutubeIE()
2212 metacafe_ie = MetacafeIE(youtube_ie)
2213 dailymotion_ie = DailymotionIE()
2214 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
2215 youtube_user_ie = YoutubeUserIE(youtube_ie)
2216 youtube_search_ie = YoutubeSearchIE(youtube_ie)
2217 google_ie = GoogleIE()
2218 google_search_ie = GoogleSearchIE(google_ie)
2219 photobucket_ie = PhotobucketIE()
2220 yahoo_ie = YahooIE()
2221 yahoo_search_ie = YahooSearchIE(yahoo_ie)
2222 generic_ie = GenericIE()
2225 fd = FileDownloader({
2226 'usenetrc': opts.usenetrc,
2227 'username': opts.username,
2228 'password': opts.password,
2229 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2230 'forceurl': opts.geturl,
2231 'forcetitle': opts.gettitle,
2232 'forcethumbnail': opts.getthumbnail,
2233 'forcedescription': opts.getdescription,
2234 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
2235 'format': opts.format,
2236 'format_limit': opts.format_limit,
2237 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
2238 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
2239 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
2240 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
2241 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
2242 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
2243 or u'%(id)s.%(ext)s'),
2244 'ignoreerrors': opts.ignoreerrors,
2245 'ratelimit': opts.ratelimit,
2246 'nooverwrites': opts.nooverwrites,
2247 'retries': opts.retries,
2248 'continuedl': opts.continue_dl,
2249 'noprogress': opts.noprogress,
2250 'playliststart': opts.playliststart,
2252 fd.add_info_extractor(youtube_search_ie)
2253 fd.add_info_extractor(youtube_pl_ie)
2254 fd.add_info_extractor(youtube_user_ie)
2255 fd.add_info_extractor(metacafe_ie)
2256 fd.add_info_extractor(dailymotion_ie)
2257 fd.add_info_extractor(youtube_ie)
2258 fd.add_info_extractor(google_ie)
2259 fd.add_info_extractor(google_search_ie)
2260 fd.add_info_extractor(photobucket_ie)
2261 fd.add_info_extractor(yahoo_ie)
2262 fd.add_info_extractor(yahoo_search_ie)
2264 # This must come last since it's the
2265 # fallback if none of the others work
2266 fd.add_info_extractor(generic_ie)
2269 if opts.update_self:
2270 update_self(fd, sys.argv[0])
2273 if len(all_urls) < 1:
2274 if not opts.update_self:
2275 parser.error(u'you must provide at least one URL')
2278 retcode = fd.download(all_urls)
2281 except DownloadError:
2283 except SameFileError:
2284 sys.exit(u'ERROR: fixed output name but more than one file to download')
2285 except KeyboardInterrupt:
2286 sys.exit(u'\nERROR: Interrupted by user')