#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: Ricardo Garcia Gonzalez
# License: Public domain code
import htmlentitydefs
import httplib
import locale
import math
import netrc
import os
import re
import socket
import string
import sys
import time
import urllib
import urllib2
std_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
    'Accept-Language': 'en-us,en;q=0.5',
}
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
class DownloadError(Exception):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """
    pass
class SameFileError(Exception):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
    pass
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor returns
    all the information to the FileDownloader and the latter downloads the
    file or does whatever it's instructed to do.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the get_params()
    method for the InfoExtractors to use. The FileDownloader also registers
    itself as the downloader in charge for the InfoExtractors that are
    added to it, so this is a "mutual registration".

    Available options:

    username:      Username for authentication purposes.
    password:      Password for authentication purposes.
    usenetrc:      Use netrc for authentication instead.
    quiet:         Do not print messages to stdout.
    forceurl:      Force printing final URL.
    forcetitle:    Force printing title.
    simulate:      Do not download the video files.
    format:        Video format code.
    outtmpl:       Template for output names.
    ignoreerrors:  Do not stop on download errors.
    ratelimit:     Download speed limit, in bytes/sec.
    """
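    # Illustrative usage sketch (not part of the original source); the parameter
    # values and the video URL below are made up:
    #
    #   fd = FileDownloader({'quiet': False, 'outtmpl': u'%(stitle)s-%(id)s.%(ext)s',
    #                        'ignoreerrors': False, 'ratelimit': None})
    #   fd.add_info_extractor(YoutubeIE())
    #   fd.download(['http://www.youtube.com/watch?v=abc123'])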
    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self.set_params(params)
    @staticmethod
    def pmkdir(filename):
        """Create directory components in filename. Similar to Unix "mkdir -p"."""
        components = filename.split(os.sep)
        aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
        aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
        for dir in aggregate:
            if not os.path.exists(dir):
                os.mkdir(dir)
    @staticmethod
    def format_bytes(bytes):
        if bytes is None:
            return 'N/A'
        if bytes == 0:
            return '0.00b'
        exponent = long(math.log(float(bytes), 1024.0))
        suffix = 'bkMGTPEZY'[exponent]
        converted = float(bytes) / float(1024**exponent)
        return '%.2f%s' % (converted, suffix)
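    # Worked examples (illustrative, not from the original source):
    #   format_bytes(2048)    -> '2.00k'
    #   format_bytes(4194304) -> '4.00M'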
    @staticmethod
    def calc_percent(byte_counter, data_len):
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
    @staticmethod
    def calc_eta(start, now, total, current):
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001: # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = long((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)
    @staticmethod
    def calc_speed(start, now, bytes):
        dif = now - start
        if bytes == 0 or dif < 0.001: # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
    @staticmethod
    def best_block_size(elapsed_time, bytes):
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        return int(min(max(rate, new_min), new_max))
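    # Worked example (illustrative): a 1024-byte read that took 0.5 s measures a
    # rate of 2048 B/s, which is within [512, 2048], so the next block size is 2048.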
    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into a long integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return long(round(number * multiplier))
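    # Worked examples (illustrative, matching the -r/--rate-limit syntax):
    #   parse_bytes('50k')  -> 51200L    (50 * 1024)
    #   parse_bytes('1.5m') -> 1572864L  (1.5 * 1024**2)
    #   parse_bytes('abc')  -> None      (rejected by the regular expression)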
    def set_params(self, params):
        """Sets parameters."""
        if type(params) != dict:
            raise ValueError('params: dictionary expected')
        self._params = params
    def get_params(self):
        """Get parameters."""
        return self._params
    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)
    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)
    def to_stdout(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        if not self._params.get('quiet', False):
            print u'%s%s' % (message, [u'\n', u''][skip_eol]),
    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message
    def fixed_template(self):
        """Checks if the output template is fixed."""
        return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None)
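    # For example, u'%(stitle)s-%(id)s.%(ext)s' is not fixed (it expands per video),
    # while a literal template such as u'video.flv' is fixed, so downloading more
    # than one URL with it would write everything to the same file.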
    def trouble(self, message=None):
        """Determine the action to take when a download problem appears.

        Depending on whether the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message. If it
        doesn't raise, it returns an error code suitable to be returned
        later as a program exit code to indicate error.
        """
        if message is not None:
            self.to_stderr(message)
        if not self._params.get('ignoreerrors', False):
            raise DownloadError(message)
        return 1
    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self._params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
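    # Worked example (illustrative numbers): with ratelimit=51200 (i.e. 50k), having
    # read 200000 bytes after 2.0 s exceeds the 102400-byte allowance, so the method
    # sleeps (200000 - 51200 * 2.0) / 51200, roughly 1.9 s, to fall back under the limit.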
    def report_destination(self, filename):
        """Report destination filename."""
        self.to_stdout(u'[download] Destination: %s' % filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
    def report_finish(self):
        """Report download finished."""
        self.to_stdout(u'')  # Terminate the progress line
    def download(self, url_list):
        """Download a given list of URLs."""
        retcode = 0
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self._params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                if not ie.suitable(url):
                    continue
                # Suitable InfoExtractor found
                suitable_found = True
                all_results = ie.extract(url)
                results = [x for x in all_results if x is not None]
                if len(results) != len(all_results):
                    retcode = self.trouble()

                if len(results) > 1 and self.fixed_template():
                    raise SameFileError(self._params['outtmpl'])

                for result in results:
                    # Forced printings
                    if self._params.get('forcetitle', False):
                        print result['title']
                    if self._params.get('forceurl', False):
                        print result['url']

                    # Do nothing else if in simulate mode
                    if self._params.get('simulate', False):
                        continue

                    try:
                        filename = self._params['outtmpl'] % result
                        self.report_destination(filename)
                    except (ValueError, KeyError), err:
                        retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
                        continue
                    try:
                        self.pmkdir(filename)
                    except (OSError, IOError), err:
                        retcode = self.trouble('ERROR: unable to create directories: %s' % str(err))
                        continue
                    try:
                        outstream = open(filename, 'wb')
                    except (OSError, IOError), err:
                        retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err))
                        continue
                    try:
                        self._do_download(outstream, result['url'])
                        outstream.close()
                    except (OSError, IOError), err:
                        retcode = self.trouble('ERROR: unable to write video data: %s' % str(err))
                        continue
                    except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                        retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
                        continue
                    try:
                        self.post_process(filename, result)
                    except (PostProcessingError), err:
                        retcode = self.trouble('ERROR: postprocessing: %s' % str(err))
                        continue

                break
            if not suitable_found:
                retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

        return retcode
    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        for pp in self._pps:
            info = pp.run(info)
            if info is None:
                break
    def _do_download(self, stream, url):
        request = urllib2.Request(url, None, std_headers)
        data = urllib2.urlopen(request)
        data_len = data.info().get('Content-length', None)
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0
        block_size = 1024
        start = time.time()
        while True:
            # Progress message
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
            speed_str = self.calc_speed(start, time.time(), byte_counter)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            data_block_len = len(data_block)
            if data_block_len == 0:
                break
            byte_counter += data_block_len
            stream.write(data_block)
            block_size = self.best_block_size(after - before, data_block_len)

            # Apply rate limit
            self.slow_down(start, byte_counter)

        self.report_finish()
        if data_len is not None and str(byte_counter) != data_len:
            raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information from the video (or videos) the URL refers to. This
    information includes the real video URL, the video title and simplified
    title, author and others. It is returned in a list of dictionaries when
    calling its extract() method. It is a list because a URL can refer to
    more than one video (think of playlists). The dictionaries must include
    the following fields:

    id:        Video identifier.
    url:       Final video URL.
    uploader:  Nickname of the video uploader.
    title:     Literal title.
    stitle:    Simplified title.
    ext:       Video filename extension.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods, as well as the suitable() static method.
    Probably, they should also be instantiated and added to the main
    downloader.
    """
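    # Illustrative example (field values are made up, not from the original source)
    # of one dictionary returned by extract():
    #
    #   {
    #       'id': u'abc123',
    #       'url': u'http://example.com/video.flv',
    #       'uploader': u'someuser',
    #       'title': u'Some video title',
    #       'stitle': u'Some_video_title',
    #       'ext': u'flv',
    #   }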
    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @staticmethod
    def suitable(url):
        """Receives a URL and returns True if suitable for this IE."""
        return False

    def initialize(self):
        """Initializes an instance (authentication, etc)."""
        if not self._ready:
            self._real_initialize()
            self._ready = True

    def extract(self, url):
        """Extracts URL information and returns it in list of dicts."""
        self.initialize()
        return self._real_extract(url)
    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader
    def to_stdout(self, message):
        """Print message to stdout if downloader is not in quiet mode."""
        if self._downloader is None or not self._downloader.get_params().get('quiet', False):
            print message
    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message
    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
    _LOGIN_URL = 'http://uk.youtube.com/login?next=/'
    _AGE_URL = 'http://uk.youtube.com/verify_age?next_url=/'
    _NETRC_MACHINE = 'youtube'

    @staticmethod
    def suitable(url):
        return (re.match(YoutubeIE._VALID_URL, url) is not None)
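    # URL forms accepted by _VALID_URL (illustrative, not an exhaustive list):
    #   http://www.youtube.com/watch?v=VIDEO_ID
    #   http://youtube.com/v/VIDEO_ID
    #   VIDEO_ID                          (a bare identifier also matches)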
    def report_login(self):
        """Report attempt to log in."""
        self.to_stdout(u'[youtube] Logging in')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self.to_stdout(u'[youtube] Confirming age')

    def report_webpage_download(self, video_id):
        """Report attempt to download webpage."""
        self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)

    def report_information_extraction(self, video_id):
        """Report attempt to extract video information."""
        self.to_stdout(u'[youtube] %s: Extracting video information' % video_id)

    def report_video_url(self, video_id, video_real_url):
        """Report extracted video URL."""
        self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))
    def _real_initialize(self):
        if self._downloader is None:
            return

        username = None
        password = None
        downloader_params = self._downloader.get_params()

        # Attempt to use provided username and password or .netrc data
        if downloader_params.get('username', None) is not None:
            username = downloader_params['username']
            password = downloader_params['password']
        elif downloader_params.get('usenetrc', False):
            try:
                info = netrc.netrc().authenticators(self._NETRC_MACHINE)
                if info is not None:
                    username = info[0]
                    password = info[2]
                else:
                    raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
            except (IOError, netrc.NetrcParseError), err:
                self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
                return

        # No authentication to be performed
        if username is None:
            return

        # Log in
        login_form = {
            'current_form': 'loginForm',
            'next': '/',
            'action_login': 'Log In',
            'username': username,
            'password': password,
        }
        request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
        try:
            self.report_login()
            login_results = urllib2.urlopen(request).read()
            if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
                self.to_stderr(u'WARNING: unable to log in: bad username or password')
                return
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'WARNING: unable to log in: %s' % str(err))
            return

        # Confirm age
        age_form = {
            'next_url': '/',
            'action_confirm': 'Confirm',
        }
        request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
        try:
            self.report_age_confirmation()
            age_results = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
    def _real_extract(self, url):
        # Extract video id from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self.to_stderr(u'ERROR: invalid URL: %s' % url)
            return [None]
        video_id = mobj.group(2)

        # Downloader parameters
        format_param = None
        if self._downloader is not None:
            params = self._downloader.get_params()
            format_param = params.get('format', None)

        # Extension
        video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv')

        # Normalize URL, including format
        normalized_url = 'http://uk.youtube.com/watch?v=%s' % video_id
        if format_param is not None:
            normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
        request = urllib2.Request(normalized_url, None, std_headers)
        try:
            self.report_webpage_download(video_id)
            video_webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
            return [None]
        self.report_information_extraction(video_id)

        # Extract the "t" parameter needed to build the real video URL
        mobj = re.search(r', "t": "([^"]+)"', video_webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract "t" parameter')
            return [None]
        video_real_url = 'http://uk.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
        if format_param is not None:
            video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
        self.report_video_url(video_id, video_real_url)

        # Uploader nickname
        mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract uploader nickname')
            return [None]
        video_uploader = mobj.group(1)

        # Title
        mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract video title')
            return [None]
        video_title = mobj.group(1).decode('utf-8')
        video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title)
        video_title = video_title.replace(os.sep, u'%')

        # Simplified title
        simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
        simple_title = simple_title.strip(ur'_')

        # Return a list with a single dictionary of extracted information
        return [{
            'id': video_id.decode('utf-8'),
            'url': video_real_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        }]
class MetacafeIE(InfoExtractor):
    """Information Extractor for metacafe.com."""

    _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
    _DISCLAIMER = 'http://www.metacafe.com/family_filter/'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        return (re.match(MetacafeIE._VALID_URL, url) is not None)
    def report_disclaimer(self):
        """Report disclaimer retrieval."""
        self.to_stdout(u'[metacafe] Retrieving disclaimer')

    def report_age_confirmation(self):
        """Report attempt to confirm age."""
        self.to_stdout(u'[metacafe] Confirming age')

    def report_download_webpage(self, video_id):
        """Report webpage download."""
        self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)

    def report_extraction(self, video_id):
        """Report information extraction."""
        self.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
    def _real_initialize(self):
        # Retrieve disclaimer
        request = urllib2.Request(self._DISCLAIMER, None, std_headers)
        try:
            self.report_disclaimer()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
            return

        # Confirm age
        disclaimer_form = {
            'filters': '0',
            'submit': "Continue - I'm over 18",
        }
        request = urllib2.Request('http://www.metacafe.com/', urllib.urlencode(disclaimer_form), std_headers)
        try:
            self.report_age_confirmation()
            disclaimer = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))
            return
    def _real_extract(self, url):
        # Extract id and simplified title from URL
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self.to_stderr(u'ERROR: invalid URL: %s' % url)
            return [None]

        video_id = mobj.group(1)

        # Check if video comes from YouTube
        mobj2 = re.match(r'^yt-(.*)$', video_id)
        if mobj2 is not None:
            return self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % mobj2.group(1))

        simple_title = mobj.group(2).decode('utf-8')
        video_extension = 'flv'

        # Retrieve video webpage to extract further information
        request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
        try:
            self.report_download_webpage(video_id)
            webpage = urllib2.urlopen(request).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error), err:
            self.to_stderr(u'ERROR: unable to retrieve video webpage: %s' % str(err))
            return [None]

        # Extract URL, uploader and title from webpage
        self.report_extraction(video_id)
        mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract media URL')
            return [None]
        mediaURL = mobj.group(1).replace('\\', '')

        mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract gdaKey')
            return [None]
        gdaKey = mobj.group(1)

        video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

        mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract title')
            return [None]
        video_title = mobj.group(1).decode('utf-8')

        mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
        if mobj is None:
            self.to_stderr(u'ERROR: unable to extract uploader nickname')
            return [None]
        video_uploader = re.sub(r'<.*?>', '', mobj.group(1))

        # Return a list with a single dictionary of extracted information
        return [{
            'id': video_id.decode('utf-8'),
            'url': video_url.decode('utf-8'),
            'uploader': video_uploader.decode('utf-8'),
            'title': video_title,
            'stitle': simple_title,
            'ext': video_extension.decode('utf-8'),
        }]
class YoutubePlaylistIE(InfoExtractor):
    """Information Extractor for YouTube playlists."""

    _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
    _TEMPLATE_URL = 'http://uk.youtube.com/view_play_list?p=%s&page=%s'
    _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
    _MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'

    def __init__(self, youtube_ie, downloader=None):
        InfoExtractor.__init__(self, downloader)
        self._youtube_ie = youtube_ie

    @staticmethod
    def suitable(url):
        return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
    def report_download_page(self, playlist_id, pagenum):
        """Report attempt to download playlist page with given number."""
        self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

    def _real_initialize(self):
        self._youtube_ie.initialize()
    def _real_extract(self, url):
        # Extract playlist id
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            self.to_stderr(u'ERROR: invalid url: %s' % url)
            return [None]

        # Download playlist pages
        playlist_id = mobj.group(1)
        video_ids = []
        pagenum = 1

        while True:
            self.report_download_page(playlist_id, pagenum)
            request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
            try:
                page = urllib2.urlopen(request).read()
            except (urllib2.URLError, httplib.HTTPException, socket.error), err:
                self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))
                return [None]

            # Extract video identifiers
            ids_in_page = []
            for mobj in re.finditer(self._VIDEO_INDICATOR, page):
                if mobj.group(1) not in ids_in_page:
                    ids_in_page.append(mobj.group(1))
            video_ids.extend(ids_in_page)

            if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page:
                break
            pagenum = pagenum + 1

        information = []
        for id in video_ids:
            information.extend(self._youtube_ie.extract('http://uk.youtube.com/watch?v=%s' % id))
        return information
class PostProcessor(object):
    """Post Processor class.

    PostProcessor objects can be added to downloaders with their
    add_post_processor() method. When the downloader has finished a
    successful download, it will take its internal chain of PostProcessors
    and start calling the run() method on each one of them, first with
    an initial argument and then with the returned value of the previous
    one.

    The chain will be stopped if one of them ever returns None or the end
    of the chain is reached.

    PostProcessor objects follow a "mutual registration" process similar
    to InfoExtractor objects.
    """
    def __init__(self, downloader=None):
        self._downloader = downloader

    def to_stdout(self, message):
        """Print message to stdout if downloader is not in quiet mode."""
        if self._downloader is None or not self._downloader.get_params().get('quiet', False):
            print message

    def to_stderr(self, message):
        """Print message to stderr."""
        print >>sys.stderr, message

    def set_downloader(self, downloader):
        """Sets the downloader for this PP."""
        self._downloader = downloader
    def run(self, information):
        """Run the PostProcessor.

        The "information" argument is a dictionary like the ones
        returned by InfoExtractors. The only difference is that this
        one has an extra field called "filepath" that points to the
        downloaded file.

        When this method returns None, the postprocessing chain is
        stopped. However, this method may return an information
        dictionary that will be passed to the next postprocessing
        object in the chain. It can be the one it received after
        changing some fields.

        In addition, this method may raise a PostProcessingError
        exception that will be taken into account by the downloader
        it was called from.
        """
        return information # by default, do nothing
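# Illustrative sketch (not part of the original source): a minimal PostProcessor
# subclass that just reports the downloaded file and keeps the chain alive by
# returning the dictionary it received.
#
#   class EchoFilenamePP(PostProcessor):
#       def run(self, information):
#           self.to_stdout(u'[postprocess] Downloaded %s' % information['filepath'])
#           return information
#
#   fd.add_post_processor(EchoFilenamePP())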
if __name__ == '__main__':
    try:
        # Modules needed only when running the main program
        import getpass
        import optparse

        # General configuration
        urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
        urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
        socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
        # Parse command line
        parser = optparse.OptionParser(
            usage='Usage: %prog [options] url...',
            version='2008.10.16',
            conflict_handler='resolve',
        )
        parser.add_option('-h', '--help',
                action='help', help='print this help text and exit')
        parser.add_option('-v', '--version',
                action='version', help='print program version and exit')
        parser.add_option('-u', '--username',
                dest='username', metavar='UN', help='account username')
        parser.add_option('-p', '--password',
                dest='password', metavar='PW', help='account password')
        parser.add_option('-o', '--output',
                dest='outtmpl', metavar='TPL', help='output filename template')
        parser.add_option('-q', '--quiet',
                action='store_true', dest='quiet', help='activates quiet mode', default=False)
        parser.add_option('-s', '--simulate',
                action='store_true', dest='simulate', help='do not download video', default=False)
        parser.add_option('-t', '--title',
                action='store_true', dest='usetitle', help='use title in file name', default=False)
        parser.add_option('-l', '--literal',
                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
        parser.add_option('-n', '--netrc',
                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
        parser.add_option('-g', '--get-url',
                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
        parser.add_option('-e', '--get-title',
                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
        parser.add_option('-f', '--format',
                dest='format', metavar='FMT', help='video format code')
        parser.add_option('-b', '--best-quality',
                action='store_const', dest='format', help='alias for -f 18', const='18')
        parser.add_option('-m', '--mobile-version',
                action='store_const', dest='format', help='alias for -f 17', const='17')
        parser.add_option('-i', '--ignore-errors',
                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
        parser.add_option('-r', '--rate-limit',
                dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
        (opts, args) = parser.parse_args()
        # Conflicting, missing and erroneous options
        if len(args) < 1:
            sys.exit(u'ERROR: you must provide at least one URL')
        if opts.usenetrc and (opts.username is not None or opts.password is not None):
            sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
        if opts.password is not None and opts.username is None:
            sys.exit(u'ERROR: account username missing')
        if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
            sys.exit(u'ERROR: using output template conflicts with using title or literal title')
        if opts.usetitle and opts.useliteral:
            sys.exit(u'ERROR: using title conflicts with using literal title')
        if opts.username is not None and opts.password is None:
            opts.password = getpass.getpass(u'Type account password and press return:')
        if opts.ratelimit is not None:
            numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
            if numeric_limit is None:
                sys.exit(u'ERROR: invalid rate limit specified')
            opts.ratelimit = numeric_limit
        # Information extractors
        youtube_ie = YoutubeIE()
        metacafe_ie = MetacafeIE(youtube_ie)
        youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
        # File downloader
        charset = locale.getdefaultlocale()[1]
        fd = FileDownloader({
            'usenetrc': opts.usenetrc,
            'username': opts.username,
            'password': opts.password,
            'quiet': (opts.quiet or opts.geturl or opts.gettitle),
            'forceurl': opts.geturl,
            'forcetitle': opts.gettitle,
            'simulate': (opts.simulate or opts.geturl or opts.gettitle),
            'format': opts.format,
            'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(charset))
                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                or u'%(id)s.%(ext)s'),
            'ignoreerrors': opts.ignoreerrors,
            'ratelimit': opts.ratelimit,
        })
        fd.add_info_extractor(youtube_pl_ie)
        fd.add_info_extractor(metacafe_ie)
        fd.add_info_extractor(youtube_ie)
        retcode = fd.download(args)
        sys.exit(retcode)
    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')