# -*- coding: utf-8 -*-
# Author: Ricardo Garcia Gonzalez
# License: Public domain code
	'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.1) Gecko/2008070208 Firefox/3.0.1',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
	'Accept-Language': 'en-us,en;q=0.5',

simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')

class DownloadError(Exception):
	"""Download Error exception.

	This exception may be thrown by FileDownloader objects if they are not
	configured to continue on errors. They will contain the appropriate

class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.

class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.

class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are responsible for downloading the actual
	video file and writing it to disk if the user has requested it, among
	some other tasks. In most cases there should be one per program. Given
	a video URL, the downloader does not know how to extract all the
	needed information (that is the InfoExtractors' task), so it has to
	pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader hands it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor returns
	all the information to the FileDownloader and the latter downloads the
	file or does whatever it is instructed to do.

	File downloaders accept many parameters. To avoid saturating the
	object constructor with arguments, they receive a dictionary of
	options instead. These options are available through the get_params()
	method for the InfoExtractors to use. The FileDownloader also registers
	itself as the downloader in charge of the InfoExtractors that are
	added to it, so this is a "mutual registration".

	username: Username for authentication purposes.
	password: Password for authentication purposes.
	usenetrc: Use netrc for authentication instead.
	quiet: Do not print messages to stdout.
	forceurl: Force printing final URL.
	forcetitle: Force printing title.
	simulate: Do not download the video files.
	format: Video format code.
	outtmpl: Template for output names.
	ignoreerrors: Do not stop on download errors.
	ratelimit: Download speed limit, in bytes/sec.
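
	# Illustrative usage sketch of the flow described above (the option
	# values, URL and video identifier are made-up examples, not defaults):
	#
	#	fd = FileDownloader({'outtmpl': u'%(id)s.%(ext)s', 'quiet': False})
	#	fd.add_info_extractor(YoutubeIE())
	#	retcode = fd.download(['http://www.youtube.com/watch?v=abcdefghijk'])
	#
	# add_info_extractor() also calls set_downloader() on the InfoExtractor,
	# which is the "mutual registration" mentioned above.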

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self.set_params(params)

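	# For example, pmkdir('downloads/youtube/video.flv') below would create
	# 'downloads/' and then 'downloads/youtube/' (skipping any component that
	# already exists), assuming os.sep is '/'; the last path component is
	# treated as the file name and is not created.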
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		for dir in aggregate:
			if not os.path.exists(dir):

	def format_bytes(bytes):
		exponent = long(math.log(float(bytes), 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)

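	# Worked examples for the visible code above: format_bytes(2048) returns
	# '2.00k' (exponent 1, suffix 'k') and format_bytes(500) returns
	# '500.00b' (exponent 0, suffix 'b').
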
	def calc_percent(byte_counter, data_len):
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

	def calc_eta(start, now, total, current):
		if current == 0 or dif < 0.001: # One millisecond
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		return '%02d:%02d' % (eta_mins, eta_secs)

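	# For instance, if 1 MiB of a 10 MiB file has been received after 10
	# seconds, the rate is ~104858 bytes/s and the remaining 9 MiB give an
	# ETA of 90 seconds, formatted as '01:30'.
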
	def calc_speed(start, now, bytes):
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

	def best_block_size(elapsed_time, bytes):
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
		rate = bytes / elapsed_time

	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		number = float(matchobj.group(1))
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))

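	# Examples matching the --rate-limit help text below: parse_bytes('50k')
	# returns 51200 and parse_bytes('44.6m') returns 46766490; a bare number
	# such as parse_bytes('300') is returned unchanged (multiplier 1).
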
	def set_params(self, params):
		"""Sets parameters."""
		if type(params) != dict:
			raise ValueError('params: dictionary expected')
		self._params = params

	def get_params(self):
		"""Get parameters."""

	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		ie.set_downloader(self)

	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		pp.set_downloader(self)

	def to_stdout(self, message, skip_eol=False):
		"""Print message to stdout if not in quiet mode."""
		if not self._params.get('quiet', False):
			print u'%s%s' % (message, [u'\n', u''][skip_eol]),

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message

	def fixed_template(self):
		"""Checks if the output template is fixed."""
		return (re.search(ur'(?u)%\(.+?\)s', self._params['outtmpl']) is None)

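	# A template such as u'%(stitle)s-%(id)s.%(ext)s' is not fixed (it varies
	# per video), while a literal name such as u'video.flv' is fixed and can
	# therefore only safely receive a single download.
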
	def trouble(self, message=None):
		"""Determine the action to take when a download problem appears.

		Depending on whether the downloader has been configured to ignore
		download errors or not, this method may raise an exception when
		an error is found, after printing the message. If it does not
		raise, it returns an error code suitable to be returned later as
		a program exit code to indicate error.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self._params.get('ignoreerrors', False):
			raise DownloadError(message)

	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self._params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
		elapsed = now - start_time
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
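			# Sleep just long enough for the average speed since start_time
			# to drop back to the configured limit.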
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_stdout(u'[download] Destination: %s' % filename)

	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)

	def report_finish(self):
		"""Report download finished."""

	def download(self, url_list):
		"""Download a given list of URLs."""
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self._params['outtmpl'])

			suitable_found = False
				if not ie.suitable(url):
				# Suitable InfoExtractor found
				suitable_found = True
				all_results = ie.extract(url)
				results = [x for x in all_results if x is not None]
				if len(results) != len(all_results):
					retcode = self.trouble()

				if len(results) > 1 and self.fixed_template():
					raise SameFileError(self._params['outtmpl'])

				for result in results:
					if self._params.get('forcetitle', False):
						print result['title']
					if self._params.get('forceurl', False):

					# Do nothing else if in simulate mode
					if self._params.get('simulate', False):
						filename = self._params['outtmpl'] % result
						self.report_destination(filename)
					except (ValueError, KeyError), err:
						retcode = self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
						self.pmkdir(filename)
					except (OSError, IOError), err:
						retcode = self.trouble('ERROR: unable to create directories: %s' % str(err))
						outstream = open(filename, 'wb')
					except (OSError, IOError), err:
						retcode = self.trouble('ERROR: unable to open for writing: %s' % str(err))
						self._do_download(outstream, result['url'])
					except (OSError, IOError), err:
						retcode = self.trouble('ERROR: unable to write video data: %s' % str(err))
					except (urllib2.URLError, httplib.HTTPException, socket.error), err:
						retcode = self.trouble('ERROR: unable to download video data: %s' % str(err))
						self.post_process(filename, result)
					except (PostProcessingError), err:
						retcode = self.trouble('ERROR: postprocessing: %s' % str(err))

			if not suitable_found:
				retcode = self.trouble('ERROR: no suitable InfoExtractor: %s' % url)

	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info['filepath'] = filename

	def _do_download(self, stream, url):
		request = urllib2.Request(url, None, std_headers)
		data = urllib2.urlopen(request)
		data_len = data.info().get('Content-length', None)
		data_len_str = self.format_bytes(data_len)
			percent_str = self.calc_percent(byte_counter, data_len)
			eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
			speed_str = self.calc_speed(start, time.time(), byte_counter)
			self.report_progress(percent_str, data_len_str, speed_str, eta_str)

			data_block = data.read(block_size)
			data_block_len = len(data_block)
			if data_block_len == 0:
			byte_counter += data_block_len
			stream.write(data_block)
			block_size = self.best_block_size(after - before, data_block_len)

			self.slow_down(start, byte_counter)

		if data_len is not None and str(byte_counter) != data_len:
			raise ValueError('Content too short: %s/%s bytes' % (byte_counter, data_len))

class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title, the
	simplified title, the uploader nickname and other data. It is
	returned in a list of dictionaries when calling its extract()
	method. It is a list because a URL can refer to more than one
	video (think of playlists). The dictionaries must include the
	following fields:

	id: Video identifier.
	url: Final video URL.
	uploader: Nickname of the video uploader.
	title: Literal title.
	stitle: Simplified title.
	ext: Video filename extension.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods, as well as the suitable() static method.
	They should probably also be instantiated and added to the main
	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self.set_downloader(downloader)

		"""Receives a URL and returns True if suitable for this IE."""

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
			self._real_initialize()

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def to_stdout(self, message):
		"""Print message to stdout if downloader is not in quiet mode."""
		if self._downloader is None or not self._downloader.get_params().get('quiet', False):

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""

class YoutubeIE(InfoExtractor):
	"""Information extractor for youtube.com."""

	_VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
	_LOGIN_URL = 'http://www.youtube.com/login?next=/'
	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/'
	_NETRC_MACHINE = 'youtube'

		return (re.match(YoutubeIE._VALID_URL, url) is not None)

	def report_login(self):
		"""Report attempt to log in."""
		self.to_stdout(u'[youtube] Logging in')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self.to_stdout(u'[youtube] Confirming age')

	def report_webpage_download(self, video_id):
		"""Report attempt to download webpage."""
		self.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)

	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self.to_stdout(u'[youtube] %s: Extracting video information' % video_id)

	def report_video_url(self, video_id, video_real_url):
		"""Report extracted video URL."""
		self.to_stdout(u'[youtube] %s: URL: %s' % (video_id, video_real_url))

	def _real_initialize(self):
		if self._downloader is None:
		downloader_params = self._downloader.get_params()

		# Attempt to use provided username and password or .netrc data
		if downloader_params.get('username', None) is not None:
			username = downloader_params['username']
			password = downloader_params['password']
		elif downloader_params.get('usenetrc', False):
				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
			except (IOError, netrc.NetrcParseError), err:
				self.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))

		# No authentication to be performed

			'current_form': 'loginForm',
			'action_login': 'Log In',
			'username': username,
			'password': password,
		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
			login_results = urllib2.urlopen(request).read()
			if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
				self.to_stderr(u'WARNING: unable to log in: bad username or password')
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'WARNING: unable to log in: %s' % str(err))

			'action_confirm': 'Confirm',
		request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
			self.report_age_confirmation()
			age_results = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))

	def _real_extract(self, url):
		# Extract video id from URL
		mobj = re.match(self._VALID_URL, url)
			self.to_stderr(u'ERROR: invalid URL: %s' % url)
		video_id = mobj.group(2)

		# Downloader parameters
		if self._downloader is not None:
			params = self._downloader.get_params()
			format_param = params.get('format', None)
		video_extension = {'18': 'mp4', '17': '3gp'}.get(format_param, 'flv')

		# Normalize URL, including format
		normalized_url = 'http://www.youtube.com/watch?v=%s' % video_id
		if format_param is not None:
			normalized_url = '%s&fmt=%s' % (normalized_url, format_param)
		request = urllib2.Request(normalized_url, None, std_headers)
			self.report_webpage_download(video_id)
			video_webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'ERROR: unable to download video webpage: %s' % str(err))
		self.report_information_extraction(video_id)

		mobj = re.search(r', "t": "([^"]+)"', video_webpage)
			self.to_stderr(u'ERROR: unable to extract "t" parameter')
		video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s' % (video_id, mobj.group(1))
		if format_param is not None:
			video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
		self.report_video_url(video_id, video_real_url)

		mobj = re.search(r"var watchUsername = '([^']+)';", video_webpage)
			self.to_stderr(u'ERROR: unable to extract uploader nickname')
		video_uploader = mobj.group(1)

		mobj = re.search(r'(?im)<title>YouTube - ([^<]*)</title>', video_webpage)
			self.to_stderr(u'ERROR: unable to extract video title')
		video_title = mobj.group(1).decode('utf-8')
		video_title = re.sub(ur'(?u)&(.+?);', lambda x: unichr(htmlentitydefs.name2codepoint[x.group(1)]), video_title)
		video_title = video_title.replace(os.sep, u'%')

		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
		simple_title = simple_title.strip(ur'_')
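		# For example, a title of u'Learn Python - Lesson 1!' simplifies to
		# u'Learn_Python_Lesson_1'.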
			'id': video_id.decode('utf-8'),
			'url': video_real_url.decode('utf-8'),
			'uploader': video_uploader.decode('utf-8'),
			'title': video_title,
			'stitle': simple_title,
			'ext': video_extension.decode('utf-8'),

class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/disclaimer'

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

		return (re.match(MetacafeIE._VALID_URL, url) is not None)

	def report_disclaimer(self):
		"""Report disclaimer retrieval."""
		self.to_stdout(u'[metacafe] Retrieving disclaimer')

	def report_age_confirmation(self):
		"""Report attempt to confirm age."""
		self.to_stdout(u'[metacafe] Confirming age')

	def report_download_webpage(self, video_id):
		"""Report webpage download."""
		self.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self.to_stdout(u'[metacafe] %s: Extracting information' % video_id)

	def _real_initialize(self):
		# Retrieve disclaimer
		request = urllib2.Request(self._DISCLAIMER, None, std_headers)
			self.report_disclaimer()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'ERROR: unable to retrieve disclaimer: %s' % str(err))

			'allowAdultContent': '1',
			'submit': "Continue - I'm over 18",
		request = urllib2.Request('http://www.metacafe.com/watch/', urllib.urlencode(disclaimer_form), std_headers)
			self.report_age_confirmation()
			disclaimer = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'ERROR: unable to confirm age: %s' % str(err))

	def _real_extract(self, url):
		# Extract id and simplified title from URL
		mobj = re.match(self._VALID_URL, url)
			self.to_stderr(u'ERROR: invalid URL: %s' % url)
		video_id = mobj.group(1)

		# Check if video comes from YouTube
		mobj2 = re.match(r'^yt-(.*)$', video_id)
		if mobj2 is not None:
			return self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))

		simple_title = mobj.group(2).decode('utf-8')
		video_extension = 'flv'

		# Retrieve video webpage to extract further information
		request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
			self.report_download_webpage(video_id)
			webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self.to_stderr(u'ERROR: unable to retrieve video webpage: %s' % str(err))
		# Extract URL, uploader and title from webpage
		self.report_extraction(video_id)
		mobj = re.search(r'(?m)"mediaURL":"(http.*?\.flv)"', webpage)
			self.to_stderr(u'ERROR: unable to extract media URL')
		mediaURL = mobj.group(1).replace('\\', '')

		mobj = re.search(r'(?m)"gdaKey":"(.*?)"', webpage)
			self.to_stderr(u'ERROR: unable to extract gdaKey')
		gdaKey = mobj.group(1)

		video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)

		mobj = re.search(r'(?im)<meta name="title" content="Metacafe - ([^"]+)"', webpage)
			self.to_stderr(u'ERROR: unable to extract title')
		video_title = mobj.group(1).decode('utf-8')

		mobj = re.search(r'(?m)<li id="ChnlUsr">.*?Submitter:<br />(.*?)</li>', webpage)
			self.to_stderr(u'ERROR: unable to extract uploader nickname')
		video_uploader = re.sub(r'<.*?>', '', mobj.group(1))

			'id': video_id.decode('utf-8'),
			'url': video_url.decode('utf-8'),
			'uploader': video_uploader.decode('utf-8'),
			'title': video_title,
			'stitle': simple_title,
			'ext': video_extension.decode('utf-8'),

class YoutubePlaylistIE(InfoExtractor):
	"""Information Extractor for YouTube playlists."""

	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/view_play_list\?p=(.+)'
	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s'
	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	_MORE_PAGES_INDICATOR = r'/view_play_list?p=%s&page=%s'

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

		return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

	def report_download_page(self, playlist_id, pagenum):
		"""Report attempt to download playlist page with given number."""
		self.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract playlist id
		mobj = re.match(self._VALID_URL, url)
			self.to_stderr(u'ERROR: invalid url: %s' % url)

		# Download playlist pages
		playlist_id = mobj.group(1)

			self.report_download_page(playlist_id, pagenum)
			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self.to_stderr(u'ERROR: unable to download webpage: %s' % str(err))

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				ids_in_page.add(mobj.group(1))
			video_ids.extend(list(ids_in_page))

			if (self._MORE_PAGES_INDICATOR % (playlist_id, pagenum + 1)) not in page:
			pagenum = pagenum + 1

			information.extend(self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id))

class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	def __init__(self, downloader=None):
		self._downloader = downloader

	def to_stdout(self, message):
		"""Print message to stdout if downloader is not in quiet mode."""
		if self._downloader is None or not self._downloader.get_params().get('quiet', False):

	def to_stderr(self, message):
		"""Print message to stderr."""
		print >>sys.stderr, message

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		returned by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		"""
		return information # by default, do nothing

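# A minimal illustrative PostProcessor following the contract above. The class
# name and its behaviour are made-up examples, not part of the original script.
class ExamplePrintSizePP(PostProcessor):
	"""Example postprocessor: report the size of the downloaded file."""

	def run(self, information):
		try:
			size = os.path.getsize(information['filepath'])
			self.to_stdout(u'[postprocess] %s: %d bytes on disk' % (information['filepath'], size))
		except OSError:
			self.to_stderr(u'WARNING: unable to stat the downloaded file')
		return information # returning the dictionary keeps the chain going

# It would be attached to a downloader with fd.add_post_processor(ExamplePrintSizePP()).
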
if __name__ == '__main__':
		# Modules needed only when running the main program

		# General configuration
		urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2008.08.09',
			conflict_handler='resolve',
		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-u', '--username',
				dest='username', metavar='UN', help='account username')
		parser.add_option('-p', '--password',
				dest='password', metavar='PW', help='account password')
		parser.add_option('-o', '--output',
				dest='outtmpl', metavar='TPL', help='output filename template')
		parser.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activate quiet mode', default=False)
		parser.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		parser.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		parser.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		parser.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		parser.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		parser.add_option('-f', '--format',
				dest='format', metavar='FMT', help='video format code')
		parser.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='alias for -f 18', const='18')
		parser.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
		(opts, args) = parser.parse_args()

		# Conflicting, missing and erroneous options
			sys.exit(u'ERROR: you must provide at least one URL')
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			sys.exit(u'ERROR: using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			sys.exit(u'ERROR: account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			sys.exit(u'ERROR: using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			sys.exit(u'ERROR: using title conflicts with using literal title')
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				sys.exit(u'ERROR: invalid rate limit specified')
			opts.ratelimit = numeric_limit

		# Information extractors
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)

		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			'quiet': (opts.quiet or opts.geturl or opts.gettitle),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle),
			'format': opts.format,
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode())
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(youtube_ie)
		retcode = fd.download(args)

	except DownloadError:
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')