X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube-dl;h=5aff9c08c1aab97a14909e76a0cb042f78345d6a;hb=8c5dc3ad4024eab1d167fb62a92eeabf7d895e59;hp=0d77585a80b931a17c08e0ada94b301e448ac5db;hpb=0ac22e4f5a652f1b470f9daff06c1361e8f93c16;p=youtube-dl diff --git a/youtube-dl b/youtube-dl index 0d77585a8..5aff9c08c 100755 --- a/youtube-dl +++ b/youtube-dl @@ -1,20 +1,26 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Author: Ricardo Garcia Gonzalez -# Author: Danny Colligan -# Author: Benjamin Johnson -# Author: Vasyl' Vavrychuk -# Author: Witold Baryluk -# Author: Paweł Paprota -# Author: Gergely Imreh -# Author: Philipp Hagemeister -# License: Public domain code -from __future__ import with_statement -import contextlib + +__author__ = ( + 'Ricardo Garcia Gonzalez', + 'Danny Colligan', + 'Benjamin Johnson', + 'Vasyl\' Vavrychuk', + 'Witold Baryluk', + 'Paweł Paprota', + 'Gergely Imreh', + 'Rogério Brito', + 'Philipp Hagemeister', + 'Sören Schulze', + ) + +__license__ = 'Public Domain' +__version__ = '2011.09.14' + +UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' + import cookielib -import ctypes import datetime -import email.utils import gzip import htmlentitydefs import httplib @@ -34,6 +40,13 @@ import urllib2 import warnings import zlib +if os.name == 'nt': + import ctypes + +try: + import email.utils +except ImportError: # Python 2.4 + import email.Utils try: import cStringIO as StringIO except ImportError: @@ -47,11 +60,16 @@ except ImportError: try: import lxml.etree -except ImportError: # Python < 2.6 +except ImportError: pass # Handled below +try: + import xml.etree.ElementTree +except ImportError: # Python<2.5 + pass # Not officially supported, but let it slip + std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Encoding': 'gzip, deflate', @@ -189,6 +207,7 @@ def preferredencoding(): yield pref return yield_preferredencoding().next() + def htmlentity_transform(matchobj): """Transforms an HTML entity to a Unicode character. @@ -215,11 +234,13 @@ def htmlentity_transform(matchobj): # Unknown entity in name, return its literal representation return (u'&%s;' % entity) + def sanitize_title(utitle): """Sanitizes a video title so it could be used as part of a filename.""" utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle) return utitle.replace(unicode(os.sep), u'%') + def sanitize_open(filename, open_mode): """Try to open the given filename, and slightly tweak it if this fails. @@ -246,13 +267,15 @@ def sanitize_open(filename, open_mode): stream = open(filename, open_mode) return (stream, filename) + def timeconvert(timestr): - """Convert RFC 2822 defined time string into system timestamp""" - timestamp = None - timetuple = email.utils.parsedate_tz(timestr) - if timetuple is not None: - timestamp = email.utils.mktime_tz(timetuple) - return timestamp + """Convert RFC 2822 defined time string into system timestamp""" + timestamp = None + timetuple = email.utils.parsedate_tz(timestr) + if timetuple is not None: + timestamp = email.utils.mktime_tz(timetuple) + return timestamp + class DownloadError(Exception): """Download Error exception. @@ -263,6 +286,7 @@ class DownloadError(Exception): """ pass + class SameFileError(Exception): """Same File exception. @@ -271,6 +295,7 @@ class SameFileError(Exception): """ pass + class PostProcessingError(Exception): """Post Processing exception. @@ -279,6 +304,7 @@ class PostProcessingError(Exception): """ pass + class UnavailableVideoError(Exception): """Unavailable Format exception. @@ -287,6 +313,7 @@ class UnavailableVideoError(Exception): """ pass + class ContentTooShortError(Exception): """Content Too Short exception. @@ -302,6 +329,7 @@ class ContentTooShortError(Exception): self.downloaded = downloaded self.expected = expected + class YoutubeDLHandler(urllib2.HTTPHandler): """Handler for HTTP requests and responses. @@ -311,11 +339,11 @@ class YoutubeDLHandler(urllib2.HTTPHandler): a particular request, the original request in the program code only has to include the HTTP header "Youtubedl-No-Compression", which will be removed before making the real request. - + Part of this code was copied from: - http://techknack.net/python-urllib2-handlers/ - + http://techknack.net/python-urllib2-handlers/ + Andrew Rowls, the author of that code, agreed to release it to the public domain. """ @@ -326,7 +354,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): return zlib.decompress(data, -zlib.MAX_WBITS) except zlib.error: return zlib.decompress(data) - + @staticmethod def addinfourl_wrapper(stream, headers, url, code): if hasattr(urllib2.addinfourl, 'getcode'): @@ -334,7 +362,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): ret = urllib2.addinfourl(stream, headers, url) ret.code = code return ret - + def http_request(self, req): for h in std_headers: if h in req.headers: @@ -360,6 +388,7 @@ class YoutubeDLHandler(urllib2.HTTPHandler): resp.msg = old_resp.msg return resp + class FileDownloader(object): """File Downloader class. @@ -432,16 +461,6 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params - @staticmethod - def pmkdir(filename): - """Create directory components in filename. Similar to Unix "mkdir -p".""" - components = filename.split(os.sep) - aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))] - aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator - for dir in aggregate: - if not os.path.exists(dir): - os.mkdir(dir) - @staticmethod def format_bytes(bytes): if bytes is None: @@ -453,7 +472,7 @@ class FileDownloader(object): else: exponent = long(math.log(bytes, 1024.0)) suffix = 'bkMGTPEZY'[exponent] - converted = float(bytes) / float(1024**exponent) + converted = float(bytes) / float(1024 ** exponent) return '%.2f%s' % (converted, suffix) @staticmethod @@ -591,7 +610,7 @@ class FileDownloader(object): os.rename(old_filename, new_filename) except (IOError, OSError), err: self.trouble(u'ERROR: unable to rename file') - + def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: @@ -605,7 +624,7 @@ class FileDownloader(object): if filetime is None: return try: - os.utime(filename,(time.time(), filetime)) + os.utime(filename, (time.time(), filetime)) except: pass @@ -698,19 +717,24 @@ class FileDownloader(object): return try: - self.pmkdir(filename) + dn = os.path.dirname(filename) + if dn != '' and not os.path.exists(dn): + os.makedirs(dn) except (OSError, IOError), err: - self.trouble(u'ERROR: unable to create directories: %s' % str(err)) + self.trouble(u'ERROR: unable to create directory ' + unicode(err)) return if self.params.get('writedescription', False): try: descfn = filename + '.description' self.report_writedescription(descfn) - with contextlib.closing(open(descfn, 'wb')) as descfile: + descfile = open(descfn, 'wb') + try: descfile.write(info_dict['description'].encode('utf-8')) + finally: + descfile.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) + self.trouble(u'ERROR: Cannot write description file ' + descfn) return if self.params.get('writeinfojson', False): @@ -722,10 +746,13 @@ class FileDownloader(object): self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') return try: - with contextlib.closing(open(infofn, 'wb')) as infof: + infof = open(infofn, 'wb') + try: json.dump(info_dict, infof) + finally: + infof.close() except (OSError, IOError): - self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn) return try: @@ -805,6 +832,11 @@ class FileDownloader(object): cursize = os.path.getsize(tmpfilename) if prevsize == cursize and retval == 1: break + # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those + if prevsize == cursize and retval == 2 and cursize > 1024: + self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') + retval = 0 + break if retval == 0: self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename)) self.try_rename(tmpfilename, filename) @@ -841,7 +873,7 @@ class FileDownloader(object): # Request parameters in case of being able to resume if self.params.get('continuedl', False) and resume_len != 0: self.report_resuming_byte(resume_len) - request.add_header('Range','bytes=%d-' % resume_len) + request.add_header('Range', 'bytes=%d-' % resume_len) open_mode = 'ab' count = 0 @@ -867,7 +899,7 @@ class FileDownloader(object): else: # Examine the reported length if (content_length is not None and - (resume_len - 100 < long(content_length) < resume_len + 100)): + (resume_len - 100 < long(content_length) < resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, @@ -912,6 +944,7 @@ class FileDownloader(object): if stream is None: try: (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) + assert stream is not None filename = self.undo_temp_name(tmpfilename) self.report_destination(filename) except (OSError, IOError), err: @@ -933,6 +966,9 @@ class FileDownloader(object): # Apply rate limit self.slow_down(start, byte_counter - resume_len) + if stream is None: + self.trouble(u'\nERROR: Did not get any data blocks') + return False stream.close() self.report_finish() if data_len is not None and byte_counter != data_len: @@ -945,6 +981,7 @@ class FileDownloader(object): return True + class InfoExtractor(object): """Information Extractor class. @@ -1016,16 +1053,17 @@ class InfoExtractor(object): """Real extraction process. Redefine in subclasses.""" pass + class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' _NETRC_MACHINE = 'youtube' # Listed in order of quality - _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] + _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] _video_extensions = { '13': '3gp', '17': 'mp4', @@ -1170,7 +1208,7 @@ class YoutubeIE(InfoExtractor): self.report_video_info_webpage_download(video_id) for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' - % (video_id, el_type)) + % (video_id, el_type)) request = urllib2.Request(video_info_url) try: video_info_webpage = urllib2.urlopen(request).read() @@ -1248,11 +1286,15 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') - url_data = [dict(pairStr.split('=') for pairStr in uds.split('&')) for uds in url_data_strs] - url_map = dict((ud['itag'], urllib.unquote(ud['url'])) for ud in url_data) - + url_data = [parse_qs(uds) for uds in url_data_strs] + url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) + url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) + format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -1272,13 +1314,8 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: requested format not available') return video_url_list = [(req_format, url_map[req_format])] # Specific format - - elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] - else: - self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return for format_param, video_real_url in video_url_list: @@ -1288,7 +1325,6 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -1492,6 +1528,7 @@ class DailymotionIE(InfoExtractor): # Retrieve video webpage to extract further information request = urllib2.Request(url) + request.add_header('Cookie', 'family_filter=off') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1501,25 +1538,29 @@ class DailymotionIE(InfoExtractor): # Extract URL, uploader and title from webpage self.report_extraction(video_id) - mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage) + mobj = re.search(r'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract media URL') return - mediaURL = urllib.unquote(mobj.group(1)) + sequence = urllib.unquote(mobj.group(1)) + mobj = re.search(r',\"sdURL\"\:\"([^\"]+?)\",', sequence) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + mediaURL = urllib.unquote(mobj.group(1)).replace('\\', '') # if needed add http://www.dailymotion.com/ if relative URL video_url = mediaURL - # '' - mobj = re.search(r'(?im)Dailymotion\s*[\-:]\s*(.+?)', webpage) + mobj = re.search(r'(?im)Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract title') return video_title = mobj.group(1).decode('utf-8') video_title = sanitize_title(video_title) - mobj = re.search(r'(?im)(.+?)', webpage) + mobj = re.search(r'(?im)[^<]+?]+?>([^<]+?)', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -1541,6 +1582,7 @@ class DailymotionIE(InfoExtractor): except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') + class GoogleIE(InfoExtractor): """Information extractor for video.google.com.""" @@ -1634,7 +1676,6 @@ class GoogleIE(InfoExtractor): else: # we need something to pass to process_info video_thumbnail = '' - try: # Process video information self._downloader.process_info({ @@ -1834,7 +1875,8 @@ class YahooIE(InfoExtractor): self._downloader.trouble(u'ERROR: unable to extract video description') return video_description = mobj.group(1).decode('utf-8') - if not video_description: video_description = 'No description available.' + if not video_description: + video_description = 'No description available.' # Extract video height and width mobj = re.search(r'', webpage) @@ -1855,8 +1897,8 @@ class YahooIE(InfoExtractor): yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents yv_bitrate = '700' # according to Wikipedia this is hard-coded request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id + - '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + - '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') + '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height + + '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') try: self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() @@ -1885,13 +1927,128 @@ class YahooIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'thumbnail': video_thumbnail, - 'description': video_description, 'player_url': None, }) except UnavailableVideoError: self._downloader.trouble(u'\nERROR: unable to download video') +class VimeoIE(InfoExtractor): + """Information extractor for vimeo.com.""" + + # _VALID_URL matches Vimeo URLs + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(VimeoIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[vimeo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[vimeo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self, url, new_video=True): + # Extract ID from URL + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) + return + + # At this point we have a new video + self._downloader.increment_downloads() + video_id = mobj.group(1) + + # Retrieve video webpage to extract further information + request = urllib2.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id, None, std_headers) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + # Now we begin extracting as much information as we can from what we + # retrieved. First we extract the information common to all extractors, + # and latter we extract those that are Vimeo specific. + self.report_extraction(video_id) + + # Extract title + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video title') + return + video_title = mobj.group(1).decode('utf-8') + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) + + # Extract uploader + mobj = re.search(r'http://vimeo.com/(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video uploader') + return + video_uploader = mobj.group(1).decode('utf-8') + + # Extract video thumbnail + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract video thumbnail') + return + video_thumbnail = mobj.group(1).decode('utf-8') + + # # Extract video description + # mobj = re.search(r'', webpage) + # if mobj is None: + # self._downloader.trouble(u'ERROR: unable to extract video description') + # return + # video_description = mobj.group(1).decode('utf-8') + # if not video_description: video_description = 'No description available.' + video_description = 'Foo.' + + # Vimeo specific: extract request signature + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature') + return + sig = mobj.group(1).decode('utf-8') + + # Vimeo specific: Extract request signature expiration + mobj = re.search(r'(.*?)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract request signature expiration') + return + sig_exp = mobj.group(1).decode('utf-8') + + video_url = "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id, sig, sig_exp) + + try: + # Process video information + self._downloader.process_info({ + 'id': video_id.decode('utf-8'), + 'url': video_url, + 'uploader': video_uploader, + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': u'mp4', + 'thumbnail': video_thumbnail.decode('utf-8'), + 'description': video_description, + 'thumbnail': video_thumbnail, + 'description': video_description, + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'ERROR: unable to download video') + + class GenericIE(InfoExtractor): """Generic last-resort information extractor.""" @@ -1949,11 +2106,11 @@ class GenericIE(InfoExtractor): return video_url = urllib.unquote(mobj.group(1)) - video_id = os.path.basename(video_url) + video_id = os.path.basename(video_url) # here's a fun little line of code for you: video_extension = os.path.splitext(video_id)[1][1:] - video_id = os.path.splitext(video_id)[0] + video_id = os.path.splitext(video_id)[0] # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -2026,7 +2183,7 @@ class YoutubeSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2040,7 +2197,7 @@ class YoutubeSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_youtube_results: - self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) + self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) n = self._max_youtube_results self._download_n_results(query, n) return @@ -2084,6 +2241,7 @@ class YoutubeSearchIE(InfoExtractor): pagenum = pagenum + 1 + class GoogleSearchIE(InfoExtractor): """Information Extractor for Google Video search queries.""" _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' @@ -2117,7 +2275,7 @@ class GoogleSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2131,7 +2289,7 @@ class GoogleSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_google_results: - self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) + self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) n = self._max_google_results self._download_n_results(query, n) return @@ -2175,6 +2333,7 @@ class GoogleSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YahooSearchIE(InfoExtractor): """Information Extractor for Yahoo! Video search queries.""" _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' @@ -2208,7 +2367,7 @@ class YahooSearchIE(InfoExtractor): prefix, query = query.split(':') prefix = prefix[8:] - query = query.encode('utf-8') + query = query.encode('utf-8') if prefix == '': self._download_n_results(query, 1) return @@ -2222,7 +2381,7 @@ class YahooSearchIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) return elif n > self._max_yahoo_results: - self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) + self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) n = self._max_yahoo_results self._download_n_results(query, n) return @@ -2266,10 +2425,11 @@ class YahooSearchIE(InfoExtractor): pagenum = pagenum + 1 + class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*' @@ -2342,6 +2502,7 @@ class YoutubePlaylistIE(InfoExtractor): self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return + class YoutubeUserIE(InfoExtractor): """Information Extractor for YouTube users.""" @@ -2363,7 +2524,7 @@ class YoutubeUserIE(InfoExtractor): def report_download_page(self, username, start_index): """Report attempt to download user page.""" self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % - (username, start_index, start_index + self._GDATA_PAGE_SIZE)) + (username, start_index, start_index + self._GDATA_PAGE_SIZE)) def _real_initialize(self): self._youtube_ie.initialize() @@ -2425,9 +2586,9 @@ class YoutubeUserIE(InfoExtractor): video_ids = video_ids[playliststart:] else: video_ids = video_ids[playliststart:playlistend] - + self._downloader.to_screen("[youtube] user %s: Collected %d video ids (downloading %d of them)" % - (username, all_ids_count, len(video_ids))) + (username, all_ids_count, len(video_ids))) for video_id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) @@ -2512,6 +2673,7 @@ class DepositFilesIE(InfoExtractor): except UnavailableVideoError, err: self._downloader.trouble(u'ERROR: unable to download file') + class FacebookIE(InfoExtractor): """Information Extractor for Facebook""" @@ -2713,7 +2875,6 @@ class FacebookIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'mp4') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -2735,7 +2896,7 @@ class FacebookIE(InfoExtractor): class BlipTVIE(InfoExtractor): """Information extractor for blip.tv""" - _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip.tv(/.+)$' + _VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$' _URL_EXT = r'^.*\.([a-z0-9]+)$' @staticmethod @@ -2757,7 +2918,11 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1' + if '?' in url: + cchar = '&' + else: + cchar = '?' + json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) self.report_extraction(mobj.group(1)) try: @@ -2767,7 +2932,10 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] if 'Post' in json_data else json_data + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] @@ -2801,6 +2969,226 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'\nERROR: unable to download video') +class MyVideoIE(InfoExtractor): + """Information Extractor for myvideo.de.""" + + _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*' + + def __init__(self, downloader=None): + InfoExtractor.__init__(self, downloader) + + @staticmethod + def suitable(url): + return (re.match(MyVideoIE._VALID_URL, url) is not None) + + def report_download_webpage(self, video_id): + """Report webpage download.""" + self._downloader.to_screen(u'[myvideo] %s: Downloading webpage' % video_id) + + def report_extraction(self, video_id): + """Report information extraction.""" + self._downloader.to_screen(u'[myvideo] %s: Extracting information' % video_id) + + def _real_initialize(self): + return + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._download.trouble(u'ERROR: invalid URL: %s' % url) + return + + video_id = mobj.group(1) + simple_title = mobj.group(2).decode('utf-8') + # should actually not be necessary + simple_title = sanitize_title(simple_title) + simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', simple_title) + + # Get video webpage + request = urllib2.Request('http://www.myvideo.de/watch/%s' % video_id) + try: + self.report_download_webpage(video_id) + webpage = urllib2.urlopen(request).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + return + + self.report_extraction(video_id) + mobj = re.search(r'', + webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract media URL') + return + video_url = mobj.group(1) + ('/%s.flv' % video_id) + + mobj = re.search('([^<]+)', webpage) + if mobj is None: + self._downloader.trouble(u'ERROR: unable to extract title') + return + + video_title = mobj.group(1) + video_title = sanitize_title(video_title) + + try: + print(video_url) + self._downloader.process_info({ + 'id': video_id, + 'url': video_url, + 'uploader': u'NA', + 'upload_date': u'NA', + 'title': video_title, + 'stitle': simple_title, + 'ext': u'flv', + 'format': u'NA', + 'player_url': None, + }) + except UnavailableVideoError: + self._downloader.trouble(u'\nERROR: Unable to download video') + +class ComedyCentralIE(InfoExtractor): + """Information extractor for The Daily Show and Colbert Report """ + + _VALID_URL = r'^(:(?Ptds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)(?Pthedailyshow|colbertnation)\.com/full-episodes/(?P.*)$' + + @staticmethod + def suitable(url): + return (re.match(ComedyCentralIE._VALID_URL, url) is not None) + + def report_extraction(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) + + def report_config_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + + def report_index_download(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) + + def report_player_url(self, episode_id): + self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + + def _simplify_title(self, title): + res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) + res = res.strip(ur'_') + return res + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: invalid URL: %s' % url) + return + + if mobj.group('shortname'): + if mobj.group('shortname') in ('tds', 'thedailyshow'): + url = 'http://www.thedailyshow.com/full-episodes/' + else: + url = 'http://www.colbertnation.com/full-episodes/' + mobj = re.match(self._VALID_URL, url) + assert mobj is not None + + dlNewest = not mobj.group('episode') + if dlNewest: + epTitle = mobj.group('showname') + else: + epTitle = mobj.group('episode') + + req = urllib2.Request(url) + self.report_extraction(epTitle) + try: + htmlHandle = urllib2.urlopen(req) + html = htmlHandle.read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + if dlNewest: + url = htmlHandle.geturl() + mobj = re.match(self._VALID_URL, url) + if mobj is None: + self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url) + return + if mobj.group('episode') == '': + self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url) + return + epTitle = mobj.group('episode') + + mMovieParams = re.findall('', html) + if len(mMovieParams) == 0: + self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + return + + playerUrl_raw = mMovieParams[0][0] + self.report_player_url(epTitle) + try: + urlHandle = urllib2.urlopen(playerUrl_raw) + playerUrl = urlHandle.geturl() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) + return + + uri = mMovieParams[0][1] + indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri}) + self.report_index_download(epTitle) + try: + indexXml = urllib2.urlopen(indexUrl).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) + return + + idoc = xml.etree.ElementTree.fromstring(indexXml) + itemEls = idoc.findall('.//item') + for itemEl in itemEls: + mediaId = itemEl.findall('./guid')[0].text + shortMediaId = mediaId.split(':')[-1] + showId = mediaId.split(':')[-2].replace('.com', '') + officialTitle = itemEl.findall('./title')[0].text + officialDate = itemEl.findall('./pubDate')[0].text + + configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + + urllib.urlencode({'uri': mediaId})) + configReq = urllib2.Request(configUrl) + self.report_config_download(epTitle) + try: + configXml = urllib2.urlopen(configReq).read() + except (urllib2.URLError, httplib.HTTPException, socket.error), err: + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) + return + + cdoc = xml.etree.ElementTree.fromstring(configXml) + turls = [] + for rendition in cdoc.findall('.//rendition'): + finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) + turls.append(finfo) + + if len(turls) == 0: + self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') + continue + + # For now, just pick the highest bitrate + format,video_url = turls[-1] + + self._downloader.increment_downloads() + + effTitle = showId + '-' + epTitle + info = { + 'id': shortMediaId, + 'url': video_url, + 'uploader': showId, + 'upload_date': officialDate, + 'title': effTitle, + 'stitle': self._simplify_title(effTitle), + 'ext': 'mp4', + 'format': format, + 'thumbnail': None, + 'description': officialTitle, + 'player_url': playerUrl + } + + try: + self._downloader.process_info(info) + except UnavailableVideoError, err: + self._downloader.trouble(u'\nERROR: unable to download ' + mediaId) + continue + + class PostProcessor(object): """Post Processor class. @@ -2847,6 +3235,7 @@ class PostProcessor(object): """ return information # by default, do nothing + class FFmpegExtractAudioPP(PostProcessor): def __init__(self, downloader=None, preferredcodec=None): @@ -2929,321 +3318,379 @@ class FFmpegExtractAudioPP(PostProcessor): information['filepath'] = new_path return information -### MAIN PROGRAM ### -if __name__ == '__main__': + +def updateSelf(downloader, filename): + ''' Update the program file with the latest version from the repository ''' + # Note: downloader only used for options + if not os.access(filename, os.W_OK): + sys.exit('ERROR: no write permissions on %s' % filename) + + downloader.to_screen('Updating to latest version...') + try: - # Modules needed only when running the main program - import getpass - import optparse + try: + urlh = urllib.urlopen(UPDATE_URL) + newcontent = urlh.read() + finally: + urlh.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to download latest version') - # Function to update the program file with the latest version from the repository. - def update_self(downloader, filename): - # Note: downloader only used for options - if not os.access(filename, os.W_OK): - sys.exit('ERROR: no write permissions on %s' % filename) + try: + outf = open(filename, 'wb') + try: + outf.write(newcontent) + finally: + outf.close() + except (IOError, OSError), err: + sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updating to latest stable version...') - try: - latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' - latest_version = urllib.urlopen(latest_url).read().strip() - prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version - newcontent = urllib.urlopen(prog_url).read() - except (IOError, OSError), err: - sys.exit('ERROR: unable to download latest version') - try: - stream = open(filename, 'w') - stream.write(newcontent) - stream.close() - except (IOError, OSError), err: - sys.exit('ERROR: unable to overwrite current version') - downloader.to_screen('Updated to version %s' % latest_version) - - # Parse command line - parser = optparse.OptionParser( - usage='Usage: %prog [options] url...', - version='2011.07.09-phihag', - conflict_handler='resolve', - ) - - parser.add_option('-h', '--help', - action='help', help='print this help text and exit') - parser.add_option('-v', '--version', - action='version', help='print program version and exit') - parser.add_option('-U', '--update', - action='store_true', dest='update_self', help='update this program to latest stable version') - parser.add_option('-i', '--ignore-errors', - action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) - parser.add_option('-r', '--rate-limit', - dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') - parser.add_option('-R', '--retries', - dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) - parser.add_option('--playlist-start', - dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) - parser.add_option('--playlist-end', - dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) - parser.add_option('--dump-user-agent', - action='store_true', dest='dump_user_agent', - help='display the current browser identification', default=False) - - authentication = optparse.OptionGroup(parser, 'Authentication Options') - authentication.add_option('-u', '--username', - dest='username', metavar='USERNAME', help='account username') - authentication.add_option('-p', '--password', - dest='password', metavar='PASSWORD', help='account password') - authentication.add_option('-n', '--netrc', - action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) - parser.add_option_group(authentication) - - video_format = optparse.OptionGroup(parser, 'Video Format Options') - video_format.add_option('-f', '--format', - action='store', dest='format', metavar='FORMAT', help='video format code') - video_format.add_option('--all-formats', - action='store_const', dest='format', help='download all available video formats', const='-1') - video_format.add_option('--max-quality', - action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') - parser.add_option_group(video_format) - - verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') - verbosity.add_option('-q', '--quiet', - action='store_true', dest='quiet', help='activates quiet mode', default=False) - verbosity.add_option('-s', '--simulate', - action='store_true', dest='simulate', help='do not download video', default=False) - verbosity.add_option('-g', '--get-url', - action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) - verbosity.add_option('-e', '--get-title', - action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) - verbosity.add_option('--get-thumbnail', - action='store_true', dest='getthumbnail', - help='simulate, quiet but print thumbnail URL', default=False) - verbosity.add_option('--get-description', - action='store_true', dest='getdescription', - help='simulate, quiet but print video description', default=False) - verbosity.add_option('--get-filename', - action='store_true', dest='getfilename', - help='simulate, quiet but print output filename', default=False) - verbosity.add_option('--no-progress', - action='store_true', dest='noprogress', help='do not print progress bar', default=False) - verbosity.add_option('--console-title', - action='store_true', dest='consoletitle', - help='display progress in console titlebar', default=False) - parser.add_option_group(verbosity) - - filesystem = optparse.OptionGroup(parser, 'Filesystem Options') - filesystem.add_option('-t', '--title', - action='store_true', dest='usetitle', help='use title in file name', default=False) - filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) - filesystem.add_option('-A', '--auto-number', - action='store_true', dest='autonumber', - help='number downloaded files starting from 00000', default=False) - filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template') - filesystem.add_option('-a', '--batch-file', - dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') - filesystem.add_option('-w', '--no-overwrites', - action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) - filesystem.add_option('-c', '--continue', - action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) - filesystem.add_option('--cookies', - dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') - filesystem.add_option('--no-part', - action='store_true', dest='nopart', help='do not use .part files', default=False) - filesystem.add_option('--no-mtime', - action='store_false', dest='updatetime', - help='do not use the Last-modified header to set the file modification time', default=True) - filesystem.add_option('--write-description', - action='store_true', dest='writedescription', - help='write video description to a .description file', default=False) - filesystem.add_option('--write-info-json', - action='store_true', dest='writeinfojson', - help='write video metadata to a .info.json file', default=False) - parser.add_option_group(filesystem) - - postproc = optparse.OptionGroup(parser, 'Post-processing Options') - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, - help='convert video files to audio-only files (requires ffmpeg and ffprobe)') - postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac" or "mp3"; best by default') - parser.add_option_group(postproc) - - (opts, args) = parser.parse_args() - - # Open appropriate CookieJar - if opts.cookiefile is None: - jar = cookielib.CookieJar() - else: - try: - jar = cookielib.MozillaCookieJar(opts.cookiefile) - if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): - jar.load() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to open cookie file') + downloader.to_screen('Updated youtube-dl. Restart to use the new version.') - # Dump user agent - if opts.dump_user_agent: - print std_headers['User-Agent'] - sys.exit(0) +def parseOpts(): + # Deferred imports + import getpass + import optparse - # General configuration - cookie_processor = urllib2.HTTPCookieProcessor(jar) - urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())) - socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + def _format_option_string(option): + ''' ('-o', '--option') -> -o, --format METAVAR''' + + opts = [] + + if option._short_opts: opts.append(option._short_opts[0]) + if option._long_opts: opts.append(option._long_opts[0]) + if len(opts) > 1: opts.insert(1, ', ') + + if option.takes_value(): opts.append(' %s' % option.metavar) + + return "".join(opts) + + def _find_term_columns(): + columns = os.environ.get('COLUMNS', None) + if columns: + return int(columns) - # Batch file verification - batchurls = [] - if opts.batchfile is not None: - try: - if opts.batchfile == '-': - batchfd = sys.stdin - else: - batchfd = open(opts.batchfile, 'r') - batchurls = batchfd.readlines() - batchurls = [x.strip() for x in batchurls] - batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] - except IOError: - sys.exit(u'ERROR: batch file could not be read') - all_urls = batchurls + args - - # Conflicting, missing and erroneous options - if opts.usenetrc and (opts.username is not None or opts.password is not None): - parser.error(u'using .netrc conflicts with giving username/password') - if opts.password is not None and opts.username is None: - parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') - if opts.username is not None and opts.password is None: - opts.password = getpass.getpass(u'Type account password and press return:') - if opts.ratelimit is not None: - numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) - if numeric_limit is None: - parser.error(u'invalid rate limit specified') - opts.ratelimit = numeric_limit - if opts.retries is not None: - try: - opts.retries = long(opts.retries) - except (TypeError, ValueError), err: - parser.error(u'invalid retry count specified') try: - opts.playliststart = long(opts.playliststart) - if opts.playliststart <= 0: - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist start number specified') + sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out,err = sp.communicate() + return int(out.split()[1]) + except: + pass + return None + + max_width = 80 + max_help_position = 80 + + # No need to wrap help messages if we're on a wide console + columns = _find_term_columns() + if columns: max_width = columns + + fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) + fmt.format_option_strings = _format_option_string + + kw = { + 'version' : __version__, + 'formatter' : fmt, + 'usage' : '%prog [options] url [url...]', + 'conflict_handler' : 'resolve', + } + + parser = optparse.OptionParser(**kw) + + # option groups + general = optparse.OptionGroup(parser, 'General Options') + authentication = optparse.OptionGroup(parser, 'Authentication Options') + video_format = optparse.OptionGroup(parser, 'Video Format Options') + postproc = optparse.OptionGroup(parser, 'Post-processing Options') + filesystem = optparse.OptionGroup(parser, 'Filesystem Options') + verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') + + general.add_option('-h', '--help', + action='help', help='print this help text and exit') + general.add_option('-v', '--version', + action='version', help='print program version and exit') + general.add_option('-U', '--update', + action='store_true', dest='update_self', help='update this program to latest version') + general.add_option('-i', '--ignore-errors', + action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) + general.add_option('-r', '--rate-limit', + dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') + general.add_option('-R', '--retries', + dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + general.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) + general.add_option('--playlist-end', + dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) + general.add_option('--dump-user-agent', + action='store_true', dest='dump_user_agent', + help='display the current browser identification', default=False) + + authentication.add_option('-u', '--username', + dest='username', metavar='USERNAME', help='account username') + authentication.add_option('-p', '--password', + dest='password', metavar='PASSWORD', help='account password') + authentication.add_option('-n', '--netrc', + action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) + + + video_format.add_option('-f', '--format', + action='store', dest='format', metavar='FORMAT', help='video format code') + video_format.add_option('--all-formats', + action='store_const', dest='format', help='download all available video formats', const='-1') + video_format.add_option('--max-quality', + action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + + + verbosity.add_option('-q', '--quiet', + action='store_true', dest='quiet', help='activates quiet mode', default=False) + verbosity.add_option('-s', '--simulate', + action='store_true', dest='simulate', help='do not download video', default=False) + verbosity.add_option('-g', '--get-url', + action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) + verbosity.add_option('-e', '--get-title', + action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) + verbosity.add_option('--get-thumbnail', + action='store_true', dest='getthumbnail', + help='simulate, quiet but print thumbnail URL', default=False) + verbosity.add_option('--get-description', + action='store_true', dest='getdescription', + help='simulate, quiet but print video description', default=False) + verbosity.add_option('--get-filename', + action='store_true', dest='getfilename', + help='simulate, quiet but print output filename', default=False) + verbosity.add_option('--no-progress', + action='store_true', dest='noprogress', help='do not print progress bar', default=False) + verbosity.add_option('--console-title', + action='store_true', dest='consoletitle', + help='display progress in console titlebar', default=False) + + + filesystem.add_option('-t', '--title', + action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('-l', '--literal', + action='store_true', dest='useliteral', help='use literal title in file name', default=False) + filesystem.add_option('-A', '--auto-number', + action='store_true', dest='autonumber', + help='number downloaded files starting from 00000', default=False) + filesystem.add_option('-o', '--output', + dest='outtmpl', metavar='TEMPLATE', help='output filename template') + filesystem.add_option('-a', '--batch-file', + dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') + filesystem.add_option('-w', '--no-overwrites', + action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) + filesystem.add_option('-c', '--continue', + action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) + filesystem.add_option('--cookies', + dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') + filesystem.add_option('--no-part', + action='store_true', dest='nopart', help='do not use .part files', default=False) + filesystem.add_option('--no-mtime', + action='store_false', dest='updatetime', + help='do not use the Last-modified header to set the file modification time', default=True) + filesystem.add_option('--write-description', + action='store_true', dest='writedescription', + help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) + + + postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + help='convert video files to audio-only files (requires ffmpeg and ffprobe)') + postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', + help='"best", "aac" or "mp3"; best by default') + + + parser.add_option_group(general) + parser.add_option_group(filesystem) + parser.add_option_group(verbosity) + parser.add_option_group(video_format) + parser.add_option_group(authentication) + parser.add_option_group(postproc) + + opts, args = parser.parse_args() + + return parser, opts, args + +def main(): + parser, opts, args = parseOpts() + + # Open appropriate CookieJar + if opts.cookiefile is None: + jar = cookielib.CookieJar() + else: try: - opts.playlistend = long(opts.playlistend) - if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): - raise ValueError - except (TypeError, ValueError), err: - parser.error(u'invalid playlist end number specified') - if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3']: - parser.error(u'invalid audio format specified') - - # Information extractors - youtube_ie = YoutubeIE() - metacafe_ie = MetacafeIE(youtube_ie) - dailymotion_ie = DailymotionIE() - youtube_pl_ie = YoutubePlaylistIE(youtube_ie) - youtube_user_ie = YoutubeUserIE(youtube_ie) - youtube_search_ie = YoutubeSearchIE(youtube_ie) - google_ie = GoogleIE() - google_search_ie = GoogleSearchIE(google_ie) - photobucket_ie = PhotobucketIE() - yahoo_ie = YahooIE() - yahoo_search_ie = YahooSearchIE(yahoo_ie) - deposit_files_ie = DepositFilesIE() - facebook_ie = FacebookIE() - bliptv_ie = BlipTVIE() - generic_ie = GenericIE() - - # File downloader - fd = FileDownloader({ - 'usenetrc': opts.usenetrc, - 'username': opts.username, - 'password': opts.password, - 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'forceurl': opts.geturl, - 'forcetitle': opts.gettitle, - 'forcethumbnail': opts.getthumbnail, - 'forcedescription': opts.getdescription, - 'forcefilename': opts.getfilename, - 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), - 'format': opts.format, - 'format_limit': opts.format_limit, - 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') - or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') - or u'%(id)s.%(ext)s'), - 'ignoreerrors': opts.ignoreerrors, - 'ratelimit': opts.ratelimit, - 'nooverwrites': opts.nooverwrites, - 'retries': opts.retries, - 'continuedl': opts.continue_dl, - 'noprogress': opts.noprogress, - 'playliststart': opts.playliststart, - 'playlistend': opts.playlistend, - 'logtostderr': opts.outtmpl == '-', - 'consoletitle': opts.consoletitle, - 'nopart': opts.nopart, - 'updatetime': opts.updatetime, - 'writedescription': opts.writedescription, - 'writeinfojson': opts.writeinfojson, - }) - fd.add_info_extractor(youtube_search_ie) - fd.add_info_extractor(youtube_pl_ie) - fd.add_info_extractor(youtube_user_ie) - fd.add_info_extractor(metacafe_ie) - fd.add_info_extractor(dailymotion_ie) - fd.add_info_extractor(youtube_ie) - fd.add_info_extractor(google_ie) - fd.add_info_extractor(google_search_ie) - fd.add_info_extractor(photobucket_ie) - fd.add_info_extractor(yahoo_ie) - fd.add_info_extractor(yahoo_search_ie) - fd.add_info_extractor(deposit_files_ie) - fd.add_info_extractor(facebook_ie) - fd.add_info_extractor(bliptv_ie) - - # This must come last since it's the - # fallback if none of the others work - fd.add_info_extractor(generic_ie) - - # PostProcessors - if opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) - - # Update version - if opts.update_self: - update_self(fd, sys.argv[0]) - - # Maybe do nothing - if len(all_urls) < 1: - if not opts.update_self: - parser.error(u'you must provide at least one URL') + jar = cookielib.MozillaCookieJar(opts.cookiefile) + if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): + jar.load() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to open cookie file') + + # Dump user agent + if opts.dump_user_agent: + print std_headers['User-Agent'] + sys.exit(0) + + # General configuration + cookie_processor = urllib2.HTTPCookieProcessor(jar) + opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) + urllib2.install_opener(opener) + socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) + + # Batch file verification + batchurls = [] + if opts.batchfile is not None: + try: + if opts.batchfile == '-': + batchfd = sys.stdin else: - sys.exit() - retcode = fd.download(all_urls) + batchfd = open(opts.batchfile, 'r') + batchurls = batchfd.readlines() + batchurls = [x.strip() for x in batchurls] + batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] + except IOError: + sys.exit(u'ERROR: batch file could not be read') + all_urls = batchurls + args + + # Conflicting, missing and erroneous options + if opts.usenetrc and (opts.username is not None or opts.password is not None): + parser.error(u'using .netrc conflicts with giving username/password') + if opts.password is not None and opts.username is None: + parser.error(u'account username missing') + if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): + parser.error(u'using output template conflicts with using title, literal title or auto number') + if opts.usetitle and opts.useliteral: + parser.error(u'using title conflicts with using literal title') + if opts.username is not None and opts.password is None: + opts.password = getpass.getpass(u'Type account password and press return:') + if opts.ratelimit is not None: + numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) + if numeric_limit is None: + parser.error(u'invalid rate limit specified') + opts.ratelimit = numeric_limit + if opts.retries is not None: + try: + opts.retries = long(opts.retries) + except (TypeError, ValueError), err: + parser.error(u'invalid retry count specified') + try: + opts.playliststart = int(opts.playliststart) + if opts.playliststart <= 0: + raise ValueError(u'Playlist start must be positive') + except (TypeError, ValueError), err: + parser.error(u'invalid playlist start number specified') + try: + opts.playlistend = int(opts.playlistend) + if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): + raise ValueError(u'Playlist end must be greater than playlist start') + except (TypeError, ValueError), err: + parser.error(u'invalid playlist end number specified') + if opts.extractaudio: + if opts.audioformat not in ['best', 'aac', 'mp3']: + parser.error(u'invalid audio format specified') + + # Information extractors + youtube_ie = YoutubeIE() + google_ie = GoogleIE() + yahoo_ie = YahooIE() + extractors = [ # Order does matter + youtube_ie, + MetacafeIE(youtube_ie), + DailymotionIE(), + YoutubePlaylistIE(youtube_ie), + YoutubeUserIE(youtube_ie), + YoutubeSearchIE(youtube_ie), + google_ie, + GoogleSearchIE(google_ie), + PhotobucketIE(), + yahoo_ie, + YahooSearchIE(yahoo_ie), + DepositFilesIE(), + FacebookIE(), + BlipTVIE(), + VimeoIE(), + MyVideoIE(), + ComedyCentralIE(), + + GenericIE() + ] + + # File downloader + fd = FileDownloader({ + 'usenetrc': opts.usenetrc, + 'username': opts.username, + 'password': opts.password, + 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'forceurl': opts.geturl, + 'forcetitle': opts.gettitle, + 'forcethumbnail': opts.getthumbnail, + 'forcedescription': opts.getdescription, + 'forcefilename': opts.getfilename, + 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename), + 'format': opts.format, + 'format_limit': opts.format_limit, + 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) + or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') + or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') + or u'%(id)s.%(ext)s'), + 'ignoreerrors': opts.ignoreerrors, + 'ratelimit': opts.ratelimit, + 'nooverwrites': opts.nooverwrites, + 'retries': opts.retries, + 'continuedl': opts.continue_dl, + 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, + 'playlistend': opts.playlistend, + 'logtostderr': opts.outtmpl == '-', + 'consoletitle': opts.consoletitle, + 'nopart': opts.nopart, + 'updatetime': opts.updatetime, + 'writedescription': opts.writedescription, + 'writeinfojson': opts.writeinfojson, + }) + for extractor in extractors: + fd.add_info_extractor(extractor) + + # PostProcessors + if opts.extractaudio: + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat)) + + # Update version + if opts.update_self: + updateSelf(fd, sys.argv[0]) + + # Maybe do nothing + if len(all_urls) < 1: + if not opts.update_self: + parser.error(u'you must provide at least one URL') + else: + sys.exit() + retcode = fd.download(all_urls) - # Dump cookie jar if requested - if opts.cookiefile is not None: - try: - jar.save() - except (IOError, OSError), err: - sys.exit(u'ERROR: unable to save cookie jar') + # Dump cookie jar if requested + if opts.cookiefile is not None: + try: + jar.save() + except (IOError, OSError), err: + sys.exit(u'ERROR: unable to save cookie jar') - sys.exit(retcode) + sys.exit(retcode) + +if __name__ == '__main__': + try: + main() except DownloadError: sys.exit(1) except SameFileError: sys.exit(u'ERROR: fixed output name but more than one file to download') except KeyboardInterrupt: sys.exit(u'\nERROR: Interrupted by user') + +# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: