# Author: Danny Colligan
# Author: Benjamin Johnson
# Author: Vasyl' Vavrychuk
+# Author: Witold Baryluk
# License: Public domain code
import cookielib
import ctypes
import datetime
+import email.utils
import gzip
import htmlentitydefs
import httplib
stream = open(filename, open_mode)
return (stream, filename)
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns the value computed by email.utils.mktime_tz for the parsed
    date, or None when timestr is empty, cannot be parsed, or describes
    a date that cannot be turned into a timestamp.
    """
    if not timestr:
        return None
    try:
        timetuple = email.utils.parsedate_tz(timestr)
        if timetuple is None:
            return None
        return email.utils.mktime_tz(timetuple)
    except (ValueError, OverflowError):
        # A syntactically valid header can still carry an out-of-range
        # date (e.g. a five-digit year); treat it like an unparsable one
        # instead of letting the error escape to the caller.
        return None
+
class DownloadError(Exception):
"""Download Error exception.
except zlib.error:
return zlib.decompress(data)
@staticmethod
def addinfourl_wrapper(stream, headers, url, code):
    """Build a urllib2.addinfourl response that carries the given status code.

    When urllib2.addinfourl exposes a 'getcode' attribute, the code is
    passed straight to its constructor. Otherwise the object is built
    from (stream, headers, url) only and the code is attached afterwards
    as the 'code' attribute.
    """
    if not hasattr(urllib2.addinfourl, 'getcode'):
        # Presumably an older urllib2 whose constructor takes no code
        # argument -- set the attribute by hand so callers can read it.
        response = urllib2.addinfourl(stream, headers, url)
        response.code = code
        return response
    return urllib2.addinfourl(stream, headers, url, code)
+
def http_request(self, req):
for h in std_headers:
if h in req.headers:
# gzip
if resp.headers.get('Content-encoding', '') == 'gzip':
gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
- resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
# deflate
if resp.headers.get('Content-encoding', '') == 'deflate':
gz = StringIO.StringIO(self.deflate(resp.read()))
- resp = urllib2.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
resp.msg = old_resp.msg
return resp
forcetitle: Force printing title.
forcethumbnail: Force printing thumbnail URL.
forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
simulate: Do not download the video files.
format: Video format code.
format_limit: Highest quality format to try.
logtostderr: Log messages to stderr instead of stdout.
consoletitle: Display progress in console window's titlebar.
nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
"""
params = None
os.rename(old_filename, new_filename)
except (IOError, OSError), err:
self.trouble(u'ERROR: unable to rename file')
+
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    filename is the path of the file to touch and last_modified_hdr is
    the raw value of the Last-modified header (or None when absent).
    This is a best-effort operation: nothing is done when the header is
    missing, the file does not exist or the header cannot be converted,
    and failures while updating the timestamp are ignored.
    """
    if last_modified_hdr is None:
        return
    if not os.path.isfile(filename):
        return
    try:
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return
        # Access time becomes "now"; modification time comes from the header.
        os.utime(filename, (time.time(), filetime))
    except (EnvironmentError, OverflowError, ValueError, TypeError):
        # Deliberately ignored: a bad header or a filesystem refusing the
        # update must not fail the download. Unlike a bare except, this
        # still lets KeyboardInterrupt and SystemExit propagate.
        pass
def report_destination(self, filename):
"""Report destination filename."""
"""Increment the ordinal that assigns a number to each file."""
self._num_downloads += 1
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' template found in self.params with a copy of
    info_dict extended by two extra fields: 'epoch' (current time in
    whole seconds) and 'autonumber' (self._num_downloads zero-padded to
    five digits). info_dict itself is not modified.

    Returns the resulting filename, or None after reporting the problem
    through self.trouble() when the template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)
        template_dict['epoch'] = u'%d' % time.time()
        template_dict['autonumber'] = u'%05d' % self._num_downloads
        return self.params['outtmpl'] % template_dict
    except (ValueError, KeyError, TypeError):
        # TypeError covers templates whose conversion does not fit the
        # field, e.g. '%(title)d' applied to a string value.
        self.trouble(u'ERROR: invalid system charset or erroneous output template')
        return None
+
def process_info(self, info_dict):
"""Process a single dictionary returned by an InfoExtractor."""
+ filename = self.prepare_filename(info_dict)
# Do nothing else if in simulate mode
if self.params.get('simulate', False):
# Forced printings
print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
if self.params.get('forcedescription', False) and 'description' in info_dict:
print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
+ if self.params.get('forcefilename', False) and filename is not None:
+ print filename.encode(preferredencoding(), 'xmlcharrefreplace')
return
- try:
- template_dict = dict(info_dict)
- template_dict['epoch'] = unicode(long(time.time()))
- template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
- filename = self.params['outtmpl'] % template_dict
- except (ValueError, KeyError), err:
- self.trouble(u'ERROR: invalid system charset or erroneous output template')
+ if filename is None:
return
if self.params.get('nooverwrites', False) and os.path.exists(filename):
self.to_stderr(u'WARNING: file exists and will be skipped')
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, long(data_len))
self.try_rename(tmpfilename, filename)
+
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ self.try_utime(filename, data.info().get('last-modified', None))
+
return True
class InfoExtractor(object):
class YoutubeIE(InfoExtractor):
"""Information extractor for youtube.com."""
- _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
+ _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
_LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
_LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
_NETRC_MACHINE = 'youtube'
# Listed in order of quality
- _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
+ _available_formats = ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13']
_video_extensions = {
'13': '3gp',
'17': 'mp4',
video_title = mobj.group(1).decode('utf-8')
video_title = sanitize_title(video_title)
- mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
+ mobj = re.search(r'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage)
if mobj is None:
self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
return
import getpass
import optparse
- # Function to update the program file with the latest version from bitbucket.org
+ # Function to update the program file with the latest version from the repository.
def update_self(downloader, filename):
    """Overwrite the program file with the latest published version.

    downloader is only used to print status messages through its
    to_screen() method; filename is the path of the program file to
    replace. The process exits through sys.exit() with an error message
    when the file is not writable, when the download fails or yields no
    data, or when the file cannot be written.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen('Updating to latest stable version...')
    try:
        latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
        latest_version = urllib.urlopen(latest_url).read().strip()
        prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
        newcontent = urllib.urlopen(prog_url).read()
    except (IOError, OSError):
        sys.exit('ERROR: unable to download latest version')

    # Opening the file for writing truncates it, so never do that when
    # there is nothing to put back in its place.
    if not newcontent:
        sys.exit('ERROR: unable to download latest version')

    try:
        stream = open(filename, 'w')
        try:
            stream.write(newcontent)
        finally:
            # Release the handle even when the write fails.
            stream.close()
    except (IOError, OSError):
        sys.exit('ERROR: unable to overwrite current version')
    downloader.to_screen('Updated to version %s' % latest_version)
# Parse command line
parser.add_option('--playlist-end',
dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
parser.add_option('--dump-user-agent',
- action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False)
+ action='store_true', dest='dump_user_agent',
+ help='display the current browser identification', default=False)
authentication = optparse.OptionGroup(parser, 'Authentication Options')
authentication.add_option('-u', '--username',
verbosity.add_option('-e', '--get-title',
action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
verbosity.add_option('--get-thumbnail',
- action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
+ action='store_true', dest='getthumbnail',
+ help='simulate, quiet but print thumbnail URL', default=False)
verbosity.add_option('--get-description',
- action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
+ action='store_true', dest='getdescription',
+ help='simulate, quiet but print video description', default=False)
+ verbosity.add_option('--get-filename',
+ action='store_true', dest='getfilename',
+ help='simulate, quiet but print output filename', default=False)
verbosity.add_option('--no-progress',
action='store_true', dest='noprogress', help='do not print progress bar', default=False)
verbosity.add_option('--console-title',
- action='store_true', dest='consoletitle', help='display progress in console titlebar', default=False)
+ action='store_true', dest='consoletitle',
+ help='display progress in console titlebar', default=False)
parser.add_option_group(verbosity)
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option('-l', '--literal',
action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-A', '--auto-number',
- action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
+ action='store_true', dest='autonumber',
+ help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
dest='outtmpl', metavar='TEMPLATE', help='output filename template')
filesystem.add_option('-a', '--batch-file',
dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
filesystem.add_option('--no-part',
action='store_true', dest='nopart', help='do not use .part files', default=False)
+ filesystem.add_option('--no-mtime',
+ action='store_false', dest='updatetime',
+ help='do not use the Last-modified header to set the file modification time', default=True)
parser.add_option_group(filesystem)
(opts, args) = parser.parse_args()
'usenetrc': opts.usenetrc,
'username': opts.username,
'password': opts.password,
- 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+ 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
'forceurl': opts.geturl,
'forcetitle': opts.gettitle,
'forcethumbnail': opts.getthumbnail,
'forcedescription': opts.getdescription,
- 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
+ 'forcefilename': opts.getfilename,
+ 'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
'format': opts.format,
'format_limit': opts.format_limit,
'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
'logtostderr': opts.outtmpl == '-',
'consoletitle': opts.consoletitle,
'nopart': opts.nopart,
+ 'updatetime': opts.updatetime,
})
fd.add_info_extractor(youtube_search_ie)
fd.add_info_extractor(youtube_pl_ie)