X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=blobdiff_plain;f=youtube-dl;h=2a1908d3d5e064fc9f03ace8e026d855800b6f65;hb=86e709d3dee1c0ac6f21edbd11ba92c026bef7bb;hp=466802434db6025606ac144afb931c10a1bb9944;hpb=a1cab7cead8554913ba3d0362b3d6fb11c8c7f90;p=youtube-dl diff --git a/youtube-dl b/youtube-dl index 466802434..2a1908d3d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -7,15 +7,10 @@ # Author: Witold Baryluk # Author: Paweł Paprota # Author: Gergely Imreh +# Author: Philipp Hagemeister # License: Public domain code - -from __future__ import with_statement - -import contextlib import cookielib -import ctypes import datetime -import email.utils import gzip import htmlentitydefs import httplib @@ -35,11 +30,13 @@ import urllib2 import warnings import zlib -try: - import json -except ImportError: - warnings.warn('No JSON support (TODO: insert trivialjson here)') +if os.name == 'nt': + import ctypes +try: + import email.utils +except ImportError: # Python 2.4 + import email.Utils try: import cStringIO as StringIO except ImportError: @@ -66,6 +63,119 @@ std_headers = { simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii') +try: + import json +except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): + import re + class json(object): + @staticmethod + def loads(s): + s = s.decode('UTF-8') + def raiseError(msg, i): + raise ValueError(msg + ' at position ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:])) + def skipSpace(i, expectMore=True): + while i < len(s) and s[i] in ' \t\r\n': + i += 1 + if expectMore: + if i >= len(s): + raiseError('Premature end', i) + return i + def decodeEscape(match): + esc = match.group(1) + _STATIC = { + '"': '"', + '\\': '\\', + '/': '/', + 'b': unichr(0x8), + 'f': unichr(0xc), + 'n': '\n', + 'r': '\r', + 't': '\t', + } + if esc in _STATIC: + return _STATIC[esc] + if esc[0] == 'u': + if len(esc) == 1+4: + return unichr(int(esc[1:5], 16)) + if len(esc) == 5+6 and esc[5:7] == '\\u': + hi = int(esc[1:5], 16) + low = int(esc[7:11], 16) + return unichr((hi - 0xd800) * 0x400 + low - 0xdc00 + 0x10000) + raise ValueError('Unknown escape ' + str(esc)) + def parseString(i): + i += 1 + e = i + while True: + e = s.index('"', e) + bslashes = 0 + while s[e-bslashes-1] == '\\': + bslashes += 1 + if bslashes % 2 == 1: + e += 1 + continue + break + rexp = re.compile(r'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') + stri = rexp.sub(decodeEscape, s[i:e]) + return (e+1,stri) + def parseObj(i): + i += 1 + res = {} + i = skipSpace(i) + if s[i] == '}': # Empty dictionary + return (i+1,res) + while True: + if s[i] != '"': + raiseError('Expected a string object key', i) + i,key = parseString(i) + i = skipSpace(i) + if i >= len(s) or s[i] != ':': + raiseError('Expected a colon', i) + i,val = parse(i+1) + res[key] = val + i = skipSpace(i) + if s[i] == '}': + return (i+1, res) + if s[i] != ',': + raiseError('Expected comma or closing curly brace', i) + i = skipSpace(i+1) + def parseArray(i): + res = [] + i = skipSpace(i+1) + if s[i] == ']': # Empty array + return (i+1,res) + while True: + i,val = parse(i) + res.append(val) + i = skipSpace(i) # Raise exception if premature end + if s[i] == ']': + return (i+1, res) + if s[i] != ',': + raiseError('Expected a comma or closing bracket', i) + i = skipSpace(i+1) + def parseDiscrete(i): + for k,v in {'true': True, 'false': False, 'null': None}.items(): + if s.startswith(k, i): + return (i+len(k), v) + raiseError('Not a boolean (or null)', i) + def parseNumber(i): + mobj = re.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s[i:]) + if mobj is None: + raiseError('Not a number', i) + nums = mobj.group(1) + if '.' in nums or 'e' in nums or 'E' in nums: + return (i+len(nums), float(nums)) + return (i+len(nums), int(nums)) + CHARMAP = {'{': parseObj, '[': parseArray, '"': parseString, 't': parseDiscrete, 'f': parseDiscrete, 'n': parseDiscrete} + def parse(i): + i = skipSpace(i) + i,res = CHARMAP.get(s[i], parseNumber)(i) + i = skipSpace(i, False) + return (i,res) + i,res = parse(0) + if i < len(s): + raise ValueError('Extra data at end of input (index ' + str(i) + ' of ' + repr(s) + ': ' + repr(s[i:]) + ')') + return res + def preferredencoding(): """Get preferred encoding. @@ -306,6 +416,7 @@ class FileDownloader(object): nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file """ params = None @@ -502,8 +613,12 @@ class FileDownloader(object): pass def report_writedescription(self, descfn): - """ Report that the description file has been written """ - self.to_screen(u'[info] Video description written to: %s' % descfn, ignore_encoding_errors=True) + """ Report that the description file is being written """ + self.to_screen(u'[info] Writing video description to: %s' % descfn, ignore_encoding_errors=True) + + def report_writeinfojson(self, infofn): + """ Report that the metadata file has been written """ + self.to_screen(u'[info] Video description metadata as JSON to: %s' % infofn, ignore_encoding_errors=True) def report_destination(self, filename): """Report destination filename.""" @@ -594,13 +709,34 @@ class FileDownloader(object): if self.params.get('writedescription', False): try: descfn = filename + '.description' - with contextlib.closing(open(descfn, 'wb')) as descfile: - descfile.write(info_dict['description'].encode('utf-8')) self.report_writedescription(descfn) + descfile = open(descfn, 'wb') + try: + descfile.write(info_dict['description'].encode('utf-8')) + finally: + descfile.close() except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file: %s' % str(descfn)) return + if self.params.get('writeinfojson', False): + infofn = filename + '.info.json' + self.report_writeinfojson(infofn) + try: + json.dump + except (NameError,AttributeError): + self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') + return + try: + infof = open(infofn, 'wb') + try: + json.dump(info_dict, infof) + finally: + infof.close() + except (OSError, IOError): + self.trouble(u'ERROR: Cannot write metadata to JSON file: %s' % str(infofn)) + return + try: success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None)) except (OSError, IOError), err: @@ -892,7 +1028,7 @@ class InfoExtractor(object): class YoutubeIE(InfoExtractor): """Information extractor for youtube.com.""" - _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' + _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en' _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' @@ -1106,7 +1242,6 @@ class YoutubeIE(InfoExtractor): except NameError: video_description = u'No description available.' if self._downloader.params.get('forcedescription', False) or self._downloader.params.get('writedescription', False): - warnings.warn(u'You are using an old Python version, install Python 2.6+ or lxml. Falling back to old video description extractor.') mobj = re.search(r'', video_webpage) if mobj is not None: video_description = mobj.group(1).decode('utf-8') @@ -1114,6 +1249,7 @@ class YoutubeIE(InfoExtractor): html_parser = lxml.etree.HTMLParser(encoding='utf-8') vwebpage_doc = lxml.etree.parse(StringIO.StringIO(video_webpage), html_parser) video_description = u''.join(vwebpage_doc.xpath('id("eow-description")//text()')) + # TODO use another parser # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -1121,8 +1257,15 @@ class YoutubeIE(InfoExtractor): # Decide which formats to download req_format = self._downloader.params.get('format', None) - if 'fmt_url_map' in video_info and len(video_info['fmt_url_map']) >= 1 and ',' in video_info['fmt_url_map'][0]: - url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) + if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): + self.report_rtmp_download() + video_url_list = [(None, video_info['conn'][0])] + elif 'url_encoded_fmt_stream_map' in video_info and len(video_info['url_encoded_fmt_stream_map']) >= 1: + url_data_strs = video_info['url_encoded_fmt_stream_map'][0].split(',') + url_data = [parse_qs(uds) for uds in url_data_strs] + url_data = filter(lambda ud: 'itag' in ud and 'url' in ud, url_data) + url_map = dict((ud['itag'][0], ud['url'][0]) for ud in url_data) + format_limit = self._downloader.params.get('format_limit', None) if format_limit is not None and format_limit in self._available_formats: format_list = self._available_formats[self._available_formats.index(format_limit):] @@ -1142,13 +1285,8 @@ class YoutubeIE(InfoExtractor): self._downloader.trouble(u'ERROR: requested format not available') return video_url_list = [(req_format, url_map[req_format])] # Specific format - - elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): - self.report_rtmp_download() - video_url_list = [(None, video_info['conn'][0])] - else: - self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') + self._downloader.trouble(u'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') return for format_param, video_real_url in video_url_list: @@ -1158,7 +1296,6 @@ class YoutubeIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'flv') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -2583,7 +2720,6 @@ class FacebookIE(InfoExtractor): # Extension video_extension = self._video_extensions.get(format_param, 'mp4') - # Find the video URL in fmt_url_map or conn paramters try: # Process video information self._downloader.process_info({ @@ -2627,7 +2763,11 @@ class BlipTVIE(InfoExtractor): self._downloader.trouble(u'ERROR: invalid URL: %s' % url) return - json_url = url + ('&' if '?' in url else '?') + 'skin=json&version=2&no_wrap=1' + if '?' in url: + cchar = '&' + else: + cchar = '?' + json_url = url + cchar + 'skin=json&version=2&no_wrap=1' request = urllib2.Request(json_url) self.report_extraction(mobj.group(1)) try: @@ -2637,7 +2777,10 @@ class BlipTVIE(InfoExtractor): return try: json_data = json.loads(json_code) - data = json_data['Post'] if 'Post' in json_data else json_data + if 'Post' in json_data: + data = json_data['Post'] + else: + data = json_data upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d') video_url = data['media']['url'] @@ -2831,7 +2974,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2011.03.29', + version='2011.07.09-phihag', conflict_handler='resolve', ) @@ -2924,6 +3067,9 @@ if __name__ == '__main__': filesystem.add_option('--write-description', action='store_true', dest='writedescription', help='write video description to a .description file', default=False) + filesystem.add_option('--write-info-json', + action='store_true', dest='writeinfojson', + help='write video metadata to a .info.json file', default=False) parser.add_option_group(filesystem) postproc = optparse.OptionGroup(parser, 'Post-processing Options') @@ -3062,6 +3208,7 @@ if __name__ == '__main__': 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, + 'writeinfojson': opts.writeinfojson, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie)