From: Sergey M․ Date: Wed, 16 Jul 2014 13:55:38 +0000 (+0700) Subject: Merge branch 'cracked' of https://github.com/hassaanaliw/youtube-dl into hassaanaliw... X-Git-Url: http://git.bitcoin.ninja/index.cgi?a=commitdiff_plain;h=2f97f7687710f29cb0e101dd7bb3e0e41c73c9b4;hp=43f0537c06384b9b97235a93ea39649ee3de4d45;p=youtube-dl Merge branch 'cracked' of https://github.com/hassaanaliw/youtube-dl into hassaanaliw-cracked --- diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8d46fe108..d95533959 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -33,6 +33,12 @@ _TESTS = [ 90, u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), + ( + u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', + u'js', + 84, + u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', + ), ( u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', u'js', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 78b95c2a5..ca372496a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -171,6 +171,7 @@ from .metacafe import MetacafeIE from .metacritic import MetacriticIE from .mit import TechTVMITIE, MITIE, OCWMITIE from .mixcloud import MixcloudIE +from .mlb import MLBIE from .mpora import MporaIE from .mofosex import MofosexIE from .mooshare import MooshareIE diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py new file mode 100644 index 000000000..18ab2c135 --- /dev/null +++ b/youtube_dl/extractor/mlb.py @@ -0,0 +1,102 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + parse_duration, + parse_iso8601, + find_xpath_attr, +) + + +class MLBIE(InfoExtractor): + _VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)' + _TESTS = [ + { + 'url': 
'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', + 'md5': 'd9c022c10d21f849f49c05ae12a8a7e9', + 'info_dict': { + 'id': '34496663', + 'ext': 'mp4', + 'title': 'Stanton prepares for Derby', + 'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57', + 'duration': 46, + 'timestamp': 1405105800, + 'upload_date': '20140711', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + { + 'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby', + 'md5': '0e6e73d509321e142409b695eadd541f', + 'info_dict': { + 'id': '34578115', + 'ext': 'mp4', + 'title': 'Cespedes repeats as Derby champ', + 'description': 'md5:08df253ce265d4cf6fb09f581fafad07', + 'duration': 488, + 'timestamp': 1405399936, + 'upload_date': '20140715', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + { + 'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', + 'md5': 'b8fd237347b844365d74ea61d4245967', + 'info_dict': { + 'id': '34577915', + 'ext': 'mp4', + 'title': 'Bautista on Home Run Derby', + 'description': 'md5:b80b34031143d0986dddc64a8839f0fb', + 'duration': 52, + 'timestamp': 1405390722, + 'upload_date': '20140715', + 'thumbnail': 're:^https?://.*\.jpg$', + }, + }, + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + detail = self._download_xml( + 'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' + % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id) + + title = detail.find('./headline').text + description = detail.find('./big-blurb').text + duration = parse_duration(detail.find('./duration').text) + timestamp = parse_iso8601(detail.attrib['date'][:-5]) + + thumbnail = find_xpath_attr( + detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text + + formats = [] + for media_url in detail.findall('./url'): + playback_scenario = media_url.attrib['playback_scenario'] + fmt = { + 
'url': media_url.text, + 'format_id': playback_scenario, + } + m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario) + if m: + fmt.update({ + 'vbr': int(m.group('vbr')) * 1000, + 'width': int(m.group('width')), + 'height': int(m.group('height')), + }) + formats.append(fmt) + + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'duration': duration, + 'timestamp': timestamp, + 'formats': formats, + 'thumbnail': thumbnail, + } diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 3bbb07704..ae5bca2e6 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -11,6 +11,7 @@ class JSInterpreter(object): def __init__(self, code): self.code = code self._functions = {} + self._objects = {} def interpret_statement(self, stmt, local_vars, allow_recursion=20): if allow_recursion < 0: @@ -55,7 +56,19 @@ class JSInterpreter(object): m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr) if m: member = m.group('member') - val = local_vars[m.group('in')] + variable = m.group('in') + + if variable not in local_vars: + if variable not in self._objects: + self._objects[variable] = self.extract_object(variable) + obj = self._objects[variable] + key, args = member.split('(', 1) + args = args.strip(')') + argvals = [int(v) if v.isdigit() else local_vars[v] + for v in args.split(',')] + return obj[key](argvals) + + val = local_vars[variable] if member == 'split("")': return list(val) if member == 'join("")': @@ -97,6 +110,25 @@ class JSInterpreter(object): return self._functions[fname](argvals) raise ExtractorError('Unsupported JS expression %r' % expr) + def extract_object(self, objname): + obj = {} + obj_m = re.search( + (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + + r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + + r'\}\s*;', + self.code) + fields = obj_m.group('fields') + # Currently, it only supports function definitions + fields_m = re.finditer( + r'(?P<key>[a-zA-Z$]+)\s*:\s*function' + 
r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', + fields) + for f in fields_m: + argnames = f.group('args').split(',') + obj[f.group('key')] = self.build_function(argnames, f.group('code')) + + return obj + def extract_function(self, funcname): func_m = re.search( (r'(?:function %s|[{;]%s\s*=\s*function)' % ( @@ -107,10 +139,12 @@ class JSInterpreter(object): raise ExtractorError('Could not find JS function %r' % funcname) argnames = func_m.group('args').split(',') + return self.build_function(argnames, func_m.group('code')) + + def build_function(self, argnames, code): def resf(args): local_vars = dict(zip(argnames, args)) - for stmt in func_m.group('code').split(';'): + for stmt in code.split(';'): res = self.interpret_statement(stmt, local_vars) return res return resf - diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2c9591630..4d606c3d2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.3' +__version__ = '2014.07.15'