[jsinterp] Remove superfluous u
[youtube-dl] / youtube_dl / jsinterp.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .utils import (
6     ExtractorError,
7 )
8
9
class JSInterpreter(object):
    """Interpreter for a very small subset of JavaScript.

    Statements and expressions are recognised with regular expressions
    rather than a real parser, so only the exact shapes handled below are
    supported; anything else raises ExtractorError.
    """

    def __init__(self, code):
        """Store the JavaScript source in which functions are looked up."""
        self.code = code
        # Cache of already extracted functions, keyed by JavaScript name.
        self._functions = {}

    def interpret_statement(self, stmt, local_vars, allow_recursion=20):
        """Execute a single statement and return the value it produced.

        Supported forms (an optional leading ``var `` is ignored):

        * ``name=expr`` - store the value in ``local_vars``
        * ``name[index]=expr`` - store the value in an existing variable
        * ``return expr`` - just evaluate the expression

        stmt is the statement text without the terminating ``;``,
        local_vars maps variable names to values and is modified in place,
        allow_recursion is the remaining recursion budget.

        Raises ExtractorError if the budget is negative, the statement has
        none of the forms above, or an assignment index is not an integer.
        """
        if allow_recursion < 0:
            raise ExtractorError('Recursion limit reached')

        if stmt.startswith('var '):
            stmt = stmt[len('var '):]
        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' +
                         r'=(?P<expr>.*)$', stmt)
        if ass_m:
            if ass_m.groupdict().get('index'):
                def assign(val):
                    lvar = local_vars[ass_m.group('out')]
                    idx = self.interpret_expression(
                        ass_m.group('index'), local_vars, allow_recursion)
                    # Raise explicitly instead of using assert: asserts are
                    # removed under "python -O" and would raise a different
                    # exception type than every other failure in this class.
                    if not isinstance(idx, int):
                        raise ExtractorError(
                            'Non-integer index %r in statement %r'
                            % (idx, stmt))
                    lvar[idx] = val
                    return val
            else:
                def assign(val):
                    local_vars[ass_m.group('out')] = val
                    return val
            expr = ass_m.group('expr')
        elif stmt.startswith('return '):
            def assign(val):
                return val
            expr = stmt[len('return '):]
        else:
            raise ExtractorError(
                'Cannot determine left side of statement in %r' % stmt)

        # The right-hand side is evaluated before the target index.
        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return assign(v)

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """Evaluate an expression and return its value.

        Recognised expressions, tried in this order:

        * a decimal integer literal
        * a variable name made of letters only
        * ``name.split("")``, ``name.join("")``, ``name.length``,
          ``name.reverse()`` and ``name.slice(start)``
        * ``name[index]``
        * ``a%b``
        * ``func(arg,...)`` where every argument is an integer literal or
          a variable name and ``func`` is defined in ``self.code``

        Raises ExtractorError if the expression matches none of these.

        NOTE: allow_recursion is only passed along here; the limit itself
        is checked in interpret_statement, and every JavaScript function
        call starts again with that method's default budget.
        """
        if expr.isdigit():
            return int(expr)

        if expr.isalpha():
            return local_vars[expr]

        m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
        if m:
            member = m.group('member')
            val = local_vars[m.group('in')]
            if member == 'split("")':
                return list(val)
            if member == 'join("")':
                return ''.join(val)
            if member == 'length':
                return len(val)
            if member == 'reverse()':
                return val[::-1]
            slice_m = re.match(r'slice\((?P<idx>.*)\)', member)
            if slice_m:
                idx = self.interpret_expression(
                    slice_m.group('idx'), local_vars, allow_recursion - 1)
                return val[idx:]
            # Unknown members fall through to the remaining patterns and
            # finally to the "Unsupported JS expression" error.

        m = re.match(
            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(
                m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr)
        if m:
            a = self.interpret_expression(
                m.group('a'), local_vars, allow_recursion)
            b = self.interpret_expression(
                m.group('b'), local_vars, allow_recursion)
            return a % b

        m = re.match(
            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr)
        if m:
            fname = m.group('func')
            # Extract each function once and reuse it afterwards.
            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)
            argvals = [int(v) if v.isdigit() else local_vars[v]
                       for v in m.group('args').split(',')]
            return self._functions[fname](argvals)
        raise ExtractorError('Unsupported JS expression %r' % expr)

    def extract_function(self, funcname):
        """Return a Python callable for the JavaScript function funcname.

        The definition is searched in ``self.code`` as either
        ``function NAME(args){body}`` or ``NAME = function(args){body}``;
        the body must not contain ``}``.

        The returned callable takes one list with the argument values (in
        declaration order), runs the body's statements one after another
        and returns the value of the last one, or None if the body has no
        statements.

        Raises ExtractorError if no matching definition is found.
        """
        func_m = re.search(
            (r'(?:function %s|%s\s*=\s*function)' % (
                re.escape(funcname), re.escape(funcname))) +
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            self.code)
        if func_m is None:
            raise ExtractorError('Could not find JS function %r' % funcname)
        argnames = func_m.group('args').split(',')

        # Split the body once, not on every call.  Surrounding whitespace is
        # dropped and empty statements (e.g. after a trailing ";") are
        # skipped; they are valid JavaScript but would be rejected by
        # interpret_statement.
        statements = []
        for raw_stmt in func_m.group('code').split(';'):
            raw_stmt = raw_stmt.strip()
            if raw_stmt:
                statements.append(raw_stmt)

        def resf(args):
            local_vars = dict(zip(argnames, args))
            res = None
            for stmt in statements:
                res = self.interpret_statement(stmt, local_vars)
            return res
        return resf
116