[utils] Don't attempt to coerce JS strings to numbers in js_to_json (#26851)
author Kevin O'Connor <kevin.oconnor7@gmail.com>
Sat, 17 Oct 2020 17:10:41 +0000 (13:10 -0400)
committer GitHub <noreply@github.com>
Sat, 17 Oct 2020 17:10:41 +0000 (00:10 +0700)
The current logic in `js_to_json` tries to rewrite octal/hex numbers to
decimal. However, by the time that logic runs, the surrounding `"` or `'`
have already been trimmed off. As a result, values that were originally
strings but happen to look like octal/hex numbers get rewritten to decimal
and returned as numbers rather than strings.

In practice, something like:

```js
{
  "0x40": "foo",
  "040": "bar",
}
```

would get rewritten as:

```json
{
  64: "foo",
  32: "bar"
}
```

This is problematic because the result is not valid JSON: object keys
must be strings, so bare numeric keys cannot be parsed.

test/test_utils.py
youtube_dl/utils.py

index 962fd8d753ffe5d2ffec4784112623fd870bf213..c2d1e4fb17a84393563092bece09c8ba6e001060 100644 (file)
@@ -994,6 +994,12 @@ class TestUtil(unittest.TestCase):
         on = js_to_json('{42:4.2e1}')
         self.assertEqual(json.loads(on), {'42': 42.0})
 
+        on = js_to_json('{ "0x40": "0x40" }')
+        self.assertEqual(json.loads(on), {'0x40': '0x40'})
+
+        on = js_to_json('{ "040": "040" }')
+        self.assertEqual(json.loads(on), {'040': '040'})
+
     def test_js_to_json_malformed(self):
         self.assertEqual(js_to_json('42a1'), '42"a1"')
         self.assertEqual(js_to_json('42a-1'), '42"a"-1')
index 01d9c0362141588cb6242f55e00a8025f8743ade..737e2810e227a523680d94e4da66890ad3da0733 100644 (file)
@@ -4088,12 +4088,12 @@ def js_to_json(code):
                 '\\\n': '',
                 '\\x': '\\u00',
             }.get(m.group(0), m.group(0)), v[1:-1])
-
-        for regex, base in INTEGER_TABLE:
-            im = re.match(regex, v)
-            if im:
-                i = int(im.group(1), base)
-                return '"%d":' % i if v.endswith(':') else '%d' % i
+        else:
+            for regex, base in INTEGER_TABLE:
+                im = re.match(regex, v)
+                if im:
+                    i = int(im.group(1), base)
+                    return '"%d":' % i if v.endswith(':') else '%d' % i
 
         return '"%s"' % v