[utils] Improve and test js_to_json
author Philipp Hagemeister <phihag@phihag.de>
Tue, 30 Sep 2014 09:12:59 +0000 (11:12 +0200)
committer Philipp Hagemeister <phihag@phihag.de>
Tue, 30 Sep 2014 22:08:34 +0000 (00:08 +0200)
test/test_utils.py
youtube_dl/extractor/common.py
youtube_dl/utils.py

index 113aa44b2e5c319040ebde225a0f9298c766ec31..bcca0efead42b85f39337a4c28f0d654447cd8e2 100644 (file)
@@ -332,14 +332,28 @@ class TestUtil(unittest.TestCase):
         )
         self.assertEqual(escape_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0')
 
-    def test_js_to_json(self):
+    def test_js_to_json_realworld(self):
         inp = '''{
-                'clip':{'provider':'pseudo'}
+            'clip':{'provider':'pseudo'}
         }'''
         self.assertEqual(js_to_json(inp), '''{
-                "clip":{"provider":"pseudo"}
+            "clip":{"provider":"pseudo"}
         }''')
         json.loads(js_to_json(inp))
 
+        inp = '''{
+            'playlist':[{'controls':{'all':null}}]
+        }'''
+        self.assertEqual(js_to_json(inp), '''{
+            "playlist":[{"controls":{"all":null}}]
+        }''')
+
+    def test_js_to_json_edgecases(self):
+        on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}")
+        self.assertEqual(json.loads(on), {"abc_def": "1'\\2\\'3\"4"})
+
+        on = js_to_json('{"abc": true}')
+        self.assertEqual(json.loads(on), {'abc': True})
+
 if __name__ == '__main__':
     unittest.main()
index f43a0a569a3d90d555a27cece8ac3e68951c5106..611cf95f1125ec9340a96554df9542d1fcbd62b4 100644 (file)
@@ -334,7 +334,11 @@ class InfoExtractor(object):
         try:
             return json.loads(json_string)
         except ValueError as ve:
-            raise ExtractorError('Failed to download JSON', cause=ve)
+            errmsg = '%s: Failed to parse JSON ' % video_id
+            if fatal:
+                raise ExtractorError(errmsg, cause=ve)
+            else:
+                self.report_warning(errmsg + str(ve))
 
     def report_warning(self, msg, video_id=None):
         idstr = '' if video_id is None else '%s: ' % video_id
index 59851a8c05f15fd4b635702925bc3a2d5360eeea..f8dd9c72d8e07ee7fc231b33f123df4c16b2d6d6 100644 (file)
@@ -1580,29 +1580,24 @@ def strip_jsonp(code):
 
 def js_to_json(code):
     def fix_kv(m):
-        key = m.group(2)
-        if key.startswith("'"):
-            assert key.endswith("'")
-            assert '"' not in key
-            key = '"%s"' % key[1:-1]
-        elif not key.startswith('"'):
-            key = '"%s"' % key
-
-        value = m.group(4)
-        if value.startswith("'"):
-            assert value.endswith("'")
-            assert '"' not in value
-            value = '"%s"' % value[1:-1]
-
-        return m.group(1) + key + m.group(3) + value
+        v = m.group(0)
+        if v in ('true', 'false', 'null'):
+            return v
+        if v.startswith('"'):
+            return v
+        if v.startswith("'"):
+            v = v[1:-1]
+            v = re.sub(r"\\\\|\\'|\"", lambda m: {
+                '\\\\': '\\\\',
+                "\\'": "'",
+                '"': '\\"',
+            }[m.group(0)], v)
+        return '"%s"' % v
 
     res = re.sub(r'''(?x)
-            ([{,]\s*)
-            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+)
-            (:\s*)
-            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|
-                (?=\[|\{)
-            )
+        "(?:[^"\\]*(?:\\\\|\\")?)*"|
+        '(?:[^'\\]*(?:\\\\|\\')?)*'|
+        [a-zA-Z_][a-zA-Z_0-9]*
         ''', fix_kv, code)
     res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
     return res