[platzi] Improve client data extraction (closes #22290)
author Sergey M․ <dstftw@gmail.com>
Mon, 2 Sep 2019 18:24:20 +0000 (01:24 +0700)
committer Sergey M․ <dstftw@gmail.com>
Mon, 2 Sep 2019 18:24:20 +0000 (01:24 +0700)
youtube_dl/extractor/platzi.py

index cd6b966c5cc03bb060d9f01ccac119de0db189b7..602207bebdd6a01d7f33dbf08302ab5a75ccf207 100644 (file)
@@ -107,7 +107,11 @@ class PlatziIE(PlatziBaseIE):
 
         data = self._parse_json(
             self._search_regex(
-                r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
+                # client_data may contain "};" so that we have to try more
+                # strict regex first
+                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+                 r'client_data\s*=\s*({.+?})\s*;'),
+                webpage, 'client data'),
             lecture_id)
 
         material = data['initialState']['material']