[extractor/common] Improve _form_hidden_inputs and rename to _hidden_inputs
[youtube-dl] / youtube_dl / extractor / common.py
index 82f5de2d898c4238839b8ada7c43aea6b166fd5f..315fe4a723e844c4ee550f1fd67e7ef57eb61ad5 100644 (file)
@@ -706,10 +706,17 @@ class InfoExtractor(object):
                                       'twitter card player')
 
     @staticmethod
-    def _form_hidden_inputs(html):
-        return dict(re.findall(
-            r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"',
-            html))
+    def _hidden_inputs(html):  # returns {name: value} for the hidden <input> tags matched in html
+        return dict([  # built from (name, value) pairs, so a repeated name keeps its last value
+            (input.group('name'), input.group('value')) for input in re.finditer(
+                r'''(?x)
+                    <input\s+
+                        type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+
+                        name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+
+                        (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)?
+                        value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value)
+                ''', html)  # matches only type, name, [id], value in that order; each attribute's quotes must pair up
+        ])
 
     def _sort_formats(self, formats, field_preference=None):
         if not formats: