]> git.bitcoin.ninja Git - youtube-dl/blobdiff - youtube_dl/extractor/common.py
[utils] Improve _hidden_inputs
[youtube-dl] / youtube_dl / extractor / common.py
index a8296816257ca8a2e0b9b38f44c48edbaa689756..e413799f9a3b4b78ca3cf3fd6908abaaa94c906e 100644 (file)
@@ -680,7 +680,7 @@ class InfoExtractor(object):
 
         return (username, password)
 
-    def _get_login_info(self):
+    def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
         """
         Get the login info as (username, password)
         It will look in the netrc file using the _NETRC_MACHINE value
@@ -694,11 +694,11 @@ class InfoExtractor(object):
         downloader_params = self._downloader.params
 
         # Attempt to use provided username and password or .netrc data
-        if downloader_params.get('username') is not None:
-            username = downloader_params['username']
-            password = downloader_params['password']
+        if downloader_params.get(username_option) is not None:
+            username = downloader_params[username_option]
+            password = downloader_params[password_option]
         else:
-            username, password = self._get_netrc_login_info()
+            username, password = self._get_netrc_login_info(netrc_machine)
 
         return (username, password)
 
@@ -888,16 +888,16 @@ class InfoExtractor(object):
     def _hidden_inputs(html):
         html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
         hidden_inputs = {}
-        for input in re.findall(r'(?i)<input([^>]+)>', html):
-            if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):
+        for input in re.findall(r'(?i)(<input[^>]+>)', html):
+            attrs = extract_attributes(input)
+            if not input:
                 continue
-            name = re.search(r'(?:name|id)=(["\'])(?P<value>.+?)\1', input)
-            if not name:
+            if attrs.get('type') not in ('hidden', 'submit'):
                 continue
-            value = re.search(r'value=(["\'])(?P<value>.*?)\1', input)
-            if not value:
-                continue
-            hidden_inputs[name.group('value')] = value.group('value')
+            name = attrs.get('name') or attrs.get('id')
+            value = attrs.get('value')
+            if name and value is not None:
+                hidden_inputs[name] = value
         return hidden_inputs
 
     def _form_hidden_inputs(self, form_id, html):
@@ -1163,13 +1163,6 @@ class InfoExtractor(object):
                               m3u8_id=None, note=None, errnote=None,
                               fatal=True, live=False):
 
-        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
-
-        format_url = lambda u: (
-            u
-            if re.match(r'^https?://', u)
-            else compat_urlparse.urljoin(m3u8_url, u))
-
         res = self._download_webpage_handle(
             m3u8_url, video_id,
             note=note or 'Downloading m3u8 information',
@@ -1180,6 +1173,13 @@ class InfoExtractor(object):
         m3u8_doc, urlh = res
         m3u8_url = urlh.geturl()
 
+        formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+
+        format_url = lambda u: (
+            u
+            if re.match(r'^https?://', u)
+            else compat_urlparse.urljoin(m3u8_url, u))
+
         # We should try extracting formats only from master playlists [1], i.e.
         # playlists that describe available qualities. On the other hand media
         # playlists [2] should be returned as is since they contain just the media