[youtube] Fix extraction (closes #20758, closes #20759, closes #20761, closes #20762...
[youtube-dl] / youtube_dl / extractor / minhateca.py
index 077c9b19dda06327544030b285dbcf05ae4fd023..dccc542497692ac6aa14a6e36f6b96b0aad7741d 100644 (file)
@@ -2,13 +2,12 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..compat import (
-    compat_urllib_parse,
-    compat_urllib_request,
-)
 from ..utils import (
     int_or_none,
+    parse_duration,
     parse_filesize,
+    sanitized_Request,
+    urlencode_postdata,
 )
 
 
@@ -20,7 +19,7 @@ class MinhatecaIE(InfoExtractor):
             'id': '125848331',
             'ext': 'mp4',
             'title': 'youtube-dl test video',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'filesize_approx': 1530000,
             'duration': 9,
             'view_count': int,
@@ -38,9 +37,9 @@ class MinhatecaIE(InfoExtractor):
             ('fileId', video_id),
             ('__RequestVerificationToken', token),
         ]
-        req = compat_urllib_request.Request(
+        req = sanitized_Request(
             'http://minhateca.com.br/action/License/Download',
-            data=compat_urllib_parse.urlencode(token_data))
+            data=urlencode_postdata(token_data))
         req.add_header('Content-Type', 'application/x-www-form-urlencoded')
         data = self._download_json(
             req, video_id, note='Downloading metadata')
@@ -52,8 +51,8 @@ class MinhatecaIE(InfoExtractor):
         filesize_approx = parse_filesize(self._html_search_regex(
             r'<p class="fileSize">(.*?)</p>',
             webpage, 'file size approximation', fatal=False))
-        duration = int_or_none(self._html_search_regex(
-            r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s',
+        duration = parse_duration(self._html_search_regex(
+            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
             webpage, 'duration', fatal=False))
         view_count = int_or_none(self._html_search_regex(
             r'<p class="downloadsCounter">([0-9]+)</p>',