[minhateca] Fix duration parsing
authorPhilipp Hagemeister <phihag@phihag.de>
Thu, 4 Dec 2014 16:35:40 +0000 (17:35 +0100)
committerPhilipp Hagemeister <phihag@phihag.de>
Thu, 4 Dec 2014 16:35:40 +0000 (17:35 +0100)
test/test_utils.py
youtube_dl/extractor/minhateca.py
youtube_dl/utils.py

index 04f1bf283b32abe2421fb782f2765c3c95811c3c..badab1370629cf0214f3eaf1b1822a7768aeaa5b 100644 (file)
@@ -220,6 +220,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('0s'), 0)
         self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
         self.assertEqual(parse_duration('T30M38S'), 1838)
+        self.assertEqual(parse_duration('5 s'), 5)
+        self.assertEqual(parse_duration('3 min'), 180)
+        self.assertEqual(parse_duration('2.5 hours'), 9000)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(
index 077c9b19dda06327544030b285dbcf05ae4fd023..14934b7ec5579d3b7cfb4b16e5308e81301ace63 100644 (file)
@@ -8,6 +8,7 @@ from ..compat import (
 )
 from ..utils import (
     int_or_none,
+    parse_duration,
     parse_filesize,
 )
 
@@ -52,8 +53,8 @@ class MinhatecaIE(InfoExtractor):
         filesize_approx = parse_filesize(self._html_search_regex(
             r'<p class="fileSize">(.*?)</p>',
             webpage, 'file size approximation', fatal=False))
-        duration = int_or_none(self._html_search_regex(
-            r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s',
+        duration = parse_duration(self._html_search_regex(
+            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
             webpage, 'duration', fatal=False))
         view_count = int_or_none(self._html_search_regex(
             r'<p class="downloadsCounter">([0-9]+)</p>',
index 5e9ae7a426277ef29aeb0cb5901eea30f85ed794..5efb4c59a18e0c89e902f66be6dfe93af6b71df5 100644 (file)
@@ -1206,18 +1206,29 @@ def parse_duration(s):
 
     m = re.match(
         r'''(?ix)T?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
             (?:
                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
             )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
     if not m:
         return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
     if m.group('mins'):
         res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res