Merge remote-tracking branch 'upstream/master'
author Allan Zhou <allanzp@gmail.com>
Wed, 21 Aug 2013 07:07:03 +0000 (00:07 -0700)
committer Allan Zhou <allanzp@gmail.com>
Wed, 21 Aug 2013 07:07:03 +0000 (00:07 -0700)
youtube_dl/FileDownloader.py
youtube_dl/extractor/collegehumor.py
youtube_dl/extractor/common.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/rtlnow.py
youtube_dl/version.py

index ea6b9d626efa7a18eafe20afa8c473d1afee315b..217c4a52f5204ccf471ccb1932a2f322571828eb 100644 (file)
@@ -79,9 +79,13 @@ class FileDownloader(object):
         rate = float(current) / dif
         eta = int((float(total) - float(current)) / rate)
         (eta_mins, eta_secs) = divmod(eta, 60)
-        if eta_mins > 99:
-            return '--:--'
-        return '%02d:%02d' % (eta_mins, eta_secs)
+        (eta_hours, eta_mins) = divmod(eta_mins, 60)
+        if eta_hours > 99:
+            return '--:--:--'
+        if eta_hours == 0:
+            return '%02d:%02d' % (eta_mins, eta_secs)
+        else:
+            return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs)
 
     @staticmethod
     def calc_speed(start, now, bytes):
index 30b9c7549f76c8d65dd4f18bcc5023b0c86160d9..8d4c93d6da91f4470c9809bf32dd0fbbe886c92b 100644 (file)
@@ -4,6 +4,7 @@ import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
     compat_urllib_parse_urlparse,
+    determine_ext,
 
     ExtractorError,
 )
@@ -12,7 +13,7 @@ from ..utils import (
 class CollegeHumorIE(InfoExtractor):
     _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P<videoid>[0-9]+)/?(?P<shorttitle>.*)$'
 
-    _TEST = {
+    _TESTS = [{
         u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe',
         u'file': u'6902724.mp4',
         u'md5': u'1264c12ad95dca142a9f0bf7968105a0',
@@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor):
             u'title': u'Comic-Con Cosplay Catastrophe',
             u'description': u'Fans get creative this year at San Diego.  Too creative.  And yes, that\'s really Joss Whedon.',
         },
-    }
+    },
+    {
+        u'url': u'http://www.collegehumor.com/video/3505939/font-conference',
+        u'file': u'3505939.mp4',
+        u'md5': u'c51ca16b82bb456a4397987791a835f5',
+        u'info_dict': {
+            u'title': u'Font Conference',
+            u'description': u'This video wasn\'t long enough, so we made it double-spaced.',
+        },
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor):
             info['description'] = videoNode.findall('./description')[0].text
             info['title'] = videoNode.findall('./caption')[0].text
             info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
-            manifest_url = videoNode.findall('./file')[0].text
+            next_url = videoNode.findall('./file')[0].text
         except IndexError:
             raise ExtractorError(u'Invalid metadata XML file')
 
-        manifest_url += '?hdcore=2.10.3'
-        manifestXml = self._download_webpage(manifest_url, video_id,
-                                             u'Downloading XML manifest',
-                                             u'Unable to download video info XML')
-
-        adoc = xml.etree.ElementTree.fromstring(manifestXml)
-        try:
-            media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
-            node_id = media_node.attrib['url']
-            video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
-        except IndexError as err:
-            raise ExtractorError(u'Invalid manifest file')
+        if next_url.endswith(u'manifest.f4m'):
+            manifest_url = next_url + '?hdcore=2.10.3'
+            manifestXml = self._download_webpage(manifest_url, video_id,
+                                         u'Downloading XML manifest',
+                                         u'Unable to download video info XML')
 
-        url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            adoc = xml.etree.ElementTree.fromstring(manifestXml)
+            try:
+                media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0]
+                node_id = media_node.attrib['url']
+                video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text
+            except IndexError as err:
+                raise ExtractorError(u'Invalid manifest file')
+            url_pr = compat_urllib_parse_urlparse(info['thumbnail'])
+            info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
+            info['ext'] = 'mp4'
+        else:
+            # Old-style direct links
+            info['url'] = next_url
+            info['ext'] = determine_ext(info['url'])
 
-        info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','')
-        info['ext'] = 'mp4'
-        return [info]
+        return info
index da50abfc1cd492b8d360ef601b44841a938c055b..8009c2d85708638509ce1539198ecbd6db1dbafa 100644 (file)
@@ -77,7 +77,13 @@ class InfoExtractor(object):
     @classmethod
     def suitable(cls, url):
         """Receives a URL and returns True if suitable for this IE."""
-        return re.match(cls._VALID_URL, url) is not None
+
+        # This does not use has/getattr intentionally - we want to know whether
+        # we have cached the regexp for *this* class, whereas getattr would also
+        # match the superclass
+        if '_VALID_URL_RE' not in cls.__dict__:
+            cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+        return cls._VALID_URL_RE.match(url) is not None
 
     @classmethod
     def working(cls):
index b633e896c6eb3d5b2158585d01ffba9615fe8f31..da016f7ee86635c114bb4328742971c8082e5491 100644 (file)
@@ -107,8 +107,13 @@ class GenericIE(InfoExtractor):
         return new_url
 
     def _real_extract(self, url):
-        new_url = self._test_redirect(url)
-        if new_url: return [self.url_result(new_url)]
+        try:
+            new_url = self._test_redirect(url)
+            if new_url:
+                return [self.url_result(new_url)]
+        except compat_urllib_error.HTTPError:
+            # This may be a stupid server that doesn't like HEAD, our UA, or so
+            pass
 
         video_id = url.split('/')[-1]
         try:
@@ -144,6 +149,9 @@ class GenericIE(InfoExtractor):
             # We only look in og:video if the MIME type is a video, don't try if it's a Flash player:
             if m_video_type is not None:
                 mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)
+        if mobj is None:
+            # HTML5 video
+            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL)
         if mobj is None:
             raise ExtractorError(u'Invalid URL: %s' % url)
 
index d993a990ad7c5869e70cf29f60dc81f188bc09d3..2f134e6a7bbb6695197234b7833d0b55d5575be8 100644 (file)
@@ -2,7 +2,10 @@
 import re
 
 from .common import InfoExtractor
-from ..utils import ExtractorError
+from ..utils import (
+    clean_html,
+    ExtractorError,
+)
 
 class RTLnowIE(InfoExtractor):
     """Information Extractor for RTLnow, RTL2now and VOXnow"""
@@ -18,6 +21,7 @@ class RTLnowIE(InfoExtractor):
         u'params': {
             u'skip_download': True,
         },
+        u'skip': u'Only works from Germany',
     },
     {
         u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
@@ -31,6 +35,7 @@ class RTLnowIE(InfoExtractor):
         u'params': {
             u'skip_download': True,
         },
+        u'skip': u'Only works from Germany',
     },
     {
         u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
@@ -53,6 +58,14 @@ class RTLnowIE(InfoExtractor):
         video_id = mobj.group(u'video_id')
 
         webpage = self._download_webpage(webpage_url, video_id)
+
+        note_m = re.search(r'''(?sx)
+            <div[ ]style="margin-left:[ ]20px;[ ]font-size:[ ]13px;">(.*?)
+            <div[ ]id="playerteaser">''', webpage)
+        if note_m:
+            msg = clean_html(note_m.group(1))
+            raise ExtractorError(msg)
+
         video_title = self._html_search_regex(r'<title>(?P<title>[^<]+)</title>',
             webpage, u'title')
         playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P<playerdata_url>[^\']+)\'',
index 8c93a275c1ca3b7ad344571e1499498b8dbc529a..58e26bc49f8050e8233b32525e6b7c7516d4db32 100644 (file)
@@ -1,2 +1,2 @@
 
-__version__ = '2013.08.17'
+__version__ = '2013.08.21'