Merge pull request #5376 from PeteHemery/ffmpeg-postproc-utime-bug
author Sergey M. <dstftw@gmail.com>
Wed, 8 Apr 2015 15:27:17 +0000 (20:27 +0500)
committer Sergey M. <dstftw@gmail.com>
Wed, 8 Apr 2015 15:27:17 +0000 (20:27 +0500)
[ffmpeg] adding exception catching for call to os.utime in run_ffmpeg_multiple_files

youtube_dl/extractor/__init__.py
youtube_dl/extractor/generic.py
youtube_dl/extractor/livestream.py
youtube_dl/extractor/theplatform.py
youtube_dl/extractor/udn.py [new file with mode: 0644]
youtube_dl/extractor/youtube.py

diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e6fdf12977e2316adc842daf9adb97b2d7edb201..8df1db83ec00337f1119de19d000ec3826c50419 100644 (file)
@@ -557,6 +557,7 @@ from .udemy import (
     UdemyIE,
     UdemyCourseIE
 )
+from .udn import UDNEmbedIE
 from .ultimedia import UltimediaIE
 from .unistra import UnistraIE
 from .urort import UrortIE
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 2ff002643c9e4404b3427f4f309c187186ccc37a..6c212efac4b0f5869f93e37a5c34b42dbe2ca88a 100644 (file)
@@ -34,6 +34,7 @@ from .ooyala import OoyalaIE
 from .rutv import RUTVIE
 from .smotri import SmotriIE
 from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
 
 
 class GenericIE(InfoExtractor):
@@ -650,6 +651,17 @@ class GenericIE(InfoExtractor):
                 'title': "PFT Live: New leader in the 'new-look' defense",
                 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
             },
+        },
+        # UDN embed
+        {
+            'url': 'http://www.udn.com/news/story/7314/822787',
+            'md5': 'de06b4c90b042c128395a88f0384817e',
+            'info_dict': {
+                'id': '300040',
+                'ext': 'mp4',
+                'title': '生物老師男變女 全校挺"做自己"',
+                'thumbnail': 're:^https?://.*\.jpg$',
+            }
         }
     ]
 
@@ -1268,6 +1280,13 @@ class GenericIE(InfoExtractor):
         if nbc_sports_url:
             return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
 
+        # Look for UDN embeds
+        mobj = re.search(
+            r'<iframe[^>]+src="(?P<url>%s)"' % UDNEmbedIE._VALID_URL, webpage)
+        if mobj is not None:
+            return self.url_result(
+                compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dl/extractor/livestream.py
index 2467f8bdd35304a57cc6bd951fa14fd32a25cd4c..ec309dadd848f7c1ae46b28c3be329c32cef48c9 100644 (file)
@@ -21,7 +21,7 @@ from ..utils import (
 
 class LivestreamIE(InfoExtractor):
     IE_NAME = 'livestream'
-    _VALID_URL = r'https?://new\.livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
+    _VALID_URL = r'https?://(?:new\.)?livestream\.com/.*?/(?P<event_name>.*?)(/videos/(?P<id>[0-9]+)(?:/player)?)?/?(?:$|[?#])'
     _TESTS = [{
         'url': 'http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370',
         'md5': '53274c76ba7754fb0e8d072716f2292b',
@@ -51,6 +51,9 @@ class LivestreamIE(InfoExtractor):
     }, {
         'url': 'https://new.livestream.com/accounts/362/events/3557232/videos/67864563/player?autoPlay=false&height=360&mute=false&width=640',
         'only_matching': True,
+    }, {
+        'url': 'http://livestream.com/bsww/concacafbeachsoccercampeonato2015',
+        'only_matching': True,
     }]
 
     def _parse_smil(self, video_id, smil_url):
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py
index 0e3e627f495aa5051cb96fe05f180f9790761129..2d2178331ec396b937ee1115d4e22aee1844b8fb 100644 (file)
@@ -28,7 +28,7 @@ class ThePlatformIE(InfoExtractor):
            (?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/)?
          |theplatform:)(?P<id>[^/\?&]+)'''
 
-    _TEST = {
+    _TESTS = [{
         # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
         'url': 'http://link.theplatform.com/s/dJ5BDC/e9I_cZgTgIPd/meta.smil?format=smil&Tracking=true&mbr=true',
         'info_dict': {
@@ -42,7 +42,20 @@ class ThePlatformIE(InfoExtractor):
             # rtmp download
             'skip_download': True,
         },
-    }
+        # from http://www.cnet.com/videos/tesla-model-s-a-second-step-towards-a-cleaner-motoring-future/
+    }, {
+        'url': 'http://link.theplatform.com/s/kYEXFC/22d_qsQ6MIRT',
+        'info_dict': {
+            'id': '22d_qsQ6MIRT',
+            'ext': 'flv',
+            'description': 'md5:ac330c9258c04f9d7512cf26b9595409',
+            'title': 'Tesla Model S: A second step towards a cleaner motoring future',
+        },
+        'params': {
+            # rtmp download
+            'skip_download': True,
+        }
+    }]
 
     @staticmethod
     def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -127,6 +140,10 @@ class ThePlatformIE(InfoExtractor):
         else:
             formats = []
             switch = body.find(_x('smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par//smil:switch'))
+            if switch is None:
+                switch = body.find(_x('smil:par'))
             if switch is not None:
                 base_url = head.find(_x('smil:meta')).attrib['base']
                 for f in switch.findall(_x('smil:video')):
diff --git a/youtube_dl/extractor/udn.py b/youtube_dl/extractor/udn.py
new file mode 100644 (file)
index 0000000..bba25bb
--- /dev/null
@@ -0,0 +1,64 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+from .common import InfoExtractor
+from ..utils import js_to_json
+from ..compat import compat_urlparse
+
+
+class UDNEmbedIE(InfoExtractor):
+    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'http://video.udn.com/embed/news/300040',
+        'md5': 'de06b4c90b042c128395a88f0384817e',
+        'info_dict': {
+            'id': '300040',
+            'ext': 'mp4',
+            'title': '生物老師男變女 全校挺"做自己"',
+            'thumbnail': 're:^https?://.*\.jpg$',
+        }
+    }, {
+        'url': '//video.udn.com/embed/news/300040',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        page = self._download_webpage(url, video_id)
+
+        options = json.loads(js_to_json(self._html_search_regex(
+            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
+
+        video_urls = options['video']
+
+        if video_urls.get('youtube'):
+            return self.url_result(video_urls.get('youtube'), 'Youtube')
+
+        try:
+            del video_urls['youtube']
+        except KeyError:
+            pass
+
+        formats = [{
+            'url': self._download_webpage(
+                compat_urlparse.urljoin(url, api_url), video_id,
+                'retrieve url for %s video' % video_type),
+            'format_id': video_type,
+            'preference': 0 if video_type == 'mp4' else -1,
+        } for video_type, api_url in video_urls.items()]
+
+        self._sort_formats(formats)
+
+        thumbnail = None
+
+        if options.get('gallery') and len(options['gallery']):
+            thumbnail = options['gallery'][0].get('original')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'title': options['title'],
+            'thumbnail': thumbnail
+        }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 79ce39aa40bdbf59f7afef403f0afc301333138b..2774ec30b26d408817a8f4328747a99e791ec8f5 100644 (file)
@@ -788,33 +788,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             errnote='Could not download DASH manifest')
 
         formats = []
-        for r in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}Representation'):
-            url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
-            if url_el is None:
-                continue
-            format_id = r.attrib['id']
-            video_url = url_el.text
-            filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
-            f = {
-                'format_id': format_id,
-                'url': video_url,
-                'width': int_or_none(r.attrib.get('width')),
-                'height': int_or_none(r.attrib.get('height')),
-                'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
-                'asr': int_or_none(r.attrib.get('audioSamplingRate')),
-                'filesize': filesize,
-                'fps': int_or_none(r.attrib.get('frameRate')),
-            }
-            try:
-                existing_format = next(
-                    fo for fo in formats
-                    if fo['format_id'] == format_id)
-            except StopIteration:
-                full_info = self._formats.get(format_id, {}).copy()
-                full_info.update(f)
-                formats.append(full_info)
-            else:
-                existing_format.update(f)
+        for a in dash_doc.findall('.//{urn:mpeg:DASH:schema:MPD:2011}AdaptationSet'):
+            mime_type = a.attrib.get('mimeType')
+            for r in a.findall('{urn:mpeg:DASH:schema:MPD:2011}Representation'):
+                url_el = r.find('{urn:mpeg:DASH:schema:MPD:2011}BaseURL')
+                if url_el is None:
+                    continue
+                if mime_type == 'text/vtt':
+                    # TODO implement WebVTT downloading
+                    pass
+                elif mime_type.startswith('audio/') or mime_type.startswith('video/'):
+                    format_id = r.attrib['id']
+                    video_url = url_el.text
+                    filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength'))
+                    f = {
+                        'format_id': format_id,
+                        'url': video_url,
+                        'width': int_or_none(r.attrib.get('width')),
+                        'height': int_or_none(r.attrib.get('height')),
+                        'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),
+                        'asr': int_or_none(r.attrib.get('audioSamplingRate')),
+                        'filesize': filesize,
+                        'fps': int_or_none(r.attrib.get('frameRate')),
+                    }
+                    try:
+                        existing_format = next(
+                            fo for fo in formats
+                            if fo['format_id'] == format_id)
+                    except StopIteration:
+                        full_info = self._formats.get(format_id, {}).copy()
+                        full_info.update(f)
+                        formats.append(full_info)
+                    else:
+                        existing_format.update(f)
+                else:
+                    self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
         return formats
 
     def _real_extract(self, url):